@sebastianandreasson/pi-autonomous-agents 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -93,6 +93,7 @@ The command removes configured harness history/runtime files and verifies that n
93
93
 
94
94
  For prompt debugging, the harness also writes the exact assembled prompt for the current role to `.pi-last-prompt.txt` by default.
95
95
  For flow debugging, it also writes a machine-readable `.pi-last-iteration.json` summary with the selected task, tester verdict, commit-plan state, and terminal reason.
96
+ For run isolation, the supervisor also maintains `.pi-runtime/active-run.json` and stores PI sessions plus per-run telemetry under `.pi-runtime/runs/<runId>/`.
96
97
 
97
98
  ## Generic Contracts
98
99
 
@@ -113,6 +114,8 @@ Keep TODO items extremely small and implementation-shaped when using weaker loca
113
114
 
114
115
  The adapter heartbeat is PI-RPC-event based. Streaming shell output does not count as progress on its own, so long-running tools should rely on the tool-aware watchdog thresholds rather than terminal streaming.
115
116
 
117
+ The supervisor now enforces single-run ownership per repo/config. If a stale run crashed mid-iteration, the next run recovers the unfinished iteration number from `.pi-state.json` instead of silently rolling forward.
118
+
116
119
  `piModel` remains the default text model, but you can override specific roles with `roleModels` such as `developer`, `developerRetry`, `developerFix`, `tester`, and `visualReview`. `testerCommit` is only relevant if you opt back into `commitMode: "plan"`.
117
120
 
118
121
  By default, successful tester passes should stage and create the commit directly in the same PI turn. The old commit-plan parsing flow is still available as `commitMode: "plan"`, but it is now a compatibility mode rather than the default.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@sebastianandreasson/pi-autonomous-agents",
3
3
  "private": false,
4
- "version": "0.4.0",
4
+ "version": "0.5.0",
5
5
  "type": "module",
6
6
  "description": "Portable unattended PI harness for developer/tester/visual-review loops.",
7
7
  "license": "MIT",
package/src/pi-client.mjs CHANGED
@@ -103,7 +103,7 @@ async function runAdapterTurn({ config, model, sessionId, sessionFile, prompt, i
103
103
  instructionsFile: config.instructionsFile,
104
104
  developerInstructionsFile: config.developerInstructionsFile,
105
105
  testerInstructionsFile: config.testerInstructionsFile,
106
- runtimeDir: config.piRuntimeDir,
106
+ runtimeDir: config.runRuntimeDir || config.piRuntimeDir,
107
107
  piCli: config.piCli,
108
108
  model: model ?? config.piModel,
109
109
  tools: config.piTools,
package/src/pi-config.mjs CHANGED
@@ -246,6 +246,7 @@ export function loadConfig(mode = 'once') {
246
246
  lastPromptFile: resolveFromCwd(cwd, 'PI_LAST_PROMPT_FILE', file.lastPromptFile, '.pi-last-prompt.txt'),
247
247
  lastIterationSummaryFile: resolveFromCwd(cwd, 'PI_LAST_ITERATION_SUMMARY_FILE', file.lastIterationSummaryFile, '.pi-last-iteration.json'),
248
248
  piRuntimeDir: resolveFromCwd(cwd, 'PI_RUNTIME_DIR', file.piRuntimeDir, '.pi-runtime'),
249
+ activeRunFile: resolveFromCwd(cwd, 'PI_ACTIVE_RUN_FILE', file.activeRunFile, '.pi-runtime/active-run.json'),
249
250
  piCli: readString('PI_CLI', file.piCli, 'pi'),
250
251
  piModel,
251
252
  piModelProfile: resolvedPiModel,
package/src/pi-repo.mjs CHANGED
@@ -1,6 +1,7 @@
1
1
  import fs from 'node:fs/promises'
2
2
  import { readFileSync } from 'node:fs'
3
3
  import process from 'node:process'
4
+ import { randomUUID } from 'node:crypto'
4
5
  import { execFileSync, spawn } from 'node:child_process'
5
6
  import path from 'node:path'
6
7
 
@@ -9,7 +10,17 @@ export function timestamp() {
9
10
  }
10
11
 
11
12
  export async function appendLog(logFile, message) {
12
- await fs.appendFile(logFile, `[${timestamp()}] ${message}\n`, 'utf8')
13
+ const runId = String(process.env.PI_RUN_ID ?? '').trim()
14
+ const prefix = runId !== '' ? `[run:${runId}] ` : ''
15
+ const line = `[${timestamp()}] ${prefix}${message}\n`
16
+ await fs.mkdir(path.dirname(logFile), { recursive: true })
17
+ await fs.appendFile(logFile, line, 'utf8')
18
+
19
+ const runLogFile = String(process.env.PI_RUN_LOG_FILE ?? '').trim()
20
+ if (runLogFile !== '' && runLogFile !== logFile) {
21
+ await fs.mkdir(path.dirname(runLogFile), { recursive: true })
22
+ await fs.appendFile(runLogFile, line, 'utf8')
23
+ }
13
24
  }
14
25
 
15
26
  export function ensureRepo(cwd) {
@@ -30,7 +41,27 @@ export async function ensureFileExists(filePath, label) {
30
41
  export async function readState(stateFile) {
31
42
  try {
32
43
  const raw = await fs.readFile(stateFile, 'utf8')
33
- return JSON.parse(raw)
44
+ const parsed = JSON.parse(raw)
45
+ if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
46
+ throw new Error('Invalid state file payload')
47
+ }
48
+ return {
49
+ iteration: 0,
50
+ lastTransport: '',
51
+ lastPiModel: '',
52
+ sessionId: '',
53
+ sessionFile: '',
54
+ consecutiveFailures: 0,
55
+ successfulIterations: 0,
56
+ lastPhase: '',
57
+ lastStatus: '',
58
+ lastVerificationStatus: '',
59
+ lastVisualStatus: '',
60
+ lastRunAt: '',
61
+ runId: '',
62
+ inProgress: null,
63
+ ...parsed,
64
+ }
34
65
  } catch {
35
66
  return {
36
67
  iteration: 0,
@@ -38,22 +69,165 @@ export async function readState(stateFile) {
38
69
  lastPiModel: '',
39
70
  sessionId: '',
40
71
  sessionFile: '',
41
- consecutiveFailures: 0,
42
- successfulIterations: 0,
43
- lastPhase: '',
44
- lastStatus: '',
45
- lastVerificationStatus: '',
46
- lastVisualStatus: '',
47
- lastRunAt: '',
48
- }
72
+ consecutiveFailures: 0,
73
+ successfulIterations: 0,
74
+ lastPhase: '',
75
+ lastStatus: '',
76
+ lastVerificationStatus: '',
77
+ lastVisualStatus: '',
78
+ lastRunAt: '',
79
+ runId: '',
80
+ inProgress: null,
81
+ }
49
82
  }
50
83
  }
51
84
 
52
85
  export async function writeState(stateFile, state) {
53
86
  const formatted = `${JSON.stringify(state, null, 2)}\n`
87
+ await fs.mkdir(path.dirname(stateFile), { recursive: true })
54
88
  await fs.writeFile(stateFile, formatted, 'utf8')
55
89
  }
56
90
 
91
/**
 * Generates the identifier for a new supervisor run.
 *
 * @returns {string} A random UUID string produced by node:crypto's randomUUID().
 */
export function createRunId() {
  return randomUUID()
}
94
+
95
/**
 * Coerces an arbitrary value into a positive integer process id.
 *
 * Only a plain run of decimal digits (optionally surrounded by whitespace) is
 * accepted. A lenient parseInt would truncate inputs such as "42abc" or 1.5,
 * and would turn the number 1e21 (stringified as "1e+21") into pid 1.
 *
 * @param {unknown} raw - Candidate pid (number, numeric string, null, ...).
 * @returns {number} The pid as a positive safe integer, or 0 when invalid.
 */
function normalizePid(raw) {
  const text = String(raw ?? '').trim()
  if (!/^\d+$/.test(text)) {
    return 0
  }

  const pid = Number(text)
  return Number.isSafeInteger(pid) && pid > 0 ? pid : 0
}
99
+
100
/**
 * Reports whether a process with the given pid currently exists.
 *
 * Sends signal 0 via process.kill, which only performs the existence and
 * permission check. An EPERM failure is treated as "running": the signal was
 * refused, which can only happen for a process that exists.
 *
 * @param {unknown} pid - Candidate pid; normalized with normalizePid first.
 * @returns {boolean} true when the process exists, false for an invalid pid
 *   or any other failure.
 */
export function isProcessRunning(pid) {
  const normalizedPid = normalizePid(pid)
  if (normalizedPid <= 0) {
    return false
  }

  try {
    process.kill(normalizedPid, 0)
    return true
  } catch (error) {
    return error?.code === 'EPERM'
  }
}
116
+
117
/**
 * Reads and parses a JSON file, returning a fallback instead of throwing.
 *
 * Any failure (missing file, unreadable file, malformed JSON) yields
 * `fallback`; callers in this module rely on that to treat an absent or
 * unparsable file as "no data".
 *
 * @param {string} filePath - Path of the JSON file to read.
 * @param {*} [fallback=null] - Value returned when reading or parsing fails.
 * @returns {Promise<*>} The parsed JSON value, or `fallback`.
 */
export async function readJsonFile(filePath, fallback = null) {
  try {
    const raw = await fs.readFile(filePath, 'utf8')
    return JSON.parse(raw)
  } catch {
    // Deliberate best-effort read: every error maps to the fallback.
    return fallback
  }
}
125
+
126
/**
 * Serializes `value` as pretty-printed JSON (2-space indent, trailing newline)
 * and writes it to `filePath`, creating the parent directory when needed.
 *
 * With no flag (or the plain 'w' flag) the content is first written to a
 * sibling temp file and then renamed over the target. Writing in place would
 * truncate the file first, so a concurrent reader could observe an empty or
 * partial document and misread it as corrupt. Any other flag (for example the
 * exclusive-create 'wx' used for lock acquisition) is passed straight through
 * to fs.writeFile so its semantics and errors are preserved.
 *
 * @param {string} filePath - Destination path.
 * @param {*} value - JSON-serializable value.
 * @param {string} [flags] - Optional fs open flag such as 'wx'.
 * @returns {Promise<void>}
 * @throws Propagates any filesystem error (e.g. code 'EEXIST' with 'wx').
 */
async function writeJsonFile(filePath, value, flags) {
  const formatted = `${JSON.stringify(value, null, 2)}\n`
  await fs.mkdir(path.dirname(filePath), { recursive: true })

  if (flags != null && flags !== 'w') {
    await fs.writeFile(filePath, formatted, { encoding: 'utf8', flag: flags })
    return
  }

  // Unique per process and call so concurrent writers never share a temp file.
  const suffix = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`
  const tempPath = `${filePath}.${suffix}.tmp`
  try {
    await fs.writeFile(tempPath, formatted, { encoding: 'utf8', flag: 'w' })
    await fs.rename(tempPath, filePath)
  } catch (error) {
    // Do not leave a stray temp file behind when the write or rename fails.
    await fs.rm(tempPath, { force: true })
    throw error
  }
}
131
+
132
/**
 * Tries to take the single-run lock by exclusively creating `lockFile`.
 *
 * Flow:
 *  1. Attempt an exclusive ('wx') create of the lock file.
 *  2. If it already exists, read it. When it names a pid that is running and
 *     is not this process, refuse with an error.
 *  3. Otherwise treat the existing lock as stale: remove it and retry the
 *     exclusive create once.
 *
 * NOTE: step 3 is a remove followed by a create, not one atomic operation.
 * If another process creates the lock in between, the retry fails with
 * EEXIST and this function throws "acquired the lock first".
 *
 * @param {string} lockFile - Path of the lock file.
 * @param {object} [lockState] - Initial lock fields (runId, pid, startedAt,
 *   heartbeatAt, status, iteration, phase, task, mode, configFile, cwd).
 *   Missing fields get string/zero defaults; timestamps default to timestamp().
 * @returns {Promise<{acquired: true, staleLock: object|null}>} `staleLock` is
 *   the parsed previous lock when a stale one was replaced, otherwise null.
 * @throws {Error} When another live run owns the lock, when another process
 *   wins the retry, or on any other filesystem failure.
 */
export async function acquireRunLock(lockFile, lockState) {
  const requestedIteration = Number(lockState?.iteration)
  const desired = {
    runId: String(lockState?.runId ?? ''),
    pid: normalizePid(lockState?.pid),
    startedAt: String(lockState?.startedAt ?? timestamp()),
    heartbeatAt: String(lockState?.heartbeatAt ?? timestamp()),
    status: String(lockState?.status ?? 'starting'),
    iteration: Number.isFinite(requestedIteration) ? requestedIteration : 0,
    phase: String(lockState?.phase ?? ''),
    task: String(lockState?.task ?? ''),
    mode: String(lockState?.mode ?? ''),
    configFile: String(lockState?.configFile ?? ''),
    cwd: String(lockState?.cwd ?? ''),
  }
  const lockAlreadyExists = (error) => error?.code === 'EEXIST'

  // writeJsonFile creates the parent directory itself, so no separate mkdir.
  try {
    await writeJsonFile(lockFile, desired, 'wx')
    return { acquired: true, staleLock: null }
  } catch (error) {
    if (!lockAlreadyExists(error)) {
      throw error
    }
  }

  const existing = await readJsonFile(lockFile, null)
  const existingPid = normalizePid(existing?.pid)
  const ownedByOtherLiveProcess = Boolean(existing)
    && existingPid > 0
    && existingPid !== process.pid
    && isProcessRunning(existingPid)
  if (ownedByOtherLiveProcess) {
    throw new Error(
      `Another pi-harness run is active (runId=${String(existing.runId ?? '')} pid=${existingPid} startedAt=${String(existing.startedAt ?? '')}).`
    )
  }

  // Stale (or unreadable) lock: drop it and retry the exclusive create once.
  await fs.rm(lockFile, { force: true })

  try {
    await writeJsonFile(lockFile, desired, 'wx')
  } catch (error) {
    if (!lockAlreadyExists(error)) {
      throw error
    }
    const current = await readJsonFile(lockFile, null)
    throw new Error(
      `Another pi-harness run acquired the lock first (runId=${String(current?.runId ?? '')} pid=${String(current?.pid ?? '')}).`,
      { cause: error }
    )
  }

  return { acquired: true, staleLock: existing }
}
182
+
183
/**
 * Merges `lockState` into the existing lock file and refreshes its heartbeat.
 *
 * The update is refused when the lock on disk belongs to a different run:
 * if `lockState.runId` is supplied and the stored lock carries another
 * non-empty runId, nothing is written. Without that check a run whose lock
 * had been replaced would overwrite the new owner's runId and pid with its
 * own, and a later releaseRunLock for the old run would then match and
 * delete the new owner's lock.
 *
 * @param {string} lockFile - Path of the lock file.
 * @param {object} [lockState] - Fields to merge (status, iteration, ...).
 *   `pid` falls back to the stored pid; `heartbeatAt` defaults to timestamp().
 * @returns {Promise<boolean>} true when the lock was rewritten; false when the
 *   lock is missing, unreadable, not a JSON object, or owned by another run.
 */
export async function updateRunLock(lockFile, lockState) {
  const current = await readJsonFile(lockFile, null)
  if (!current || typeof current !== 'object' || Array.isArray(current)) {
    return false
  }

  const ownerRunId = String(current.runId ?? '')
  const requestedRunId = lockState?.runId
  if (requestedRunId != null && ownerRunId !== '' && String(requestedRunId) !== ownerRunId) {
    return false
  }

  const next = {
    ...current,
    ...lockState,
    pid: normalizePid(lockState?.pid ?? current.pid),
    heartbeatAt: String(lockState?.heartbeatAt ?? timestamp()),
  }
  await writeJsonFile(lockFile, next)
  return true
}
198
+
199
/**
 * Removes the lock file, but only when it is owned by `runId`.
 *
 * Ownership is decided by comparing the stored `runId` with the argument,
 * both coerced to strings with null/undefined treated as ''.
 *
 * @param {string} lockFile - Path of the lock file.
 * @param {string} runId - Run id of the caller that believes it owns the lock.
 * @returns {Promise<boolean>} true when the lock file was removed; false when
 *   it is missing, unreadable, not a JSON object, or owned by a different run.
 */
export async function releaseRunLock(lockFile, runId) {
  const current = await readJsonFile(lockFile, null)
  if (!current || typeof current !== 'object') {
    return false
  }

  const ownedByCaller = String(current.runId ?? '') === String(runId ?? '')
  if (!ownedByCaller) {
    return false
  }

  await fs.rm(lockFile, { force: true })
  return true
}
212
+
213
/**
 * Sends `signal` to a process and, where possible, to its whole process group.
 *
 * On non-Windows platforms the signal is first sent to the negative pid, which
 * targets the process group led by that pid. Two safeguards apply:
 *  - pid 1 is never negated, because a target of -1 addresses every process
 *    the caller is allowed to signal rather than a single group;
 *  - when no such group exists (ESRCH, e.g. the process is not a group
 *    leader) the signal is retried against the process itself.
 *
 * @param {unknown} pid - Target pid; normalized with normalizePid first.
 * @param {string|number} signal - Signal name or number for process.kill.
 * @returns {boolean} true when a signal was delivered, false for an invalid
 *   pid or when delivery failed.
 */
export function signalProcessTree(pid, signal) {
  const normalizedPid = normalizePid(pid)
  if (normalizedPid <= 0) {
    return false
  }

  const canSignalGroup = process.platform !== 'win32' && normalizedPid > 1
  if (canSignalGroup) {
    try {
      process.kill(-normalizedPid, signal)
      return true
    } catch (error) {
      if (error?.code !== 'ESRCH') {
        return false
      }
      // No such process group: fall through and target the process directly.
    }
  }

  try {
    process.kill(normalizedPid, signal)
    return true
  } catch {
    return false
  }
}
230
+
57
231
  export async function readSessionId(sessionFile) {
58
232
  try {
59
233
  return (await fs.readFile(sessionFile, 'utf8')).trim()
@@ -297,6 +471,7 @@ export async function runShellCommand({
297
471
  const child = spawn('/bin/zsh', ['-lc', command], {
298
472
  cwd,
299
473
  env: process.env,
474
+ detached: process.platform !== 'win32',
300
475
  stdio: ['pipe', 'pipe', 'pipe'],
301
476
  })
302
477
 
@@ -308,9 +483,9 @@ export async function runShellCommand({
308
483
 
309
484
  killTimer = setTimeout(() => {
310
485
  timedOut = true
311
- child.kill('SIGTERM')
486
+ signalProcessTree(child.pid, 'SIGTERM')
312
487
  forceKillTimer = setTimeout(() => {
313
- child.kill('SIGKILL')
488
+ signalProcessTree(child.pid, 'SIGKILL')
314
489
  }, 10000)
315
490
  }, timeoutSeconds * 1000)
316
491
 
@@ -10,6 +10,7 @@ import {
10
10
  getHeartbeatDecision,
11
11
  resolveHeartbeatConfig,
12
12
  } from './pi-heartbeat.mjs'
13
+ import { signalProcessTree } from './pi-repo.mjs'
13
14
 
14
15
  function createJsonlReader(stream, onLine) {
15
16
  const rl = createInterface({ input: stream })
@@ -151,6 +152,7 @@ async function run() {
151
152
  const child = spawn(cli, args, {
152
153
  cwd: request.cwd,
153
154
  env: process.env,
155
+ detached: process.platform !== 'win32',
154
156
  stdio: ['pipe', 'pipe', 'pipe'],
155
157
  })
156
158
 
@@ -239,10 +241,10 @@ async function run() {
239
241
  closeAssistantLine()
240
242
  writeLive(`[PI guard] ${formatHeartbeatTimeoutMessage(decision)} Aborting current turn (pid=${child.pid ?? 'unknown'}).\n`)
241
243
  void send({ type: 'abort' }).catch(() => {})
242
- child.kill('SIGTERM')
244
+ signalProcessTree(child.pid, 'SIGTERM')
243
245
  setTimeout(() => {
244
246
  if (child.exitCode === null) {
245
- child.kill('SIGKILL')
247
+ signalProcessTree(child.pid, 'SIGKILL')
246
248
  }
247
249
  }, 1000)
248
250
  }
@@ -578,10 +580,10 @@ async function run() {
578
580
  }
579
581
  pending.clear()
580
582
 
581
- child.kill('SIGTERM')
583
+ signalProcessTree(child.pid, 'SIGTERM')
582
584
  await new Promise((resolve) => {
583
585
  const timeout = setTimeout(() => {
584
- child.kill('SIGKILL')
586
+ signalProcessTree(child.pid, 'SIGKILL')
585
587
  resolve()
586
588
  }, 1000)
587
589
 
@@ -12,9 +12,11 @@ import {
12
12
  } from './pi-prompts.mjs'
13
13
  import { appendTelemetry, ensureTelemetryFiles } from './pi-telemetry.mjs'
14
14
  import {
15
+ acquireRunLock,
15
16
  appendLog,
16
17
  collectLargeFileWarnings,
17
18
  commitStagedFiles,
19
+ createRunId,
18
20
  didRepoChange,
19
21
  ensureFileExists,
20
22
  ensureRepo,
@@ -25,10 +27,12 @@ import {
25
27
  readOptionalTextFile,
26
28
  readSessionId,
27
29
  readState,
30
+ releaseRunLock,
28
31
  runVerification,
29
32
  runShellCommand,
30
33
  stageFiles,
31
34
  unstageFiles,
35
+ updateRunLock,
32
36
  runVisualCapture,
33
37
  timestamp,
34
38
  writeChangedFiles,
@@ -66,7 +70,7 @@ function printTerminalSummary(config, summary) {
66
70
  }
67
71
 
68
72
  const lines = [
69
- `[PI supervisor] iteration=${summary.iteration} phase="${summary.phase}"`,
73
+ `[PI supervisor] run_id=${summary.runId || config.runId || ''} iteration=${summary.iteration} phase="${summary.phase}"`,
70
74
  `[PI supervisor] task=${summary.taskFile || toDisplayPath(config, config.taskFile)} developer_instructions=${summary.developerInstructionsFile || toDisplayPath(config, config.developerInstructionsFile)} tester_instructions=${summary.testerInstructionsFile || toDisplayPath(config, config.testerInstructionsFile)}`,
71
75
  `[PI supervisor] transport=${config.transport} developer_model=${summary.developerModel || resolveRoleModelName(config, 'developer') || '(PI default)'} tester_model=${summary.testerModel || resolveRoleModelName(config, 'tester') || '(PI default)'}`,
72
76
  `[PI supervisor] developer=${summary.developerStatus} tester=${summary.testerStatus} verification=${summary.verificationStatus}`,
@@ -152,9 +156,13 @@ function formatIterationSummary(summary) {
152
156
 
153
157
  async function writeIterationSummary(config, summary) {
154
158
  await writeTextFile(config.lastIterationSummaryFile, formatIterationSummary(summary))
159
+ if (config.runLastIterationSummaryFile && config.runLastIterationSummaryFile !== config.lastIterationSummaryFile) {
160
+ await writeTextFile(config.runLastIterationSummaryFile, formatIterationSummary(summary))
161
+ }
155
162
  }
156
163
 
157
164
  function createIterationSummary({
165
+ runId,
158
166
  iteration,
159
167
  phase,
160
168
  task,
@@ -174,6 +182,7 @@ function createIterationSummary({
174
182
  visualModel,
175
183
  }) {
176
184
  return {
185
+ runId,
177
186
  iteration,
178
187
  phase,
179
188
  task,
@@ -194,6 +203,26 @@ function createIterationSummary({
194
203
  }
195
204
  }
196
205
 
206
/**
 * Writes the supervisor state to the shared state file and, when a distinct
 * per-run state path is configured, mirrors it there too.
 *
 * The shared file is written first; the per-run copy is only attempted after
 * that write has completed.
 *
 * @param {object} config - Supervisor config; reads `stateFile` and the
 *   optional `runStateFile`.
 * @param {object} state - State object handed to writeState.
 * @returns {Promise<void>}
 */
async function persistStateSnapshot(config, state) {
  await writeState(config.stateFile, state)

  const runCopyFile = config.runStateFile
  if (!runCopyFile || runCopyFile === config.stateFile) {
    return
  }
  await writeState(runCopyFile, state)
}
212
+
213
/**
 * Refreshes this run's entry in the active-run lock file.
 *
 * Does nothing unless both `config.activeRunFile` and `config.runId` are set.
 * Otherwise it calls updateRunLock with this run's id, the current process
 * pid and a fresh heartbeat; `fields` is spread last, so it can override any
 * of those defaults.
 *
 * @param {object} config - Supervisor config; reads `activeRunFile` and `runId`.
 * @param {object} [fields={}] - Extra lock fields (status, iteration, ...).
 * @returns {Promise<void>}
 */
async function updateRunOwnership(config, fields = {}) {
  const hasLockIdentity = Boolean(config.activeRunFile) && Boolean(config.runId)
  if (!hasLockIdentity) {
    return
  }

  const lockUpdate = {
    runId: config.runId,
    pid: process.pid,
    heartbeatAt: timestamp(),
    ...fields,
  }
  await updateRunLock(config.activeRunFile, lockUpdate)
}
225
+
197
226
  function didInvocationCreateCommit(invocation) {
198
227
  return invocation?.beforeSnapshot?.head !== invocation?.afterSnapshot?.head
199
228
  }
@@ -272,6 +301,7 @@ function isInfrastructureVerificationFailure(output) {
272
301
  async function recordEvent(config, event) {
273
302
  await appendTelemetry(config, {
274
303
  timestamp: timestamp(),
304
+ runId: config.runId || '',
275
305
  ...event,
276
306
  })
277
307
  }
@@ -1076,6 +1106,13 @@ async function runIteration({ config, state, iteration }) {
1076
1106
  const iterationStartSnapshot = getRepoSnapshot(config.cwd)
1077
1107
  const taskInfo = findFirstUncheckedTaskInfo(config.taskFile)
1078
1108
  if (!taskInfo.hasUncheckedTasks) {
1109
+ await updateRunOwnership(config, {
1110
+ status: 'idle',
1111
+ iteration,
1112
+ phase: taskInfo.phase || 'complete',
1113
+ task: '',
1114
+ lastCompletedIteration: iteration,
1115
+ })
1079
1116
  await appendLog(config.logFile, 'No unchecked tasks remain in TODOS.md')
1080
1117
  return {
1081
1118
  stateUpdate: {
@@ -1086,9 +1123,12 @@ async function runIteration({ config, state, iteration }) {
1086
1123
  lastPhase: taskInfo.phase,
1087
1124
  lastStatus: 'complete',
1088
1125
  lastVerificationStatus: 'not_needed',
1126
+ runId: config.runId || '',
1127
+ inProgress: null,
1089
1128
  lastRunAt: timestamp(),
1090
1129
  },
1091
1130
  summary: {
1131
+ runId: config.runId || '',
1092
1132
  iteration,
1093
1133
  phase: taskInfo.phase || 'complete',
1094
1134
  task: '',
@@ -1118,6 +1158,26 @@ async function runIteration({ config, state, iteration }) {
1118
1158
 
1119
1159
  const phase = taskInfo.phase || 'unknown'
1120
1160
  const task = taskInfo.task || 'unknown'
1161
+ const inProgressState = {
1162
+ ...state,
1163
+ runId: config.runId || '',
1164
+ inProgress: {
1165
+ runId: config.runId || '',
1166
+ status: 'in_progress',
1167
+ iteration,
1168
+ phase,
1169
+ task,
1170
+ startedAt: timestamp(),
1171
+ transport: config.transport,
1172
+ },
1173
+ }
1174
+ await persistStateSnapshot(config, inProgressState)
1175
+ await updateRunOwnership(config, {
1176
+ status: 'iteration_in_progress',
1177
+ iteration,
1178
+ phase,
1179
+ task,
1180
+ })
1121
1181
  const canResumePriorSession = (
1122
1182
  state.lastTransport === config.transport
1123
1183
  && state.lastPiModel === developerModelName
@@ -1486,8 +1546,19 @@ async function runIteration({ config, state, iteration }) {
1486
1546
  lastRunAt: timestamp(),
1487
1547
  successfulIterations,
1488
1548
  lastVisualStatus: visualStatus,
1549
+ runId: config.runId || '',
1550
+ inProgress: null,
1489
1551
  }
1490
1552
 
1553
+ await updateRunOwnership(config, {
1554
+ status: 'idle',
1555
+ iteration,
1556
+ phase,
1557
+ task,
1558
+ lastCompletedIteration: iteration,
1559
+ lastStatus: finalStatus,
1560
+ })
1561
+
1491
1562
  await appendLog(
1492
1563
  config.logFile,
1493
1564
  `Finished iteration ${iteration} with status=${finalStatus} verification=${finalVerificationStatus} tester_verdict=${testerVerdict} commit_plan_found=${commitPlanFound} terminal_reason=${terminalReason}${largeFileWarnings.length > 0 ? ` large_file_warnings=${formatLargeFileWarningsInline(largeFileWarnings)}` : ''}`
@@ -1495,6 +1566,7 @@ async function runIteration({ config, state, iteration }) {
1495
1566
 
1496
1567
  const iterationEndSnapshot = getRepoSnapshot(config.cwd)
1497
1568
  const iterationSummary = createIterationSummary({
1569
+ runId: config.runId || '',
1498
1570
  iteration,
1499
1571
  phase,
1500
1572
  task,
@@ -1548,6 +1620,7 @@ async function runIteration({ config, state, iteration }) {
1548
1620
  return {
1549
1621
  stateUpdate: nextState,
1550
1622
  summary: {
1623
+ runId: config.runId || '',
1551
1624
  iteration,
1552
1625
  phase,
1553
1626
  task,
@@ -1578,40 +1651,95 @@ async function runIteration({ config, state, iteration }) {
1578
1651
 
1579
1652
  async function main() {
1580
1653
  const config = loadConfig(process.argv[2] ?? 'once')
1654
+ const runId = createRunId()
1655
+ const runStartedAt = timestamp()
1656
+ const runDir = path.join(config.piRuntimeDir, 'runs', runId)
1657
+ config.runId = runId
1658
+ config.runStartedAt = runStartedAt
1659
+ config.runRuntimeDir = runDir
1660
+ config.runLogFile = path.join(runDir, 'pi.log')
1661
+ config.runTelemetryJsonl = path.join(runDir, 'pi_telemetry.jsonl')
1662
+ config.runTelemetryCsv = path.join(runDir, 'pi_telemetry.csv')
1663
+ config.runStateFile = path.join(runDir, 'state.json')
1664
+ config.runLastIterationSummaryFile = path.join(runDir, 'last-iteration.json')
1665
+
1581
1666
  ensureRepo(config.cwd)
1582
1667
  await ensureFileExists(config.taskFile, 'task file')
1583
1668
  await ensureFileExists(config.developerInstructionsFile, 'developer instructions file')
1584
1669
  await ensureFileExists(config.testerInstructionsFile, 'tester instructions file')
1585
- await ensureTelemetryFiles(config)
1586
- await runStartupPreflight(config)
1587
-
1588
- let state = await readState(config.stateFile)
1589
- let completedIterations = 0
1590
-
1591
- while (!stopRequested) {
1592
- const iteration = state.iteration + 1
1593
- const result = await runIteration({ config, state, iteration })
1594
- await writeIterationSummary(config, result.iterationSummary ?? result.summary)
1595
- state = result.stateUpdate
1596
- await writeState(config.stateFile, state)
1597
- printTerminalSummary(config, result.summary)
1598
- completedIterations += 1
1599
-
1600
- if (result.shouldStop || config.mode !== 'run' || completedIterations >= config.maxIterations) {
1601
- break
1670
+ const lockResult = await acquireRunLock(config.activeRunFile, {
1671
+ runId,
1672
+ pid: process.pid,
1673
+ startedAt: runStartedAt,
1674
+ heartbeatAt: runStartedAt,
1675
+ status: 'starting',
1676
+ iteration: 0,
1677
+ phase: '',
1678
+ task: '',
1679
+ mode: config.mode,
1680
+ configFile: config.configFile,
1681
+ cwd: config.cwd,
1682
+ })
1683
+ try {
1684
+ process.env.PI_RUN_ID = runId
1685
+ process.env.PI_RUN_LOG_FILE = config.runLogFile
1686
+ await ensureTelemetryFiles(config)
1687
+ await appendLog(config.logFile, `Run started pid=${process.pid} mode=${config.mode}`)
1688
+ if (lockResult.staleLock) {
1689
+ await appendLog(
1690
+ config.logFile,
1691
+ `Recovered stale run lock from runId=${String(lockResult.staleLock.runId ?? '')} pid=${String(lockResult.staleLock.pid ?? '')} startedAt=${String(lockResult.staleLock.startedAt ?? '')}`
1692
+ )
1602
1693
  }
1694
+ await runStartupPreflight(config)
1603
1695
 
1604
- await sleep(config.sleepBetweenSeconds)
1605
- }
1696
+ let state = await readState(config.stateFile)
1697
+ if (state?.inProgress?.status === 'in_progress') {
1698
+ await appendLog(
1699
+ config.logFile,
1700
+ `Recovering unfinished iteration=${state.inProgress.iteration} phase="${state.inProgress.phase || ''}" task="${state.inProgress.task || ''}" from runId=${String(state.inProgress.runId || state.runId || '')}`
1701
+ )
1702
+ }
1703
+ let completedIterations = 0
1704
+
1705
+ while (!stopRequested) {
1706
+ const iteration = state?.inProgress?.status === 'in_progress'
1707
+ ? Number(state.inProgress.iteration) || (state.iteration + 1)
1708
+ : state.iteration + 1
1709
+ await updateRunOwnership(config, {
1710
+ status: 'starting_iteration',
1711
+ iteration,
1712
+ })
1713
+ const result = await runIteration({ config, state, iteration })
1714
+ await writeIterationSummary(config, result.iterationSummary ?? result.summary)
1715
+ state = result.stateUpdate
1716
+ await persistStateSnapshot(config, state)
1717
+ printTerminalSummary(config, result.summary)
1718
+ completedIterations += 1
1719
+
1720
+ if (result.shouldStop || config.mode !== 'run' || completedIterations >= config.maxIterations) {
1721
+ break
1722
+ }
1723
+
1724
+ await sleep(config.sleepBetweenSeconds)
1725
+ }
1606
1726
 
1607
- if (stopRequested) {
1608
- await appendLog(config.logFile, 'Stop requested by signal')
1727
+ if (stopRequested) {
1728
+ await appendLog(config.logFile, 'Stop requested by signal')
1729
+ }
1730
+ } finally {
1731
+ await updateRunOwnership(config, {
1732
+ status: stopRequested ? 'stopped' : 'finished',
1733
+ heartbeatAt: timestamp(),
1734
+ })
1735
+ await releaseRunLock(config.activeRunFile, runId)
1736
+ delete process.env.PI_RUN_ID
1737
+ delete process.env.PI_RUN_LOG_FILE
1609
1738
  }
1610
1739
  }
1611
1740
 
1612
1741
  main().catch(async (error) => {
1613
1742
  const config = loadConfig(process.argv[2] ?? 'once')
1614
- await ensureTelemetryFiles(config)
1615
1743
  await appendLog(config.logFile, `Supervisor error: ${error instanceof Error ? error.stack ?? error.message : String(error)}`)
1616
1744
  console.error(error instanceof Error ? error.message : String(error))
1617
1745
  process.exitCode = 1
@@ -1,6 +1,7 @@
1
1
  import fs from 'node:fs/promises'
2
+ import path from 'node:path'
2
3
 
3
- const CSV_HEADER = 'timestamp,iteration,phase,kind,status,transport,session_id,timed_out,exit_code,duration_seconds,commit_before,commit_after,repo_changed,changed_files_count,verification_status,retry_count,role,model,tool_calls,tool_errors,message_updates,stop_reason,loop_detected,loop_signature,tester_verdict,commit_plan_found,terminal_reason,risk_warnings,notes\n'
4
+ const CSV_HEADER = 'timestamp,run_id,iteration,phase,kind,status,transport,session_id,timed_out,exit_code,duration_seconds,commit_before,commit_after,repo_changed,changed_files_count,verification_status,retry_count,role,model,tool_calls,tool_errors,message_updates,stop_reason,loop_detected,loop_signature,tester_verdict,commit_plan_found,terminal_reason,risk_warnings,notes\n'
4
5
 
5
6
  function csvEscape(value) {
6
7
  const text = String(value ?? '')
@@ -14,22 +15,42 @@ export async function ensureTelemetryFiles(config) {
14
15
  await fs.writeFile(config.lastPromptFile, '', 'utf8')
15
16
  await fs.writeFile(config.lastIterationSummaryFile, '', 'utf8')
16
17
 
18
+ await fs.mkdir(path.dirname(config.logFile), { recursive: true })
19
+ await fs.mkdir(path.dirname(config.telemetryJsonl), { recursive: true })
20
+ await fs.mkdir(path.dirname(config.telemetryCsv), { recursive: true })
17
21
  await fs.appendFile(config.logFile, '', 'utf8')
18
22
  await fs.appendFile(config.telemetryJsonl, '', 'utf8')
23
+ if (config.runTelemetryJsonl && config.runTelemetryJsonl !== config.telemetryJsonl) {
24
+ await fs.mkdir(path.dirname(config.runTelemetryJsonl), { recursive: true })
25
+ await fs.appendFile(config.runTelemetryJsonl, '', 'utf8')
26
+ }
19
27
 
20
28
  try {
21
29
  await fs.access(config.telemetryCsv)
22
30
  } catch {
23
31
  await fs.writeFile(config.telemetryCsv, CSV_HEADER, 'utf8')
24
32
  }
33
+
34
+ if (config.runTelemetryCsv && config.runTelemetryCsv !== config.telemetryCsv) {
35
+ try {
36
+ await fs.access(config.runTelemetryCsv)
37
+ } catch {
38
+ await fs.mkdir(path.dirname(config.runTelemetryCsv), { recursive: true })
39
+ await fs.writeFile(config.runTelemetryCsv, CSV_HEADER, 'utf8')
40
+ }
41
+ }
25
42
  }
26
43
 
27
44
  export async function appendTelemetry(config, event) {
28
45
  const jsonLine = `${JSON.stringify(event)}\n`
29
46
  await fs.appendFile(config.telemetryJsonl, jsonLine, 'utf8')
47
+ if (config.runTelemetryJsonl && config.runTelemetryJsonl !== config.telemetryJsonl) {
48
+ await fs.appendFile(config.runTelemetryJsonl, jsonLine, 'utf8')
49
+ }
30
50
 
31
51
  const csvRow = [
32
52
  event.timestamp,
53
+ event.runId,
33
54
  event.iteration,
34
55
  event.phase,
35
56
  event.kind,
@@ -61,6 +82,9 @@ export async function appendTelemetry(config, event) {
61
82
  ].map(csvEscape).join(',')
62
83
 
63
84
  await fs.appendFile(config.telemetryCsv, `${csvRow}\n`, 'utf8')
85
+ if (config.runTelemetryCsv && config.runTelemetryCsv !== config.telemetryCsv) {
86
+ await fs.appendFile(config.runTelemetryCsv, `${csvRow}\n`, 'utf8')
87
+ }
64
88
  }
65
89
 
66
90
  export async function readTelemetry(config) {