@llblab/pi-actors 0.17.1 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. package/AGENTS.md +6 -2
  2. package/BACKLOG.md +32 -26
  3. package/CHANGELOG.md +19 -3
  4. package/README.md +23 -8
  5. package/docs/actor-messages.md +5 -3
  6. package/docs/async-runs.md +3 -5
  7. package/docs/command-templates.md +2 -0
  8. package/docs/recipe-library.md +3 -1
  9. package/docs/task-first-recipes.md +29 -0
  10. package/docs/template-recipes.md +9 -14
  11. package/index.ts +111 -32
  12. package/lib/actor-inspector-tui.ts +192 -42
  13. package/lib/actor-rooms.ts +220 -26
  14. package/lib/async-runs.ts +59 -1
  15. package/lib/execution.ts +17 -0
  16. package/lib/file-state.ts +2 -1
  17. package/lib/observability.ts +82 -2
  18. package/lib/prompts.ts +2 -2
  19. package/lib/recipe-discovery.ts +86 -6
  20. package/lib/recipe-migration.ts +0 -2
  21. package/lib/recipe-references.ts +43 -10
  22. package/lib/temp.ts +55 -2
  23. package/lib/tools.ts +99 -11
  24. package/package.json +1 -1
  25. package/recipes/coordinator-locker.json +1 -2
  26. package/recipes/lens-swarm.json +0 -1
  27. package/recipes/locker.json +45 -0
  28. package/recipes/music-player.json +0 -1
  29. package/recipes/pipeline-architect-coordinator.json +0 -1
  30. package/recipes/pipeline-artifact-bundle.json +0 -1
  31. package/recipes/pipeline-artifact-report.json +0 -1
  32. package/recipes/pipeline-artifact-write.json +0 -1
  33. package/recipes/pipeline-async-run-ops.json +0 -1
  34. package/recipes/pipeline-checkpoint-continuation.json +0 -1
  35. package/recipes/pipeline-development-tasking.json +0 -1
  36. package/recipes/pipeline-docs-maintenance.json +0 -1
  37. package/recipes/pipeline-media-library.json +0 -1
  38. package/recipes/pipeline-quorum-review.json +0 -1
  39. package/recipes/pipeline-release-readiness.json +0 -1
  40. package/recipes/pipeline-release-summary.json +0 -1
  41. package/recipes/pipeline-repo-health.json +0 -1
  42. package/recipes/pipeline-research-synthesis.json +0 -1
  43. package/recipes/pipeline-review-readiness.json +0 -1
  44. package/recipes/pipeline-room-swarm.json +3 -2
  45. package/recipes/subagent-artifact.json +0 -1
  46. package/recipes/subagent-checkpoint.json +0 -1
  47. package/recipes/subagent-conflict-report.json +0 -1
  48. package/recipes/subagent-contradiction-map.json +0 -1
  49. package/recipes/subagent-critic.json +0 -1
  50. package/recipes/subagent-evidence-map.json +0 -1
  51. package/recipes/subagent-followup.json +0 -1
  52. package/recipes/subagent-judge.json +0 -1
  53. package/recipes/subagent-merge.json +0 -1
  54. package/recipes/subagent-message.json +0 -1
  55. package/recipes/subagent-normalize.json +0 -1
  56. package/recipes/subagent-plan.json +0 -1
  57. package/recipes/subagent-prompt.json +0 -1
  58. package/recipes/subagent-quorum.json +0 -1
  59. package/recipes/subagent-review-coordinator.json +0 -1
  60. package/recipes/subagent-review.json +0 -1
  61. package/recipes/subagent-task-card.json +0 -1
  62. package/recipes/subagent-tools.json +0 -1
  63. package/recipes/subagent-verify.json +0 -1
  64. package/recipes/subagents-prompts.json +0 -1
  65. package/recipes/utility-actor-message.json +0 -1
  66. package/recipes/utility-artifact-manifest.json +0 -1
  67. package/recipes/utility-artifact-write.json +0 -1
  68. package/recipes/utility-changelog-head.json +0 -1
  69. package/recipes/utility-changelog-section.json +0 -1
  70. package/recipes/utility-coordinator-lock-snapshot.json +0 -1
  71. package/recipes/utility-git-log.json +0 -1
  72. package/recipes/utility-git-status.json +0 -1
  73. package/recipes/utility-jsonl-tail.json +0 -1
  74. package/recipes/utility-markdown-index.json +0 -1
  75. package/recipes/utility-package-summary.json +0 -1
  76. package/recipes/utility-playlist-build.json +0 -1
  77. package/recipes/utility-playlist-scan.json +0 -1
  78. package/recipes/utility-run-ops-snapshot.json +0 -1
  79. package/recipes/utility-run-state-files.json +0 -1
  80. package/recipes/utility-run-summary.json +0 -1
  81. package/recipes/utility-skill-summary.json +0 -1
  82. package/recipes/utility-validate-recipe.json +0 -1
  83. package/recipes/utility-validation-wrapper.json +0 -1
  84. package/scripts/coordinator.mjs +434 -0
  85. package/scripts/{coordinator-locker.mjs → locker.mjs} +23 -22
  86. package/skills/actors/SKILL.md +26 -12
  87. package/skills/swarm/SKILL.md +15 -1
  88. package/scripts/room-swarm.mjs +0 -244
package/lib/actor-rooms.ts CHANGED
@@ -5,16 +5,21 @@
5
5
  */
6
6
 
7
7
  import * as fs from "node:fs";
8
+ import { randomUUID } from "node:crypto";
8
9
  import * as path from "node:path";
9
10
 
10
11
  import type { ActorMessage } from "./actor-messages.ts";
11
12
 
13
+ const ROOM_LOCK_MAX_AGE_MS = 5 * 60 * 1000;
14
+ const ROOM_LOCK_TIMEOUT_MS = 5000;
15
+ const DEFAULT_ROOM_MAX_MESSAGES = 10000;
16
+ const DEFAULT_SNAPSHOT_MIN_INTERVAL_MS = 250;
17
+
12
18
  export interface RoomMember {
13
19
  address: string;
14
20
  caps?: unknown;
15
21
  claim?: unknown;
16
22
  display?: unknown;
17
- glyph?: unknown;
18
23
  joined_at: string;
19
24
  last_seen: string;
20
25
  parent?: unknown;
@@ -94,6 +99,10 @@ function branchSnapshotFile(stateDir: string, branch: string): string {
94
99
  return path.join(stateDir, "branches", branch, "communication.json");
95
100
  }
96
101
 
102
+ function branchInboxFile(stateDir: string, branch: string): string {
103
+ return path.join(stateDir, "branches", branch, "inbox.jsonl");
104
+ }
105
+
97
106
  function branchIdFromAddress(address: string | undefined, run: string): string | undefined {
98
107
  if (!address) return undefined;
99
108
  const match = new RegExp(`^branch:${run}/(.+)$`).exec(address);
@@ -104,6 +113,43 @@ function ensureRoomDir(stateDir: string, room: string): void {
104
113
  fs.mkdirSync(roomDir(stateDir, room), { recursive: true });
105
114
  }
106
115
 
116
+ function sleepSync(ms: number): void {
117
+ Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
118
+ }
119
+
120
+ function acquireRoomLock(stateDir: string, room: string): () => void {
121
+ ensureRoomDir(stateDir, room);
122
+ const lockDir = path.join(roomDir(stateDir, room), ".append.lock");
123
+ const started = Date.now();
124
+ while (true) {
125
+ try {
126
+ fs.mkdirSync(lockDir);
127
+ fs.writeFileSync(
128
+ path.join(lockDir, "owner.json"),
129
+ `${JSON.stringify({ pid: process.pid, created_at: new Date().toISOString() })}\n`,
130
+ );
131
+ return () => fs.rmSync(lockDir, { recursive: true, force: true });
132
+ } catch (error) {
133
+ try {
134
+ const stat = fs.statSync(lockDir);
135
+ if (Date.now() - stat.mtimeMs > ROOM_LOCK_MAX_AGE_MS) {
136
+ fs.rmSync(lockDir, { recursive: true, force: true });
137
+ continue;
138
+ }
139
+ } catch {
140
+ continue;
141
+ }
142
+ if (Date.now() - started > ROOM_LOCK_TIMEOUT_MS) {
143
+ throw new Error(
144
+ `Room append lock timed out for ${room} in ${stateDir}.`,
145
+ { cause: error },
146
+ );
147
+ }
148
+ sleepSync(10);
149
+ }
150
+ }
151
+ }
152
+
107
153
  function asRecord(value: unknown): Record<string, unknown> {
108
154
  return value && typeof value === "object" && !Array.isArray(value)
109
155
  ? (value as Record<string, unknown>)
@@ -129,6 +175,65 @@ function writeJsonFile(file: string, value: unknown): void {
129
175
  fs.writeFileSync(file, `${JSON.stringify(value, null, 2)}\n`);
130
176
  }
131
177
 
178
+ function positiveEnvInt(name: string, fallback: number): number {
179
+ const value = Number(process.env[name] ?? fallback);
180
+ return Number.isFinite(value) && value > 0 ? Math.floor(value) : fallback;
181
+ }
182
+
183
+ function roomMaxMessages(): number {
184
+ return positiveEnvInt("PI_ACTORS_ROOM_MAX_MESSAGES", DEFAULT_ROOM_MAX_MESSAGES);
185
+ }
186
+
187
+ function snapshotMinIntervalMs(): number {
188
+ return positiveEnvInt(
189
+ "PI_ACTORS_COMMUNICATION_SNAPSHOT_MIN_MS",
190
+ DEFAULT_SNAPSHOT_MIN_INTERVAL_MS,
191
+ );
192
+ }
193
+
194
+ function compactRoomMessages(stateDir: string, room: string): void {
195
+ const maxMessages = roomMaxMessages();
196
+ const file = messagesFile(stateDir, room);
197
+ const lines = readJsonlTailLines(file, maxMessages + 1);
198
+ if (lines.length <= maxMessages) return;
199
+ const kept = lines.slice(-maxMessages);
200
+ fs.writeFileSync(file, `${kept.join("\n")}\n`);
201
+ writeJsonFile(path.join(roomDir(stateDir, room), "compaction.json"), {
202
+ compacted_at: new Date().toISOString(),
203
+ max_messages: maxMessages,
204
+ });
205
+ }
206
+
207
+ function readJsonlTailLines(file: string, limit: number): string[] {
208
+ const lineLimit = Math.max(1, limit);
209
+ const stat = fs.statSync(file);
210
+ if (stat.size === 0) return [];
211
+ const fd = fs.openSync(file, "r");
212
+ try {
213
+ const chunkSize = 64 * 1024;
214
+ const chunks: Buffer[] = [];
215
+ let position = stat.size;
216
+ let newlines = 0;
217
+ while (position > 0 && newlines <= lineLimit) {
218
+ const size = Math.min(chunkSize, position);
219
+ position -= size;
220
+ const chunk = Buffer.allocUnsafe(size);
221
+ fs.readSync(fd, chunk, 0, size, position);
222
+ chunks.unshift(chunk);
223
+ for (let index = size - 1; index >= 0; index -= 1) {
224
+ if (chunk[index] === 10) newlines += 1;
225
+ }
226
+ }
227
+ return Buffer.concat(chunks)
228
+ .toString("utf8")
229
+ .split("\n")
230
+ .filter(Boolean)
231
+ .slice(-lineLimit);
232
+ } finally {
233
+ fs.closeSync(fd);
234
+ }
235
+ }
236
+
132
237
  export function readRoomRoster(
133
238
  stateDir: string,
134
239
  room: string,
@@ -145,6 +250,15 @@ function writeRoomRoster(
145
250
  writeJsonFile(rosterFile(stateDir, room), roster);
146
251
  }
147
252
 
253
+ function shouldDebounceSnapshot(file: string): boolean {
254
+ try {
255
+ return Date.now() - fs.statSync(file).mtimeMs < snapshotMinIntervalMs();
256
+ } catch (error) {
257
+ if ((error as NodeJS.ErrnoException).code === "ENOENT") return false;
258
+ throw error;
259
+ }
260
+ }
261
+
148
262
  function updateRosterForMessage(
149
263
  stateDir: string,
150
264
  room: string,
@@ -153,13 +267,23 @@ function updateRosterForMessage(
153
267
  ): Record<string, RoomMember> {
154
268
  const roster = readRoomRoster(stateDir, room);
155
269
  if (!message.from) return roster;
270
+ const body = asRecord(message.body);
271
+ const current = roster[message.from];
156
272
  if (message.type === "actor.leave") {
157
- delete roster[message.from];
273
+ roster[message.from] = {
274
+ address: message.from,
275
+ joined_at: current?.joined_at ?? receivedAt,
276
+ last_seen: receivedAt,
277
+ ...(current?.caps !== undefined ? { caps: current.caps } : {}),
278
+ ...(current?.claim !== undefined ? { claim: current.claim } : {}),
279
+ ...(current?.display !== undefined ? { display: current.display } : {}),
280
+ ...(current?.parent !== undefined ? { parent: current.parent } : {}),
281
+ ...(current?.role !== undefined ? { role: current.role } : { role: "actor" }),
282
+ status: String(body.status ?? "left"),
283
+ };
158
284
  writeRoomRoster(stateDir, room, roster);
159
285
  return roster;
160
286
  }
161
- const body = asRecord(message.body);
162
- const current = roster[message.from];
163
287
  roster[message.from] = {
164
288
  address: message.from,
165
289
  joined_at: current?.joined_at ?? receivedAt,
@@ -167,7 +291,6 @@ function updateRosterForMessage(
167
291
  ...(body.caps !== undefined ? { caps: body.caps } : current?.caps !== undefined ? { caps: current.caps } : {}),
168
292
  ...(body.claim !== undefined ? { claim: body.claim } : current?.claim !== undefined ? { claim: current.claim } : {}),
169
293
  ...(body.display !== undefined ? { display: body.display } : current?.display !== undefined ? { display: current.display } : {}),
170
- ...(body.glyph !== undefined ? { glyph: body.glyph } : current?.glyph !== undefined ? { glyph: current.glyph } : {}),
171
294
  ...(body.parent !== undefined ? { parent: body.parent } : current?.parent !== undefined ? { parent: current.parent } : {}),
172
295
  ...(body.role !== undefined ? { role: body.role } : current?.role !== undefined ? { role: current.role } : { role: "actor" }),
173
296
  status: String(body.status ?? current?.status ?? "present"),
@@ -176,29 +299,92 @@ function updateRosterForMessage(
176
299
  return roster;
177
300
  }
178
301
 
302
+ export function readBranchInboxMessages(
303
+ stateDir: string,
304
+ run: string,
305
+ address: string,
306
+ limit = 40,
307
+ ): Array<ActorMessage & { id?: string; queued_at?: string; status?: string }> {
308
+ const branch = branchIdFromAddress(address, run);
309
+ if (!branch) throw new Error(`Expected branch:${run}/<branch>; got ${address}`);
310
+ try {
311
+ return readJsonlTailLines(branchInboxFile(stateDir, branch), limit).map(
312
+ (line) => JSON.parse(line),
313
+ );
314
+ } catch (error) {
315
+ if ((error as NodeJS.ErrnoException).code === "ENOENT") return [];
316
+ throw error;
317
+ }
318
+ }
319
+
320
+ export function appendBranchInboxMessage(
321
+ stateDir: string,
322
+ run: string,
323
+ address: string,
324
+ message: ActorMessage,
325
+ ): void {
326
+ const branch = branchIdFromAddress(address, run);
327
+ if (!branch) throw new Error(`Expected branch:${run}/<branch>; got ${address}`);
328
+ fs.mkdirSync(path.dirname(branchInboxFile(stateDir, branch)), { recursive: true });
329
+ fs.writeFileSync(
330
+ branchInboxFile(stateDir, branch),
331
+ `${JSON.stringify({ ...message, id: randomUUID(), queued_at: new Date().toISOString(), status: "queued" })}\n`,
332
+ { flag: "a" },
333
+ );
334
+ }
335
+
336
+ export function updateBranchInboxMessageStatus(
337
+ stateDir: string,
338
+ run: string,
339
+ address: string,
340
+ id: string,
341
+ status: "claimed" | "handled" | "failed",
342
+ metadata: Record<string, unknown> = {},
343
+ ): boolean {
344
+ const branch = branchIdFromAddress(address, run);
345
+ if (!branch) throw new Error(`Expected branch:${run}/<branch>; got ${address}`);
346
+ const file = branchInboxFile(stateDir, branch);
347
+ const messages = readBranchInboxMessages(stateDir, run, address, Number.MAX_SAFE_INTEGER);
348
+ let changed = false;
349
+ const timestampKey = `${status}_at`;
350
+ const updated = messages.map((message) => {
351
+ if (message.id !== id) return message;
352
+ changed = true;
353
+ return { ...message, ...metadata, [timestampKey]: new Date().toISOString(), status };
354
+ });
355
+ if (!changed) return false;
356
+ fs.writeFileSync(file, `${updated.map((message) => JSON.stringify(message)).join("\n")}\n`);
357
+ return true;
358
+ }
359
+
179
360
  export function appendRoomMessage(
180
361
  stateDir: string,
181
362
  room: string,
182
363
  message: ActorMessage,
183
364
  ): RoomAppendResult {
184
- ensureRoomDir(stateDir, room);
185
- const receivedAt = new Date().toISOString();
186
- const entry: RoomTimelineEntry = { ...message, received_at: receivedAt };
187
- fs.appendFileSync(messagesFile(stateDir, room), `${JSON.stringify(entry)}\n`);
188
- const roster = updateRosterForMessage(stateDir, room, message, receivedAt);
189
- const run = runFromRoomAddress(message.to);
190
- if (run) {
191
- writeCommunicationSnapshot(stateDir, run);
192
- if (message.from && branchIdFromAddress(message.from, run)) {
193
- writeBranchCommunicationSnapshot(stateDir, run, message.from);
365
+ const releaseLock = acquireRoomLock(stateDir, room);
366
+ try {
367
+ const receivedAt = new Date().toISOString();
368
+ const entry: RoomTimelineEntry = { ...message, received_at: receivedAt };
369
+ fs.appendFileSync(messagesFile(stateDir, room), `${JSON.stringify(entry)}\n`);
370
+ compactRoomMessages(stateDir, room);
371
+ const roster = updateRosterForMessage(stateDir, room, message, receivedAt);
372
+ const run = runFromRoomAddress(message.to);
373
+ if (run) {
374
+ writeCommunicationSnapshot(stateDir, run);
375
+ if (message.from && branchIdFromAddress(message.from, run)) {
376
+ writeBranchCommunicationSnapshotDebounced(stateDir, run, message.from);
377
+ }
194
378
  }
379
+ return {
380
+ message_count: readRoomMessages(stateDir, room).length,
381
+ room,
382
+ roster_count: Object.keys(roster).length,
383
+ sent: true,
384
+ };
385
+ } finally {
386
+ releaseLock();
195
387
  }
196
- return {
197
- message_count: readRoomMessages(stateDir, room).length,
198
- room,
199
- roster_count: Object.keys(roster).length,
200
- sent: true,
201
- };
202
388
  }
203
389
 
204
390
  export function readRoomMessages(
@@ -207,11 +393,8 @@ export function readRoomMessages(
207
393
  limit = 40,
208
394
  ): RoomTimelineEntry[] {
209
395
  try {
210
- const lines = fs
211
- .readFileSync(messagesFile(stateDir, room), "utf8")
212
- .split("\n")
213
- .filter(Boolean);
214
- return lines.slice(-Math.max(1, limit)).map((line) => JSON.parse(line));
396
+ const lines = readJsonlTailLines(messagesFile(stateDir, room), limit);
397
+ return lines.map((line) => JSON.parse(line));
215
398
  } catch (error) {
216
399
  if ((error as NodeJS.ErrnoException).code === "ENOENT") return [];
217
400
  throw error;
@@ -371,3 +554,14 @@ export function writeBranchCommunicationSnapshot(
371
554
  writeJsonFile(branchSnapshotFile(stateDir, branch), snapshot);
372
555
  return snapshot;
373
556
  }
557
+
558
+ function writeBranchCommunicationSnapshotDebounced(
559
+ stateDir: string,
560
+ run: string,
561
+ self: string,
562
+ ): ActorCommunicationSnapshot | undefined {
563
+ const branch = branchIdFromAddress(self, run);
564
+ if (!branch) throw new Error(`Expected branch:${run}/<branch>; got ${self}`);
565
+ if (shouldDebounceSnapshot(branchSnapshotFile(stateDir, branch))) return undefined;
566
+ return writeBranchCommunicationSnapshot(stateDir, run, self);
567
+ }
package/lib/async-runs.ts CHANGED
@@ -32,6 +32,8 @@ import * as Paths from "./paths.ts";
32
32
  import * as RecipeReferences from "./recipe-references.ts";
33
33
  import * as RecipeUsage from "./recipe-usage.ts";
34
34
 
35
+ const START_LOCK_MAX_AGE_MS = 5 * 60 * 1000;
36
+
35
37
  export interface AsyncRunStartParams {
36
38
  async?: boolean;
37
39
  file?: string;
@@ -51,6 +53,7 @@ export interface AsyncRunStartParams {
51
53
  output?: string;
52
54
  artifacts?: Record<string, string>;
53
55
  mailbox?: RecipeReferences.TemplateRecipeMailbox;
56
+ retire_when?: "children_terminal";
54
57
  retry?: number | string;
55
58
  failure?: CommandTemplateFailureScope;
56
59
  recover?: CommandTemplateValue;
@@ -104,6 +107,7 @@ export interface AsyncRunMeta {
104
107
  values: Record<string, unknown>;
105
108
  artifacts?: Record<string, string>;
106
109
  mailbox?: RecipeReferences.TemplateRecipeMailbox;
110
+ retire_when?: "children_terminal";
107
111
  }
108
112
 
109
113
  const DEFAULT_STATE_ROOT = Paths.getRunStateRoot();
@@ -169,6 +173,17 @@ function resolveStateDir(params: AsyncRunStartParams, run: string): string {
169
173
  return resolve(params.state_dir || join(DEFAULT_STATE_ROOT, run));
170
174
  }
171
175
 
176
+ function assertNoActiveRunState(stateDir: string): void {
177
+ const meta = readJson(join(stateDir, "run.json"));
178
+ if (!meta) return;
179
+ const pid = Number(meta.pid || 0);
180
+ const cwd = String(meta.cwd ?? "");
181
+ if (!pid || !isAlive(pid) || !pidMatchesRun(pid, cwd, stateDir)) return;
182
+ throw new Error(
183
+ `Run state already has an active owned process: ${String(meta.run ?? stateDir)}. Stop it before reusing the same run_id or state_dir.`,
184
+ );
185
+ }
186
+
172
187
  function resolveRecipeFile(file: string): string {
173
188
  return RecipeReferences.resolveRecipePath(file, DEFAULT_RECIPE_ROOT);
174
189
  }
@@ -277,6 +292,39 @@ function getInterruptedRunStatus(
277
292
  return undefined;
278
293
  }
279
294
 
295
+ function acquireStateStartLock(stateDir: string): () => void {
296
+ const lockDir = join(stateDir, ".start.lock");
297
+ try {
298
+ mkdirSync(lockDir);
299
+ writeFileSync(
300
+ join(lockDir, "owner.json"),
301
+ `${JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString() })}\n`,
302
+ "utf8",
303
+ );
304
+ } catch (error) {
305
+ try {
306
+ const stat = statSync(lockDir);
307
+ if (Date.now() - stat.mtimeMs > START_LOCK_MAX_AGE_MS) {
308
+ rmSync(lockDir, { recursive: true, force: true });
309
+ mkdirSync(lockDir);
310
+ writeFileSync(
311
+ join(lockDir, "owner.json"),
312
+ `${JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString(), recovered: true })}\n`,
313
+ "utf8",
314
+ );
315
+ return () => rmSync(lockDir, { recursive: true, force: true });
316
+ }
317
+ } catch {
318
+ // Keep the original lock acquisition error below.
319
+ }
320
+ throw new Error(
321
+ `Run state is already being started: ${stateDir}. Retry after the current start finishes.`,
322
+ { cause: error },
323
+ );
324
+ }
325
+ return () => rmSync(lockDir, { recursive: true, force: true });
326
+ }
327
+
280
328
  function prepareStateDirForStart(stateDir: string): void {
281
329
  const existing = readJson(join(stateDir, "run.json"));
282
330
  const existingPid = Number(existing?.pid || 0);
@@ -316,8 +364,12 @@ export function startRun(
316
364
  const resolved = resolveRunTemplate(startParams);
317
365
  const run = safeRunId(startParams.run_id);
318
366
  const stateDir = resolveStateDir(startParams, run);
367
+ assertNoActiveRunState(stateDir);
319
368
  mkdirSync(stateDir, { recursive: true });
320
- prepareStateDirForStart(stateDir);
369
+ const releaseStartLock = acquireStateStartLock(stateDir);
370
+ try {
371
+ assertNoActiveRunState(stateDir);
372
+ prepareStateDirForStart(stateDir);
321
373
  const stdout = join(stateDir, "stdout.log");
322
374
  const stderr = join(stateDir, "stderr.log");
323
375
  const recipeFile = startParams.file
@@ -359,6 +411,9 @@ export function startRun(
359
411
  values,
360
412
  ...(artifacts ? { artifacts } : {}),
361
413
  ...(startParams.mailbox ? { mailbox: startParams.mailbox } : {}),
414
+ ...(startParams.retire_when === "children_terminal"
415
+ ? { retire_when: "children_terminal" as const }
416
+ : {}),
362
417
  };
363
418
  writeJsonAtomic(join(stateDir, "run.json"), meta);
364
419
  const child = spawn(process.execPath, argv, {
@@ -383,6 +438,9 @@ export function startRun(
383
438
  );
384
439
  child.unref();
385
440
  return meta;
441
+ } finally {
442
+ releaseStartLock();
443
+ }
386
444
  }
387
445
 
388
446
  function normalizeRunOutboxDelivery(value: unknown): RunOutboxDelivery {
package/lib/execution.ts CHANGED
@@ -70,6 +70,8 @@ export type RegisteredToolExec = (
70
70
  options?: ToolExecOptions,
71
71
  ) => Promise<ToolExecResult>;
72
72
 
73
+ const DEFAULT_MAX_PARALLEL_BRANCHES = 64;
74
+
73
75
  type TemplateExecution = {
74
76
  branches: BranchReport[];
75
77
  commands: string[];
@@ -170,6 +172,19 @@ function createSoftQuorum(
170
172
  };
171
173
  }
172
174
 
175
+ function getMaxParallelBranches(): number {
176
+ const raw = Number(process.env.PI_ACTORS_MAX_PARALLEL_BRANCHES ?? "");
177
+ return Number.isInteger(raw) && raw > 0 ? raw : DEFAULT_MAX_PARALLEL_BRANCHES;
178
+ }
179
+
180
+ function assertParallelBranchLimit(count: number): void {
181
+ const max = getMaxParallelBranches();
182
+ if (count <= max) return;
183
+ throw new Error(
184
+ `Command template parallel fanout ${count} exceeds limit ${max}; set PI_ACTORS_MAX_PARALLEL_BRANCHES to override intentionally.`,
185
+ );
186
+ }
187
+
173
188
  function normalizeFailureScope(
174
189
  value: CommandTemplates.CommandTemplateFailureScope | undefined,
175
190
  ): CommandTemplates.CommandTemplateFailureScope {
@@ -428,6 +443,7 @@ async function executeTemplateConfig(
428
443
  );
429
444
  if (repeat === undefined)
430
445
  throw new Error("Command template repeat could not be resolved.");
446
+ if (normalized.parallel === true) assertParallelBranchLimit(repeat);
431
447
  const repeatedSteps = Array.from({ length: repeat }, (_unused, index0) => {
432
448
  const { repeat: _repeat, ...rest } = normalized;
433
449
  return {
@@ -521,6 +537,7 @@ async function executeTemplateConfig(
521
537
  if (steps.length === 0)
522
538
  throw new Error(formatToolText("Tool template produced no command steps."));
523
539
  if (normalized.parallel === true) {
540
+ assertParallelBranchLimit(steps.length);
524
541
  const branchResults = await Promise.all(
525
542
  steps.map((step) =>
526
543
  executeTemplateConfig(
package/lib/file-state.ts CHANGED
@@ -4,12 +4,13 @@
4
4
  * Owns generic durable JSON file writes shared by registry config and async run state.
5
5
  */
6
6
 
7
+ import { randomUUID } from "node:crypto";
7
8
  import { mkdirSync, renameSync, unlinkSync, writeFileSync } from "node:fs";
8
9
  import { dirname } from "node:path";
9
10
 
10
11
  export function writeJsonAtomic(path: string, value: unknown): void {
11
12
  mkdirSync(dirname(path), { recursive: true });
12
- const tempPath = `${path}.${process.pid}.${Date.now()}.tmp`;
13
+ const tempPath = `${path}.${process.pid}.${Date.now()}.${randomUUID()}.tmp`;
13
14
  try {
14
15
  writeFileSync(tempPath, `${JSON.stringify(value, null, 2)}\n`, "utf8");
15
16
  renameSync(tempPath, path);
package/lib/observability.ts CHANGED
@@ -27,6 +27,7 @@ export interface RunObservation {
27
27
  ownerId?: string;
28
28
  artifacts?: Record<string, string>;
29
29
  terminalHandled?: boolean;
30
+ retireWhen?: string;
30
31
  run: string;
31
32
  stateDir?: string;
32
33
  status: RunObservedStatus;
@@ -45,6 +46,12 @@ export interface RunSummary {
45
46
  total: number;
46
47
  }
47
48
 
49
+ export interface RunRetirementCandidate {
50
+ activeSubagents: number;
51
+ run: string;
52
+ stateDir: string;
53
+ }
54
+
48
55
  export interface RunTransition {
49
56
  from: RunObservedStatus;
50
57
  run: string;
@@ -77,6 +84,12 @@ const TERMINAL = new Set<RunObservedStatus>([
77
84
  "cancelled",
78
85
  "killed",
79
86
  ]);
87
+ const PROC_DESCENDANT_SCAN_TTL_MS = 1000;
88
+
89
+ const procDescendantScanCache = new Map<
90
+ string,
91
+ { count: number; expiresAt: number; signature: string }
92
+ >();
80
93
 
81
94
  function toNumber(value: unknown): number | undefined {
82
95
  const number = Number(value);
@@ -120,6 +133,9 @@ function observeRun(stateDir: string): RunObservation | undefined {
120
133
  ? { artifacts: status.artifacts as Record<string, string> }
121
134
  : {}),
122
135
  ...(status.terminal_handled ? { terminalHandled: true } : {}),
136
+ ...(typeof status.retire_when === "string"
137
+ ? { retireWhen: status.retire_when }
138
+ : {}),
123
139
  run,
124
140
  stateDir,
125
141
  status: status.status as RunObservedStatus,
@@ -160,10 +176,12 @@ export function summarizeRuns(
160
176
  const failed = runs.filter((run) => run.status === "failed").length;
161
177
  const cancelled = runs.filter((run) => run.status === "cancelled").length;
162
178
  const killed = runs.filter((run) => run.status === "killed").length;
163
- const runningSubagents = runningRuns.reduce(
179
+ const progressSubagents = runningRuns.reduce(
164
180
  (sum, run) => sum + Math.max(1, Math.floor(run.activeSubagents ?? 0)),
165
181
  0,
166
182
  );
183
+ const processSubagents = countRunningSubagents(stateRoot, ownerId);
184
+ const runningSubagents = Math.max(progressSubagents, running + processSubagents);
167
185
  return {
168
186
  cancelled,
169
187
  done,
@@ -253,9 +271,22 @@ export function countRunningSubagents(
253
271
  ): number {
254
272
  const runPids = getRunningRunPids(stateRoot, ownerId);
255
273
  if (runPids.size === 0 || !existsSync("/proc")) return 0;
274
+ const signature = [...runPids].sort().join(",");
275
+ const cacheKey = `${stateRoot}\0${ownerId ?? ""}`;
276
+ const cached = procDescendantScanCache.get(cacheKey);
277
+ const now = Date.now();
278
+ if (cached && cached.signature === signature && cached.expiresAt > now) {
279
+ return cached.count;
280
+ }
256
281
  const parentByPid = new Map<string, string>();
257
282
  const commandByPid = new Map<string, string>();
258
- for (const entry of readdirSync("/proc", { withFileTypes: true })) {
283
+ let procEntries: import("node:fs").Dirent[];
284
+ try {
285
+ procEntries = readdirSync("/proc", { withFileTypes: true });
286
+ } catch {
287
+ return 0;
288
+ }
289
+ for (const entry of procEntries) {
259
290
  if (!entry.isDirectory() || !/^\d+$/.test(entry.name)) continue;
260
291
  const ppid = getProcPpid(entry.name);
261
292
  if (!ppid) continue;
@@ -277,6 +308,11 @@ export function countRunningSubagents(
277
308
  if (!command.includes("pi -p") && !command.includes("pi\0-p")) continue;
278
309
  if (descendantOfRun(pid)) count++;
279
310
  }
311
+ procDescendantScanCache.set(cacheKey, {
312
+ count,
313
+ expiresAt: now + PROC_DESCENDANT_SCAN_TTL_MS,
314
+ signature,
315
+ });
280
316
  return count;
281
317
  }
282
318
 
@@ -299,6 +335,23 @@ export function renderRunStatus(
299
335
  return renderSubagentStatus(summary.runningSubagents, frame);
300
336
  }
301
337
 
338
+ export function findRunRetirementCandidates(
339
+ summary: RunSummary,
340
+ ): RunRetirementCandidate[] {
341
+ return summary.runs
342
+ .filter((run) =>
343
+ run.status === "running" &&
344
+ run.retireWhen === "children_terminal" &&
345
+ run.stateDir &&
346
+ Math.floor(run.activeSubagents ?? 0) <= 0,
347
+ )
348
+ .map((run) => ({
349
+ activeSubagents: Math.max(0, Math.floor(run.activeSubagents ?? 0)),
350
+ run: run.run,
351
+ stateDir: run.stateDir!,
352
+ }));
353
+ }
354
+
302
355
  export function detectRunTransitions(
303
356
  previous: Map<string, RunObservedStatus>,
304
357
  summary: RunSummary,
@@ -384,6 +437,33 @@ function readOutboxLines(run: RunObservation): string[] {
384
437
  return content ? content.split("\n") : [];
385
438
  }
386
439
 
440
+ export function pruneRunObservationState(
441
+ previousStatuses: Map<string, RunObservedStatus>,
442
+ previousLineCounts: Map<string, number>,
443
+ summary: RunSummary,
444
+ terminalRuns: Iterable<string> = [],
445
+ ): void {
446
+ const activeRuns = new Set(summary.runs.map((run) => run.run));
447
+ const terminalRunSet = new Set(terminalRuns);
448
+ const terminalLineKeys = new Set(
449
+ summary.runs
450
+ .filter((run) => terminalRunSet.has(run.run))
451
+ .map((run) => run.stateDir ?? run.run),
452
+ );
453
+ const activeLineKeys = new Set(
454
+ summary.runs.map((run) => run.stateDir ?? run.run),
455
+ );
456
+ for (const run of terminalRunSet) previousStatuses.delete(run);
457
+ for (const run of previousStatuses.keys()) {
458
+ if (!activeRuns.has(run)) previousStatuses.delete(run);
459
+ }
460
+ for (const key of previousLineCounts.keys()) {
461
+ if (terminalLineKeys.has(key) || !activeLineKeys.has(key)) {
462
+ previousLineCounts.delete(key);
463
+ }
464
+ }
465
+ }
466
+
387
467
  export function detectRunOutboxEvents(
388
468
  previousLineCounts: Map<string, number>,
389
469
  summary: RunSummary,
package/lib/prompts.ts CHANGED
@@ -25,13 +25,13 @@ export const ONBOARDING_SYSTEM_PROMPT = `pi-actors quick model:
25
25
  - Command templates stay sync: string leaf, array sequence, object node; flags include args/defaults, parallel, when, timeout, delay, retry, failure, recover, repeat, output.
26
26
  - Placeholders support typed/default args plus {value??fallback} and {flag?yes:no}.
27
27
  - ~/.pi/agent/recipes/*.json is actor muscle memory: every recipe there is auto-registered as an agent tool across sessions; register_tool writes there.
28
- - Recipes own template directly and may declare metadata/defaults/imports/mailbox/artifacts.
28
+ - Recipes own template directly and may declare metadata/defaults/imports/mailbox/artifacts; files >1 MiB or import depth >32 fail closed.
29
29
  - Recipe imports are local variables; imported recipes are definitions, not nested async runs; parent async:true creates one run.
30
30
  - Use spawn/message/inspect for actor-level start/send/observe; avoid runtime/FIFO/outbox vocabulary in public guidance.
31
31
  - Run state lives under ~/.pi/agent/tmp/pi-actors/runs; inspect status/tail/messages/mailbox/files/artifacts intentionally and avoid busy-polling.
32
32
  - Maintain ~/.pi/agent/recipes like MEMORY.md for capabilities: keep useful tools, curate stale ones; packaged recipes are lower-priority components, not tools by location.
33
33
  - Foreground tools/templates fit short work; async recipes/runs fit subagents, services, fanout, media, and long pipelines.
34
- - Long fanout = parent async recipe wrapping template(parallel:true) and imports; packaged fanout recipes bubble branch completion messages.
34
+ - Long fanout = parent async recipe wrapping template(parallel:true) and imports; packaged fanout recipes bubble branch completion messages; grow recurring multi-agent workflows as packaged recipes/pipelines, not ad hoc external scripts.
35
35
  - For deeper pi-actors guidance, inspect installed extension sources/docs/recipes; README and docs are not automatically in context.`;
36
36
 
37
37
  export const REGISTER_TOOL_PARAM_DESCRIPTIONS = {