@agfpd/iapeer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/bin/iapeer +25 -0
  2. package/package.json +37 -0
  3. package/src/cli/cli.test.ts +130 -0
  4. package/src/cli/index.ts +608 -0
  5. package/src/cli/listTui.test.ts +70 -0
  6. package/src/cli/listTui.ts +165 -0
  7. package/src/codec/codec.test.ts +271 -0
  8. package/src/codec/index.ts +217 -0
  9. package/src/core/constants.test.ts +21 -0
  10. package/src/core/constants.ts +180 -0
  11. package/src/core/errors.ts +20 -0
  12. package/src/core/index.ts +3 -0
  13. package/src/core/normalize.test.ts +98 -0
  14. package/src/core/normalize.ts +89 -0
  15. package/src/core/socket.ts +63 -0
  16. package/src/create/create.test.ts +143 -0
  17. package/src/create/index.ts +178 -0
  18. package/src/daemon/daemon-http.test.ts +114 -0
  19. package/src/daemon/daemon.test.ts +103 -0
  20. package/src/daemon/index.ts +439 -0
  21. package/src/daemon/main.test.ts +194 -0
  22. package/src/daemon/main.ts +230 -0
  23. package/src/enable/enable.test.ts +92 -0
  24. package/src/enable/index.ts +381 -0
  25. package/src/identity/identity.test.ts +262 -0
  26. package/src/identity/index.ts +603 -0
  27. package/src/index.ts +27 -0
  28. package/src/init/index.ts +408 -0
  29. package/src/init/init.test.ts +171 -0
  30. package/src/init/runtime-resolve.test.ts +49 -0
  31. package/src/install/index.ts +84 -0
  32. package/src/install/install.test.ts +31 -0
  33. package/src/launch/adapters/claude.ts +250 -0
  34. package/src/launch/adapters/codex.ts +329 -0
  35. package/src/launch/adapters/notifier.ts +90 -0
  36. package/src/launch/adapters/telegram.ts +130 -0
  37. package/src/launch/bootstrap.test.ts +56 -0
  38. package/src/launch/composeSystemPrompt.layers.test.ts +319 -0
  39. package/src/launch/composeSystemPrompt.test.ts +98 -0
  40. package/src/launch/composeSystemPrompt.ts +261 -0
  41. package/src/launch/index.ts +253 -0
  42. package/src/launch/launch.test.ts +233 -0
  43. package/src/launch/launchd.test.ts +363 -0
  44. package/src/launch/launchd.ts +375 -0
  45. package/src/launch/launchdRun.ts +168 -0
  46. package/src/launch/sockdir.test.ts +70 -0
  47. package/src/launch/types.ts +300 -0
  48. package/src/lifecycle/index.ts +840 -0
  49. package/src/lifecycle/lifecycle.test.ts +496 -0
  50. package/src/onboard/index.ts +135 -0
  51. package/src/onboard/onboard.test.ts +39 -0
  52. package/src/provision/index.ts +170 -0
  53. package/src/provision/provision.test.ts +104 -0
  54. package/src/registry/index.ts +453 -0
  55. package/src/registry/registry.test.ts +400 -0
  56. package/src/runtime/deploy.ts +230 -0
  57. package/src/runtime/index.ts +191 -0
  58. package/src/runtime/runtime.test.ts +226 -0
  59. package/src/storage/index.ts +331 -0
  60. package/src/storage/peers-home.test.ts +34 -0
  61. package/src/storage/storage.test.ts +65 -0
  62. package/src/transport/index.ts +522 -0
  63. package/tsconfig.json +17 -0
@@ -0,0 +1,840 @@
1
+ // Lifecycle — wake-on-miss / supervise / reap. The warm-on-demand core: a dead
2
+ // peer is woken (spawned) on demand, its first message delivered, and idle
3
+ // sessions are reaped. Consolidated from Spawned-Peer spawner.ts (performSpawn)
4
+ // + watcher.ts (boot / ready-gate / idle phases), but with the detached
5
+ // per-session watcher COLLAPSED into the daemon: wakeOrSpawn runs boot + ready
6
+ // inline (the daemon awaits it) and a single superviseTick drives idle-reap.
7
+ //
8
+ // HARD SAFETY (H4): the daemon NEVER wakes / reaps / respawns / sweeps a peer
9
+ // that has a launchd plist (~/Library/LaunchAgents/com.iapeer.<p>.plist). Such a
10
+ // peer is launchd-managed (KeepAlive owns its lifecycle); the daemon touching it
11
+ // would race launchd on the live fleet. isLaunchdManaged() is checked FIRST,
12
+ // before any wake or reap. wakeOrSpawn refuses a launchd peer; superviseTick and
13
+ // sweepZombies skip it. Only daemon-owned (no-plist) peers are managed here.
14
+
15
+ import { existsSync, mkdirSync, readFileSync, readdirSync, rmSync, writeFileSync } from 'fs'
16
+ import { homedir } from 'os'
17
+ import { join } from 'path'
18
+ import { spawnSync } from 'child_process'
19
+ import * as lockfile from 'proper-lockfile'
20
+ import {
21
+ STATE_DIR,
22
+ LOGS_DIR,
23
+ isRuntime,
24
+ resolveSockDir,
25
+ type Runtime,
26
+ } from '../core/constants.ts'
27
+ import { buildProcessAddress, buildSocketPath } from '../core/socket.ts'
28
+ import { err, ok, type Result } from '../core/errors.ts'
29
+ import { resolveGlobalRoot } from '../storage/index.ts'
30
+ import { readPeerProfile, resolveIdentity } from '../identity/index.ts'
31
+ import { findPeer, publicPeerSummary, readPeersIndex, type PeerRecord, type PublicPeerSummary } from '../registry/index.ts'
32
+ // Ф3: launch = HOW to bring up ONE session (runtime-agnostic primitive + adapter).
33
+ // lifecycle decides WHEN/HOW-MANY and delegates the bring-up to launch.
34
+ import {
35
+ getAdapter,
36
+ launch,
37
+ launchAgentsDir,
38
+ launchdLabel,
39
+ type LaunchConfig,
40
+ type LaunchSpec,
41
+ } from '../launch/index.ts'
42
+ import { composeSystemPrompt, gatherPromptInput } from '../launch/composeSystemPrompt.ts'
43
+
44
+ // ─────────────────────────────────────────────────────────────────────────────
45
+ // Config
46
+ // ─────────────────────────────────────────────────────────────────────────────
47
+
48
/**
 * Settings consumed by the lifecycle layer. Built by loadLifecycleConfig from
 * the environment; tests may hand in their own instance via WakeDeps.cfg.
 */
export interface LifecycleConfig {
  /** Executable used for the claude runtime. */
  claudeBin: string
  /** Executable used for the codex runtime. */
  codexBin: string
  /** Socket directory handed to buildSocketPath and to the launch primitive. */
  sockDir: string
  /** Lifecycle state directory (~/.iapeer/state/lifecycle): session, stop-flag, mark and lock files. */
  stateDir: string
  /** Lifecycle log directory (~/.iapeer/logs/lifecycle). */
  logDir: string
  /** Boot deadline in seconds; forwarded to the launch primitive. */
  bootDeadlineSecs: number
  /** Ready-gate budget in seconds; forwarded to the launch primitive. */
  readyGateSecs: number
  /** Idle threshold in seconds (presumably the idle-reap cutoff — the consumer is outside this chunk). */
  idleSecs: number
  /** Maximum session age in seconds; forwarded to the launch primitive. */
  maxAgeSecs: number
}
59
+
60
/**
 * Build the lifecycle configuration from environment variables.
 *
 * Overrides read here: IAPEER_CLAUDE_BIN, IAPEER_CODEX_BIN,
 * IAPEER_BOOT_DEADLINE_SECS, IAPEER_READY_GATE_SECS, IAPEER_IDLE_SECS and
 * IAPEER_MAX_AGE_SECS. The socket directory and the global root are resolved by
 * resolveSockDir / resolveGlobalRoot from the same `env`.
 *
 * @param env Environment to read; defaults to `process.env`.
 * @returns A fully populated LifecycleConfig (every field has a default).
 */
export function loadLifecycleConfig(env: NodeJS.ProcessEnv = process.env): LifecycleConfig {
  const home = env.HOME?.trim() || homedir()
  const root = resolveGlobalRoot(env)

  // A blank override means "unset" — the same rule HOME follows above. With a
  // plain `??` an empty IAPEER_CLAUDE_BIN would be used as the binary path.
  const text = (raw: string | undefined, dflt: string): string => raw?.trim() || dflt

  // Accept only a whole positive decimal integer. parseInt() stops at the first
  // non-digit, so "1h" used to become a 1-second timeout and "1e3" became 1;
  // such values now fall back to the default instead of being truncated.
  const num = (raw: string | undefined, dflt: number): number => {
    const trimmed = raw?.trim() ?? ''
    if (!/^\+?\d+$/.test(trimmed)) return dflt
    const n = Number(trimmed)
    return Number.isFinite(n) && n > 0 ? n : dflt
  }

  return {
    claudeBin: text(env.IAPEER_CLAUDE_BIN, join(home, '.local', 'bin', 'claude')),
    codexBin: text(env.IAPEER_CODEX_BIN, 'codex'),
    sockDir: resolveSockDir(env),
    stateDir: join(root, STATE_DIR, 'lifecycle'),
    logDir: join(root, LOGS_DIR, 'lifecycle'),
    bootDeadlineSecs: num(env.IAPEER_BOOT_DEADLINE_SECS, 240),
    readyGateSecs: num(env.IAPEER_READY_GATE_SECS, 120),
    idleSecs: num(env.IAPEER_IDLE_SECS, 3600),
    maxAgeSecs: num(env.IAPEER_MAX_AGE_SECS, 14400),
  }
}
79
+
80
+ // ─────────────────────────────────────────────────────────────────────────────
81
+ // H4 — launchd-managed detector (checked FIRST, before any wake/reap)
82
+ // ─────────────────────────────────────────────────────────────────────────────
83
+
84
/**
 * H4 guard: report whether a peer belongs to launchd.
 *
 * A peer counts as launchd-managed when its plist
 * (`~/Library/LaunchAgents/com.iapeer.<personality>.plist`) exists. launchd's
 * KeepAlive owns the lifecycle of such a peer, so the daemon must stay
 * read-only for it: it may deliver to a live session, but must never wake,
 * reap, respawn or sweep it.
 *
 * @param personality Peer personality the plist label is derived from.
 * @param env Environment passed to launchAgentsDir; defaults to `process.env`.
 * @returns true iff the plist file exists.
 */
export function isLaunchdManaged(personality: string, env: NodeJS.ProcessEnv = process.env): boolean {
  // Both the directory and the label come from the launch module's helpers —
  // the same ones the plist generator uses — so detector and installer cannot
  // drift apart on the file name. IAPEER_LAUNCHAGENTS_DIR redirects the
  // directory in tests so a guard test never touches ~/Library/LaunchAgents.
  const agentsDir = launchAgentsDir(env)
  const plistName = `${launchdLabel(personality)}.plist`
  return existsSync(join(agentsDir, plistName))
}
98
+
99
+ // ─────────────────────────────────────────────────────────────────────────────
100
+ // Session state — what the supervisor walks (daemon-owned sessions only)
101
+ // ─────────────────────────────────────────────────────────────────────────────
102
+
103
/**
 * Record persisted as `<identity>.session` for a daemon-owned session.
 * wakeOrSpawn writes it; readSessionStates reads it back.
 */
export interface SessionState {
  /** Process address built from runtime + personality (also the tmux session name). */
  identity: string
  /** Runtime the session was started with. */
  runtime: Runtime
  /** Peer personality as named in the registry. */
  personality: string
  /** Peer working directory taken from the registry record. */
  cwd: string
  /** `Date.now()` at the moment the record was written. */
  wokeAt: number
}
110
+
111
/** Path of the `.session` state file for `identity` inside `cfg.stateDir`. */
function sessionStatePath(cfg: LifecycleConfig, identity: string): string {
  const fileName = `${identity}.session`
  return join(cfg.stateDir, fileName)
}
114
+
115
/**
 * Persist `state` as the session file of `state.identity`.
 *
 * The state directory is created first (0700). Serializing and writing the
 * file (0600) is best-effort: a failure there is swallowed so that recording
 * state can never block a wake. Note that the directory creation itself sits
 * outside the guard and can still throw.
 */
function writeSessionState(cfg: LifecycleConfig, state: SessionState): void {
  mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
  try {
    const target = sessionStatePath(cfg, state.identity)
    const payload = JSON.stringify(state)
    writeFileSync(target, payload, { mode: 0o600 })
  } catch {
    /* best-effort — supervision degrades to a liveness scan, never blocks a wake */
  }
}
123
+
124
/** Delete the session file of `identity`; a missing file or a failed delete is ignored. */
function removeSessionState(cfg: LifecycleConfig, identity: string): void {
  const target = sessionStatePath(cfg, identity)
  try {
    rmSync(target, { force: true })
  } catch {
    /* nothing left to remove */
  }
}
131
+
132
+ // ─────────────────────────────────────────────────────────────────────────────
133
+ // C1 — durable stopped flag (warm-on-demand stop/start; contract ЖЦ §stop/start,
134
+ // Демон §stopped). `stop <peer>` on a warm runtime kills the session AND drops this
135
+ // flag → the daemon REFUSES to wake the peer (a DELIBERATE operator halt, not a
136
+ // fault — no message queue, the sender gets an explicit "stopped" error). `start`
137
+ // clears it (wakeable again). Distinct from idle-reap (temporary; the daemon DOES
138
+ // wake on the next message). Lives next to the session-state, in state/lifecycle —
139
+ // daemon-owned, durable across restarts. Keyed on IDENTITY (runtime-personality):
140
+ // `stop <peer> <runtime>` halts one runtime; the flag is per-runtime presence.
141
+ // always-on (launchd) peers are NOT stopped this way — their stop is launchctl
142
+ // bootout (ЖЦ); a launchd peer never carries this flag (and the daemon is H4
143
+ // read-only for it regardless).
144
+ // ─────────────────────────────────────────────────────────────────────────────
145
+
146
/** Path of the durable `.stopped` flag file for `identity` inside `cfg.stateDir`. */
function stoppedFlagPath(cfg: LifecycleConfig, identity: string): string {
  const fileName = `${identity}.stopped`
  return join(cfg.stateDir, fileName)
}
149
+
150
/**
 * Whether `identity` carries the durable stop flag.
 * While the flag file exists the daemon must refuse to wake the peer.
 */
export function isStopped(cfg: LifecycleConfig, identity: string): boolean {
  const flagFile = stoppedFlagPath(cfg, identity)
  return existsSync(flagFile)
}
154
+
155
/**
 * Place the durable stop flag for `identity` (the `stop` verb calls this after
 * killing the session). The flag file holds the ISO timestamp of the stop.
 * Unlike the clear operation, failures here propagate to the caller.
 */
export function setStopped(cfg: LifecycleConfig, identity: string): void {
  mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
  const stamp = `${new Date().toISOString()}\n`
  writeFileSync(stoppedFlagPath(cfg, identity), stamp, { mode: 0o600 })
}
160
+
161
/**
 * Remove the durable stop flag for `identity` (the `start` verb calls this,
 * making the peer wakeable again). A missing flag or a failed delete is ignored.
 */
export function clearStopped(cfg: LifecycleConfig, identity: string): void {
  const flagFile = stoppedFlagPath(cfg, identity)
  try {
    rmSync(flagFile, { force: true })
  } catch {
    /* nothing left to remove */
  }
}
169
+
170
+ // ─────────────────────────────────────────────────────────────────────────────
171
+ // C4 — /new graceful mark (contract ЖЦ §/new). The AGENT, on an owner /new,
172
+ // writes a handoff to durable memory, drops THIS mark, and self-kills. The daemon
173
+ // detects the mark and re-launches EAGERLY as FRESH + initial_prompt (contrast:
174
+ // idle-reap is markless → lazy resume on the next message). The mark is consumed on
175
+ // that fresh re-launch. The mark TEXT/agent-side (doctrine /new instruction) is a
176
+ // separate deploy artifact; THIS is only the daemon side (detect → fresh + seed).
177
+ // ─────────────────────────────────────────────────────────────────────────────
178
+
179
/** Path of the `/new` graceful-mark file (`.new`) for `identity` inside `cfg.stateDir`. */
function newMarkPath(cfg: LifecycleConfig, identity: string): string {
  const fileName = `${identity}.new`
  return join(cfg.stateDir, fileName)
}
182
+
183
/**
 * Whether `identity` carries a `/new` graceful mark.
 * A present mark makes the next wake an eager fresh launch with the seed prompt.
 */
export function hasNewMark(cfg: LifecycleConfig, identity: string): boolean {
  const markFile = newMarkPath(cfg, identity)
  return existsSync(markFile)
}
187
+
188
/**
 * Place the `/new` mark for `identity` (part of the agent's self-kill ritual,
 * done before it exits). The mark file holds the ISO timestamp of the request.
 * Failures propagate to the caller.
 */
export function setNewMark(cfg: LifecycleConfig, identity: string): void {
  mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
  const stamp = `${new Date().toISOString()}\n`
  writeFileSync(newMarkPath(cfg, identity), stamp, { mode: 0o600 })
}
193
+
194
/**
 * Consume the `/new` mark for `identity` (the daemon does this on the eager
 * fresh re-launch). A missing mark or a failed delete is ignored.
 */
export function clearNewMark(cfg: LifecycleConfig, identity: string): void {
  const markFile = newMarkPath(cfg, identity)
  try {
    rmSync(markFile, { force: true })
  } catch {
    /* nothing left to remove */
  }
}
202
+
203
+ // ─────────────────────────────────────────────────────────────────────────────
204
+ // resolveWakeMode (C3a + C4a) — the resume-vs-fresh decision, contract ЖЦ
205
+ // §resume/fresh. Pure but for the /new-mark consume (a wake side-effect); takes the
206
+ // adapter's resolveResume as a parameter so it is unit-testable without a runtime.
207
+ // ─────────────────────────────────────────────────────────────────────────────
208
+
209
/** Outcome of the resume-vs-fresh decision made by resolveWakeMode. */
export interface WakeMode {
  /** true → resume an existing conversation; false → start fresh. */
  resume: boolean
  /** Reference returned by the adapter's resume preflight, when resuming. */
  resumeRef?: string
  /**
   * Present ONLY when an EXPLICIT resume request found nothing to resume. The
   * caller must fail loudly on it and never fall back to a silent fresh start.
   */
  failReason?: string
}
216
+
217
/**
 * Choose between resuming and starting fresh when a peer is woken
 * (lifecycle contract, resume/fresh and /new sections).
 *
 * Precedence:
 *   1. a /new mark exists → FRESH, and the mark is consumed here (the one side
 *      effect of this otherwise pure function);
 *   2. `argsResume === false` → FRESH, no preflight is run;
 *   3. `argsResume === true` (e.g. attach) → RESUME, or `failReason` when the
 *      preflight finds nothing — never a silent fresh fallback;
 *   4. `argsResume === undefined` → RESUME when the preflight succeeds, else
 *      FRESH (a first-ever launch has nothing to resume; that is not an error).
 *
 * @param cfg Lifecycle configuration (locates the /new mark).
 * @param identity Identity whose /new mark is checked.
 * @param cwd Peer working directory, passed to the preflight.
 * @param argsResume Caller's explicit wish, or undefined for the default.
 * @param resolveResume Adapter preflight; injected so the decision can be unit
 *   tested without a runtime.
 * @returns The resolved WakeMode.
 */
export function resolveWakeMode(
  cfg: LifecycleConfig,
  identity: string,
  cwd: string,
  argsResume: boolean | undefined,
  resolveResume: (cwd: string) => { ok: boolean; ref?: string; reason?: string },
): WakeMode {
  if (hasNewMark(cfg, identity)) {
    // The graceful mark is spent by the fresh re-launch it triggers.
    clearNewMark(cfg, identity)
    return { resume: false }
  }
  if (argsResume === false) return { resume: false }

  // From here the request is either an explicit resume or the default; both
  // run the preflight exactly once and differ only in how a miss is reported.
  const preflight = resolveResume(cwd)
  if (preflight.ok) return { resume: true, resumeRef: preflight.ref }
  if (argsResume === true) {
    return { resume: false, failReason: preflight.reason ?? 'resume requested but nothing to resume' }
  }
  return { resume: false }
}
247
+
248
/**
 * Load every valid `*.session` record from `cfg.stateDir`.
 *
 * A record is kept only when it is a JSON object whose `identity`,
 * `personality` and `cwd` are non-empty strings and whose `runtime` passes
 * isRuntime. Anything else — unreadable file, invalid JSON, wrong shape — is
 * skipped, so one damaged file never hides the remaining sessions.
 *
 * `wokeAt` is intentionally not validated here (the original did not check it
 * either); NOTE(review): confirm that consumers tolerate a missing `wokeAt`.
 *
 * @returns The valid records; an empty array when the directory cannot be listed.
 */
export function readSessionStates(cfg: LifecycleConfig): SessionState[] {
  let entries: string[]
  try {
    entries = readdirSync(cfg.stateDir)
  } catch {
    return []
  }

  const isNonEmptyString = (v: unknown): v is string => typeof v === 'string' && v.length > 0

  const states: SessionState[] = []
  for (const name of entries) {
    if (!name.endsWith('.session')) continue
    try {
      const parsed: unknown = JSON.parse(readFileSync(join(cfg.stateDir, name), 'utf8'))
      if (typeof parsed !== 'object' || parsed === null) continue
      const candidate = parsed as Partial<Record<keyof SessionState, unknown>>
      // `personality` feeds the launchd (H4) check and `identity` names the
      // session to act on, so they must be real strings — a truthy number or
      // object must not slip through as it did with the bare truthiness test.
      const valid =
        isNonEmptyString(candidate.identity) &&
        isNonEmptyString(candidate.personality) &&
        isNonEmptyString(candidate.cwd) &&
        typeof candidate.runtime === 'string' &&
        isRuntime(candidate.runtime)
      if (valid) states.push(parsed as SessionState)
    } catch {
      /* skip garbage */
    }
  }
  return states
}
267
+
268
+ // ─────────────────────────────────────────────────────────────────────────────
269
+ // H5 — wake-runtime resolution (registry-based, NO live-socket scan)
270
+ // ─────────────────────────────────────────────────────────────────────────────
271
+
272
/**
 * Pick the runtime to wake on a miss using the registry alone — live sockets
 * are not scanned, because a dead peer has none (blueprint-v2 §H5).
 *
 * Resolution order:
 *   1. `requested`, when given — it must be a valid runtime AND be declared for
 *      the peer (as `peer.runtime` or inside `peer.runtimes`);
 *   2. `peer.runtime`, the registry default;
 *   3. the first entry of `peer.runtimes`;
 *   4. otherwise an error asking the caller to specify a runtime.
 *
 * @param requested Runtime named by the caller, if any.
 * @param peer Registry record of the peer being woken.
 * @returns `ok(runtime)` or `err(message)`.
 */
export function resolveWakeRuntime(
  requested: string | undefined,
  peer: PeerRecord,
): Result<Runtime> {
  if (!requested) {
    if (peer.runtime) return ok(peer.runtime)
    return peer.runtimes.length > 0
      ? ok(peer.runtimes[0])
      : err(`cannot pick a runtime to wake "${peer.personality}"; specify runtime`)
  }

  if (!isRuntime(requested)) return err(`invalid runtime "${requested}"`)

  const declared = peer.runtime === requested || peer.runtimes.includes(requested)
  return declared
    ? ok(requested)
    : err(`runtime "${requested}" is not declared for "${peer.personality}"`)
}
293
+
294
+ // ─────────────────────────────────────────────────────────────────────────────
295
+ // Wake lock — serialize wake per identity (idempotent; concurrent = one spawn)
296
+ // ─────────────────────────────────────────────────────────────────────────────
297
+
298
/**
 * Run `fn` while holding an exclusive per-identity wake lock, so that
 * concurrent sends to the same dead peer produce exactly ONE spawn: the later
 * caller waits for the lock and then finds the live session inside it.
 *
 * The lock is an advisory proper-lockfile lock on
 * `<stateDir>/<identity>.wake.lock` (the target file is created if missing).
 *
 * Failure handling:
 *   - acquiring the lock may reject (for example once the retries are
 *     exhausted); that rejection propagates and `fn` is not run;
 *   - whatever `fn` returns or throws is what the caller sees. A failure while
 *     RELEASING the lock is logged to stderr and does not replace that outcome
 *     — previously a rejecting `release()` inside `finally` turned a
 *     successful wake into a rejection;
 *   - a lock that becomes compromised (its refresh arrived too late) is logged
 *     rather than thrown. proper-lockfile's default handler throws from a
 *     timer callback, which would surface as an uncaught exception.
 *
 * @param cfg Lifecycle configuration (locates the state directory).
 * @param identity Identity the lock is scoped to.
 * @param fn Work to run while the lock is held.
 * @returns The value `fn` resolves to.
 */
export async function withWakeLock<T>(
  cfg: LifecycleConfig,
  identity: string,
  fn: () => Promise<T>,
): Promise<T> {
  mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
  const lockTarget = join(cfg.stateDir, `${identity}.wake.lock`)
  // Append mode creates the file when absent without truncating an existing one.
  writeFileSync(lockTarget, '', { flag: 'a', mode: 0o600 })

  const warn = (what: string, e: unknown): void => {
    process.stderr.write(
      `[iapeer] WARN wake lock ${what} for ${identity}: ${e instanceof Error ? e.message : String(e)}\n`,
    )
  }

  const release = await lockfile.lock(lockTarget, {
    realpath: false,
    stale: 60_000,
    update: 5_000,
    retries: { retries: 30, factor: 1.3, minTimeout: 100, maxTimeout: 1_000 },
    onCompromised: (e: Error) => warn('compromised', e),
  })
  try {
    return await fn()
  } finally {
    try {
      await release()
    } catch (e) {
      // The guarded work has already settled; keep its outcome.
      warn('release failed', e)
    }
  }
}
324
+
325
+ // ─────────────────────────────────────────────────────────────────────────────
326
+ // tmux helpers
327
+ // ─────────────────────────────────────────────────────────────────────────────
328
+
329
/**
 * Run `tmux -S <sock> <args…>` synchronously.
 *
 * @returns `ok` is true only for exit status 0; `out` / `err` carry the
 *   captured stdout / stderr (empty strings when nothing was captured).
 */
function tmux(sock: string, ...args: string[]): { ok: boolean; out: string; err: string } {
  const argv = ['-S', sock].concat(args)
  const { status, stdout, stderr } = spawnSync('tmux', argv, { encoding: 'utf8' })
  return { ok: status === 0, out: stdout ?? '', err: stderr ?? '' }
}
333
/** Whether a tmux session named `identity` exists on the server at `sock` (`tmux has-session`). */
function sessionAlive(sock: string, identity: string): boolean {
  const { ok: found } = tmux(sock, 'has-session', '-t', identity)
  return found
}
336
+
337
+ // ─────────────────────────────────────────────────────────────────────────────
338
+ // System-prompt composition for a woken peer (delegates the jq doctrine-merge to
339
+ // launch/composeSystemPrompt). The tmux launch + boot/ready + activity-proxy all
340
+ // moved to launch/ (Ф3); lifecycle only gathers the inputs and decides when.
341
+ // ─────────────────────────────────────────────────────────────────────────────
342
+
343
/**
 * Collect the host facts that go into the system prompt.
 *
 * - `platform`: `process.platform` as a string.
 * - `osVersion`: `sw_vers -productVersion` on darwin, `VERSION_ID` from
 *   /etc/os-release on linux, otherwise 'unknown'.
 * - `hostname`: output of `hostname -s`, or 'unknown'.
 * - `user`: trimmed `env.USER`, else the output of `id -un`, else 'unknown'.
 * - `today`: the local date formatted as YYYY-MM-DD.
 *
 * Every probe degrades to 'unknown' rather than throwing.
 */
function gatherSystemFacts(env: NodeJS.ProcessEnv): {
  platform: string
  osVersion: string
  user: string
  hostname: string
  today: string
} {
  const UNKNOWN = 'unknown'
  const run = (cmd: string, argv: string[]) => spawnSync(cmd, argv, { encoding: 'utf8' })
  const pad2 = (n: number): string => String(n).padStart(2, '0')

  const platform: string = String(process.platform)

  let osVersion = UNKNOWN
  switch (platform) {
    case 'darwin': {
      const probe = run('sw_vers', ['-productVersion'])
      if (probe.status === 0) osVersion = (probe.stdout ?? '').trim() || UNKNOWN
      break
    }
    case 'linux': {
      try {
        const release = readFileSync('/etc/os-release', 'utf8')
        const found = release.match(/^VERSION_ID="?([^"\n]+)/m)
        osVersion = found?.[1] ?? UNKNOWN
      } catch {
        /* stays unknown */
      }
      break
    }
    default:
      break
  }

  const hostProbe = run('hostname', ['-s'])
  const shortHost = hostProbe.status === 0 ? (hostProbe.stdout ?? '').trim() : ''
  const hostname = shortHost || UNKNOWN

  let user = env.USER?.trim() ?? ''
  if (!user) {
    // The exit status is not inspected: any non-empty output is accepted.
    user = (run('id', ['-un']).stdout ?? '').trim() || UNKNOWN
  }

  const now = new Date()
  const today = [now.getFullYear(), pad2(now.getMonth() + 1), pad2(now.getDate())].join('-')

  return { platform, osVersion, user, hostname, today }
}
380
+
381
/**
 * Build the merged system prompt for a peer that carries a doctrine, write it
 * to `<stateDir>/<identity>.system-prompt.md` and return that path (used for
 * --system-prompt-file / model_instructions_file).
 *
 * Channel A, all four layers (see the system-prompt assembly document,
 * docs/Сборка системного промпта): 1 YAML facts + 2 IAPEER.md (global+local) +
 * 3 normalized registry + 4 every other <DOMAIN>.md (global+local). Filesystem
 * discovery is done by gatherPromptInput; composeSystemPrompt lays out the
 * bytes. Layers 3 and 4 contribute nothing when there are no peers and no
 * extra domains, so a bare doctrine keeps its golden output.
 *
 * BARE-SESSION GATE: a peer WITHOUT a local `<cwd>/.iapeer/IAPEER.md` yields
 * `undefined` (a throwaway test peer launches bare). The local doctrine file
 * is what marks a configured peer — its role lives in that file.
 *
 * @param peer Registry record (personality and description are used).
 * @param cwd Peer working directory.
 * @param identity Identity that names the output file.
 * @param cfg Lifecycle configuration (locates the state directory).
 * @param env Environment forwarded to the fact and input gatherers.
 * @param peers Public registry summaries, already read by the caller.
 * @returns Path of the written prompt file, or undefined for a bare session.
 */
function composePeerPrompt(
  peer: PeerRecord,
  cwd: string,
  identity: string,
  cfg: LifecycleConfig,
  env: NodeJS.ProcessEnv,
  peers: PublicPeerSummary[],
): string | undefined {
  const hasDoctrine = existsSync(join(cwd, '.iapeer', 'IAPEER.md'))
  if (!hasDoctrine) return undefined

  const hostFacts = gatherSystemFacts(env)
  // `peers` is the registry the wake path already loaded for findPeer. Passing
  // it through spares gatherPromptInput a second read+parse of
  // peers-profiles.json on this hot path and keeps a corrupt-registry failure
  // at that single read.
  const promptText = composeSystemPrompt(
    gatherPromptInput({
      personality: peer.personality,
      description: peer.description,
      cwd,
      ...hostFacts,
      env,
      peers,
    }),
  )

  mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
  const outFile = join(cfg.stateDir, `${identity}.system-prompt.md`)
  writeFileSync(outFile, promptText, { mode: 0o600 })
  return outFile
}
424
+
425
/**
 * C2 — build the first message handed to a woken session.
 *
 * For a FRESH wake the peer's `initial_prompt` (the launch seed from its local
 * profile) comes first, followed by the routed `task`, so the agent reads the
 * opening directive and then the message it has to answer. On a resume, or
 * when no seed is available, the task is returned unchanged. Reading the seed
 * is best-effort: a profile error means "no seed" and never blocks the wake.
 *
 * @param cwd Peer working directory the profile is read from.
 * @param task Routed message; may be empty (an eager /new re-launch, C4b).
 * @param fresh true for a fresh session, false for a resume.
 * @returns seed + blank line + task, the seed alone, or the task alone.
 */
export function composeFirstMessage(cwd: string, task: string, fresh: boolean): string {
  const readSeed = (): string | undefined => {
    try {
      return readPeerProfile(cwd)?.initial_prompt
    } catch {
      /* invalid or absent profile → no seed */
      return undefined
    }
  }

  const seed = fresh ? readSeed() : undefined
  if (!seed) return task
  // With no incoming message the seed stands on its own.
  if (!task) return seed
  return [seed, task].join('\n\n')
}
446
+
447
+ // ─────────────────────────────────────────────────────────────────────────────
448
+ // wakeOrSpawn — the WakeFn (= performSpawn consolidated, boot+ready inline)
449
+ // ─────────────────────────────────────────────────────────────────────────────
450
+
451
/** Request accepted by wakeOrSpawn. */
export interface WakeArgs {
  /** Personality of the peer to wake; looked up in the registry. */
  personality: string
  /** Optional runtime; it must be declared for the peer (see resolveWakeRuntime). */
  runtime?: string
  /** Optional topic. NOTE(review): not read by wakeOrSpawn in this chunk — confirm where it is used. */
  topic?: string
  /** The routed envelope — delivered as the first message of the woken session. */
  task: string
  /** true = explicit resume, false = explicit fresh, undefined = default (see resolveWakeMode). */
  resume?: boolean
}
459
+
460
/** Outcome reported by wakeOrSpawn. */
export interface WakeResult {
  /** READY: a session is available; FAILED: see `reason`. */
  status: 'READY' | 'FAILED'
  /** true only when this call launched a new session (false on reuse and on failure). */
  woke: boolean
  /** Resolved runtime, once resolution got that far. */
  runtime?: Runtime
  /** Process address (identity) of the ready session. */
  process_address?: string
  /** Explanation attached to a FAILED result. */
  reason?: string
  /**
   * C1: the wake was refused because the peer carries a durable stop flag — a
   * deliberate halt, distinct from offline or a wake failure. The sender is
   * told the peer is stopped, not that delivery failed transiently.
   */
  stopped?: boolean
}
471
+
472
/** Optional injection points for wakeOrSpawn (mainly for tests). */
export interface WakeDeps {
  /** Environment to use instead of `process.env`. */
  env?: NodeJS.ProcessEnv
  /** Pre-built configuration; when absent it is loaded from `env`. */
  cfg?: LifecycleConfig
}
476
+
477
/**
 * Wake a peer session — or, idempotently, reuse a live one — and hand `task`
 * to the launch primitive as the first message of a newly launched session.
 *
 * Sequence:
 *   1. run a best-effort superviseTick (sweep before spawning);
 *   2. look the peer up in the registry; unknown peer → FAILED;
 *   3. H4: refuse a launchd-managed peer;
 *   4. resolve the runtime and its adapter;
 *   5. C1: refuse a peer that carries the durable stop flag (`stopped: true`);
 *   6. under the per-identity wake lock: re-check 5 and 3, reuse a live tmux
 *      session if there is one, otherwise resolve resume/fresh, compose the
 *      system prompt and first message, and call `launch`.
 *
 * READY is returned when a live session was found (`woke: false`) or when
 * `launch` did not report FAILED (`woke: true`). Per the original note, launch
 * resolves only after the model produced its first turn — that logic lives in
 * the launch module and is not visible here.
 *
 * Expected refusals come back as FAILED results. Errors thrown by the registry
 * read, the lock acquisition, prompt composition or `launch` are not caught
 * here and reject the returned promise.
 *
 * @param args The wake request.
 * @param deps Optional env / config overrides.
 */
export async function wakeOrSpawn(args: WakeArgs, deps: WakeDeps = {}): Promise<WakeResult> {
  const env = deps.env ?? process.env
  const cfg = deps.cfg ?? loadLifecycleConfig(env)
  const messageOf = (e: unknown): string => (e instanceof Error ? e.message : String(e))

  // Sweep-at-spawn-start: the SAME H4-guarded superviseTick the daemon timer
  // runs, so both reap entry points share one guarded path. Best-effort only.
  try {
    superviseTick(cfg, { env })
  } catch {
    /* supervision must never affect this wake's outcome */
  }

  const registry = readPeersIndex({ env })
  const peer = findPeer(registry, args.personality)
  if (!peer) {
    return { status: 'FAILED', woke: false, reason: `unknown peer "${args.personality}"` }
  }

  // H4 — launchd KeepAlive owns a launchd-managed peer; the daemon never wakes it.
  if (isLaunchdManaged(args.personality, env)) {
    return {
      status: 'FAILED',
      woke: false,
      reason: `"${args.personality}" is launchd-managed; the daemon does not wake it (launchd KeepAlive owns its lifecycle)`,
    }
  }

  const picked = resolveWakeRuntime(args.runtime, peer)
  if (!picked.ok) return { status: 'FAILED', woke: false, reason: picked.error.message }
  const runtime = picked.value

  // launch = HOW. getAdapter throws only for an unregistered runtime; report
  // that as FAILED instead of letting it escape the wake.
  let adapter: ReturnType<typeof getAdapter>
  try {
    adapter = getAdapter(runtime)
  } catch (e) {
    return { status: 'FAILED', woke: false, runtime, reason: messageOf(e) }
  }

  const identity = buildProcessAddress(runtime, args.personality)
  const sock = buildSocketPath(runtime, args.personality, cfg.sockDir)
  const cwd = peer.cwd

  // One builder for the C1 refusal, used before and inside the lock.
  const stoppedRefusal = (): WakeResult => ({
    status: 'FAILED',
    woke: false,
    runtime,
    stopped: true,
    reason: `"${args.personality}" (${runtime}) is stopped and not accepting messages; start it to resume`,
  })
  // One writer for the supervise record, used on reuse and after a launch.
  const recordSession = (): void =>
    writeSessionState(cfg, { identity, runtime, personality: args.personality, cwd, wokeAt: Date.now() })

  // C1 — a DELIBERATELY stopped peer is not woken. Unlike an idle-reap, `stop`
  // is an operator halt: the sender gets an explicit "stopped" refusal, not a
  // generic "offline", and nothing is queued. `start` clears the flag.
  if (isStopped(cfg, identity)) return stoppedRefusal()

  return withWakeLock(cfg, identity, async (): Promise<WakeResult> => {
    // Re-check both refusal gates INSIDE the lock (audit #3/#11): a `stop` or
    // a plist install that landed between the pre-lock check and the lock
    // acquisition must still be honoured. These checks are fail-safe — they
    // can only add a refusal. A stop racing DURING the spawn is not covered,
    // because `stop` does not take this lock.
    if (isStopped(cfg, identity)) return stoppedRefusal()
    if (isLaunchdManaged(args.personality, env)) {
      return {
        status: 'FAILED',
        woke: false,
        runtime,
        reason: `"${args.personality}" became launchd-managed mid-wake; the daemon does not wake it`,
      }
    }

    // Idempotent fast path: a concurrent wake already brought the session up.
    if (sessionAlive(sock, identity)) {
      recordSession()
      return { status: 'READY', woke: false, runtime, process_address: identity }
    }

    if (!existsSync(cwd)) {
      return { status: 'FAILED', woke: false, runtime, reason: `peer cwd does not exist: ${cwd}` }
    }

    // C3a + C4a — resume vs fresh. An EXPLICIT resume with nothing to resume
    // fails loudly instead of silently starting fresh.
    const mode = resolveWakeMode(cfg, identity, cwd, args.resume, dir => adapter.resolveResume(dir))
    if (mode.failReason) {
      return { status: 'FAILED', woke: false, runtime, reason: mode.failReason }
    }
    const { resume, resumeRef } = mode
    const fresh = !resume

    // Doctrine-carrying peers get a composed system prompt; a doctrine-less
    // (throwaway) peer gets undefined, i.e. a bare session.
    let systemPromptFile: string | undefined
    if (adapter.usesDoctrine) {
      systemPromptFile = composePeerPrompt(peer, cwd, identity, cfg, env, registry.peers.map(publicPeerSummary))
    }

    // Every WHEN/HOW-MANY decision (lock, registry, H4, runtime, resume) has
    // been made; hand the resolved spec to the launch primitive.
    const spec: LaunchSpec = {
      personality: args.personality,
      runtime,
      cwd,
      identity,
      socketPath: sock,
      systemPromptFile,
      resume, // the RESOLVED decision (C3a), not the raw caller flag
      resumeRef,
      extraArgs: [],
      // From the registry record: lets the launch primitive enforce an
      // adapter's intelligence gate (telegram requires natural).
      intelligence: peer.intelligence,
    }
    const launchCfg: LaunchConfig = {
      claudeBin: cfg.claudeBin,
      codexBin: cfg.codexBin,
      sockDir: cfg.sockDir,
      bootDeadlineSecs: cfg.bootDeadlineSecs,
      readyGateSecs: cfg.readyGateSecs,
      maxAgeSecs: cfg.maxAgeSecs,
      logDir: cfg.logDir,
      env,
    }

    // C2 — on a FRESH wake the peer's initial_prompt precedes the routed
    // message; a resumed session already holds its context, so no seed.
    const firstMessage = composeFirstMessage(cwd, args.task, fresh)
    const outcome = await launch(spec, adapter, firstMessage, launchCfg)
    if (outcome.status === 'FAILED') {
      return { status: 'FAILED', woke: false, runtime, reason: outcome.reason }
    }

    // The session is up and the message delivered. Recording supervise state
    // must not turn that success into a failure (audit #18): the session stays
    // bounded by its tmux self-TTL, so warn instead of throwing.
    try {
      recordSession()
    } catch (e) {
      process.stderr.write(
        `[iapeer] WARN session-state write failed for ${identity} — session is live + TTL-bounded but not idle-reap-supervised: ${messageOf(e)}\n`,
      )
    }
    return { status: 'READY', woke: true, runtime, process_address: identity }
  })
}
628
+
629
+ // ─────────────────────────────────────────────────────────────────────────────
630
+ // Reap — kill a session (used by idle-reap / supervise; H4-guarded by callers)
631
+ // ─────────────────────────────────────────────────────────────────────────────
632
+
633
/**
 * Kill the tmux session `identity` on the server behind `sock`, then tear the
 * server down if nothing is left on it.
 *
 * After the kill, the remaining session names are listed. When that listing is
 * blank, the server is killed too and the socket file is removed; a failure to
 * remove the socket file is ignored (best-effort cleanup).
 *
 * @param sock - Path of the tmux socket the session lives on.
 * @param identity - Name of the session to kill.
 */
export function killSession(sock: string, identity: string): void {
  tmux(sock, 'kill-session', '-t', identity)

  const remaining = tmux(sock, 'list-sessions', '-F', '#{session_name}').out.trim()
  if (remaining) return

  // Nothing else is listed on this socket: stop the server and drop the socket file.
  tmux(sock, 'kill-server')
  try {
    rmSync(sock, { force: true })
  } catch {
    /* best-effort */
  }
}
645
+
646
+ // ─────────────────────────────────────────────────────────────────────────────
647
+ // superviseTick — the SINGLE H4-guarded reap pass (idle + zombie-gone)
648
+ // ─────────────────────────────────────────────────────────────────────────────
649
+ //
650
+ // Walks only daemon-owned sessions (those wakeOrSpawn recorded a .session for —
651
+ // a launchd peer never has one). For each candidate the launchd-plist check is
652
+ // FIRST: a launchd-managed peer is skipped untouched (H4 — the daemon is
653
+ // read-only for it; reaping it would fight launchd KeepAlive on the live fleet).
654
+ // In this consolidation the idle-reap and the zombie-sweep are ONE guarded path,
655
+ // so no reap can bypass H4. Called by the daemon's supervise timer AND at the
656
+ // start of every wakeOrSpawn (heal strays before launching).
657
+
658
/** What one supervise pass decided for a single recorded session. */
export interface SuperviseOutcome {
  /** Identity of the session this outcome is about. */
  identity: string
  /**
   * Decision taken for the session:
   * - `skipped-launchd` — launchd-managed peer, left untouched;
   * - `reaped-gone` — the session was no longer live, its state record was removed;
   * - `needs-eager-fresh` — the session was no longer live and carried a /new-mark;
   * - `reaped-idle` — the session was live but idle past the limit and was killed;
   * - `alive` — the session is live and within the idle limit.
   */
  action: 'reaped-idle' | 'reaped-gone' | 'skipped-launchd' | 'alive' | 'needs-eager-fresh'
  /** Optional human-readable detail for the action. */
  reason?: string
  /**
   * For 'needs-eager-fresh' (C4b): the personality of the peer to eagerly
   * re-launch fresh. The daemon timer drives the async relaunch.
   */
  personality?: string
  /** For 'needs-eager-fresh' (C4b): the runtime of the peer to re-launch. */
  runtime?: Runtime
}
667
+
668
/** Injectable inputs for a supervise pass; each one has a default when omitted. */
export interface SuperviseDeps {
  /** Environment used for the launchd-managed check; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv
  /** "Now" in epoch milliseconds for idle accounting; defaults to `Date.now()`. */
  nowMs?: number
}
672
+
673
/**
 * Run one supervise pass over every recorded session state and report what was
 * done for each.
 *
 * Per session, in this order:
 * 1. launchd-managed peer → `skipped-launchd`, nothing else is touched (H4).
 * 2. session no longer live → its state record is removed; the outcome is
 *    `needs-eager-fresh` when a /new-mark exists for the identity (the mark is
 *    left in place), otherwise `reaped-gone`.
 * 3. session live → idle age is measured from the LATER of the runtime adapter's
 *    newest activity timestamp and the recorded wake time; when it exceeds
 *    `cfg.idleSecs` the session is killed, its state record removed and the
 *    outcome is `reaped-idle`, otherwise `alive`.
 *
 * @param cfg - Lifecycle configuration (socket dir, idle limit, state location).
 * @param deps - Optional overrides for the environment and the current time.
 * @returns One outcome per recorded session, in iteration order.
 */
export function superviseTick(cfg: LifecycleConfig, deps: SuperviseDeps = {}): SuperviseOutcome[] {
  const env = deps.env ?? process.env
  const nowMs = deps.nowMs ?? Date.now()
  const out: SuperviseOutcome[] = []
  for (const s of readSessionStates(cfg)) {
    // H4 — FIRST, before any reap. A launchd-managed peer is read-only.
    if (isLaunchdManaged(s.personality, env)) {
      out.push({ identity: s.identity, action: 'skipped-launchd' })
      continue
    }
    const sock = buildSocketPath(s.runtime, s.personality, cfg.sockDir)
    if (!sessionAlive(sock, s.identity)) {
      removeSessionState(cfg, s.identity)
      // C4b — a session that died carrying a /new-mark is a GRACEFUL termination by
      // the owner: re-launch EAGERLY as fresh (not lazily on the next message, the
      // way a markless idle-reap death resumes). The mark is LEFT for the relaunch's
      // resolveWakeMode to consume; the daemon timer drives the async wakeOrSpawn.
      if (hasNewMark(cfg, s.identity)) {
        out.push({
          identity: s.identity,
          action: 'needs-eager-fresh',
          reason: '/new graceful mark — eager fresh re-launch',
          personality: s.personality,
          runtime: s.runtime,
        })
        continue
      }
      out.push({ identity: s.identity, action: 'reaped-gone', reason: 'session no longer live' })
      continue
    }
    // Idle accounting via the runtime adapter's activity proxy (claude transcript
    // / codex session mtime). wokeAt is the floor: it covers a missing proxy or a
    // missing adapter, and it also covers a proxy that PREDATES the wake — a session
    // resumed without a first turn still shows its old transcript mtime and must not
    // be reaped as idle the moment it comes up.
    let lastActive = s.wokeAt
    try {
      const activity = getAdapter(s.runtime).newestActivityMtime(s.cwd)
      if (activity != null) lastActive = Math.max(activity, s.wokeAt)
    } catch {
      /* no adapter for this runtime yet → wokeAt fallback */
    }
    const ageSecs = Math.floor((nowMs - lastActive) / 1000)
    if (ageSecs > cfg.idleSecs) {
      killSession(sock, s.identity)
      removeSessionState(cfg, s.identity)
      out.push({ identity: s.identity, action: 'reaped-idle', reason: `idle ${ageSecs}s` })
    } else {
      out.push({ identity: s.identity, action: 'alive' })
    }
  }
  return out
}
724
+
725
/**
 * C4b — re-launch every peer that a supervise pass flagged 'needs-eager-fresh'.
 *
 * Outcomes with any other action, or without both a personality and a runtime,
 * are ignored. Each remaining peer is woken through wakeOrSpawn, one at a time,
 * with an empty task: a /new has no incoming message, so the first turn is meant
 * to come from the peer's initial_prompt seed alone. Only `deps.env` is forwarded
 * from `deps`; `cfg` is always the one passed here.
 *
 * Best-effort: an exception from a wake is captured as a FAILED result instead of
 * aborting the remaining relaunches. Per the design notes for this step, a failed
 * relaunch is expected to leave the /new-mark in place so the peer still wakes
 * fresh on its next message, and a peer without an initial_prompt gets an empty
 * first turn.
 *
 * @param cfg - Lifecycle configuration handed to every wake.
 * @param outcomes - Outcomes of a supervise pass.
 * @param deps - Optional wake dependencies; only `env` is used.
 * @returns One wake result per relaunched peer, in input order.
 */
export async function processEagerRelaunches(
  cfg: LifecycleConfig,
  outcomes: SuperviseOutcome[],
  deps: WakeDeps = {},
): Promise<WakeResult[]> {
  const results: WakeResult[] = []
  for (const outcome of outcomes) {
    const { action, personality, runtime } = outcome
    if (action !== 'needs-eager-fresh') continue
    if (!personality || !runtime) continue

    let result: WakeResult
    try {
      result = await wakeOrSpawn({ personality, runtime, task: '' }, { cfg, env: deps.env })
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err)
      result = { status: 'FAILED', woke: false, reason }
    }
    results.push(result)
  }
  return results
}
753
+
754
+ // ─────────────────────────────────────────────────────────────────────────────
755
+ // folderLaunch / attachPeer — operator verbs (contract ЖЦ §Запуск из папки, §attach;
756
+ // Примитивы §Карта verbs). Both reuse wakeOrSpawn (one bring-up path); the difference
757
+ // is resume vs fresh and which runtime.
758
+ // ─────────────────────────────────────────────────────────────────────────────
759
+
760
/** Inputs for a folder launch (bring up the peer that owns a directory). */
export interface FolderLaunchOptions {
  /** Directory whose profile identifies the peer to launch. */
  cwd: string
  /** Runtime to launch with; when omitted the runtime resolved from the cwd is used. */
  runtime?: string
  /** Environment to resolve against; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv
  /** Lifecycle configuration; loaded from the environment when omitted. */
  cfg?: LifecycleConfig
}
766
+
767
/**
 * `iapeer <runtime>` (launch) — bring up the peer of the CURRENT cwd, ALWAYS FRESH
 * (contract: folder-launch never resumes). personality/runtime come from the cwd's
 * profile (resolveIdentity), not an arg; an explicit `opts.runtime` overrides the
 * profile's runtime.
 *
 * Goes through wakeOrSpawn with `resume: false`, so H4 / the wake-lock / the
 * intelligence gate all apply — incl. H4 refusal for a launchd-managed peer (a
 * fresh folder-launch alongside its launchd session would collide on the identity).
 *
 * The task is deliberately EMPTY: on a fresh wake wakeOrSpawn itself composes the
 * first message (initial_prompt seed, then the task). Passing the pre-composed seed
 * as the task would hand the seed to that composition a second time, so the seeding
 * is left to wakeOrSpawn — the peer gets its initial_prompt once if it has one,
 * else a bare interactive session the operator drives.
 *
 * @param opts - Folder-launch options (cwd is required).
 * @returns The wake result reported by wakeOrSpawn.
 */
export async function folderLaunch(opts: FolderLaunchOptions): Promise<WakeResult> {
  const env = opts.env ?? process.env
  const cfg = opts.cfg ?? loadLifecycleConfig(env)
  const identity = resolveIdentity({ cwd: opts.cwd, env })
  const runtime = opts.runtime ?? identity.runtime
  return wakeOrSpawn({ personality: identity.personality, runtime, task: '', resume: false }, { cfg, env })
}
784
+
785
/**
 * Pick, among the peer's runtimes, the one whose adapter reports the newest
 * activity timestamp for the peer's cwd (contract: attach resolves the runtime by
 * last-active transcript-mtime, NOT the profile default).
 *
 * A runtime is skipped when its adapter lookup or activity query throws, or when
 * the adapter reports no activity (null). On equal timestamps the runtime seen
 * first wins.
 *
 * @param peer - Registry record of the peer (its runtimes and cwd are read).
 * @param cfg - Lifecycle configuration; not read by this function.
 * @returns The most recently active runtime, or undefined when none has activity.
 */
export function lastActiveRuntime(peer: PeerRecord, cfg: LifecycleConfig): Runtime | undefined {
  let winner: Runtime | undefined
  let newest = -1
  for (const candidate of peer.runtimes) {
    try {
      const stamp = getAdapter(candidate).newestActivityMtime(peer.cwd)
      if (stamp === null || !(stamp > newest)) continue
      newest = stamp
      winner = candidate
    } catch {
      /* no adapter / no proxy for this runtime */
    }
  }
  return winner
}
806
+
807
/** Inputs for attaching to a registered peer. */
export interface AttachOptions {
  /** Personality of the peer to attach to; looked up in the peers index. */
  personality: string
  /** Explicit runtime to attach with; when omitted one is resolved for the peer. */
  runtime?: string
  /** Environment to resolve against; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv
  /** Lifecycle configuration; loaded from the environment when omitted. */
  cfg?: LifecycleConfig
}
813
/**
 * Result of an attach attempt, discriminated on `ok`.
 *
 * On success it carries the session identity and tmux socket path to attach to,
 * the runtime that was used, and `woke` — true when the session had to be woken
 * first, false when it was already live. On failure it carries only a reason.
 */
export type AttachResult =
  | {
      ok: true
      identity: string
      socketPath: string
      woke: boolean
      runtime: Runtime
    }
  | {
      ok: false
      reason: string
    }
816
+
817
/**
 * `iapeer attach <peer> [runtime]` — ensure the peer is live, then hand back the
 * socket/identity for the caller to `tmux attach`. ALWAYS RESUME (contract: attach
 * never starts fresh).
 *
 * Runtime selection: an explicit runtime wins (and must pass resolveWakeRuntime),
 * else the LAST-ACTIVE runtime by transcript-mtime, else the profile default. An
 * empty-string runtime is treated as "not given", so it can never slip past the
 * validation and end up in the identity / socket path.
 *
 * A session that is already live is returned as-is (`woke: false`); otherwise the
 * peer is woken through wakeOrSpawn with `resume: true` and an empty task, and a
 * failed wake is reported as `{ ok: false }` with the wake's reason.
 *
 * @param opts - Attach options (personality is required).
 * @returns Where to attach on success, or the reason the attach is not possible.
 */
export async function attachPeer(opts: AttachOptions): Promise<AttachResult> {
  const env = opts.env ?? process.env
  const cfg = opts.cfg ?? loadLifecycleConfig(env)
  const peer = findPeer(readPeersIndex({ env }), opts.personality)
  if (!peer) return { ok: false, reason: `peer "${opts.personality}" is not registered` }
  // Normalise once so validation and selection agree on what "explicit" means.
  const requested = opts.runtime || undefined
  const runtimeResult = resolveWakeRuntime(requested, peer)
  if (requested !== undefined && !runtimeResult.ok) return { ok: false, reason: runtimeResult.error.message }
  // last-active (by mtime) wins over the profile default when no runtime is given.
  const runtime = requested ?? lastActiveRuntime(peer, cfg) ?? peer.runtime
  const identity = buildProcessAddress(runtime, opts.personality)
  const sock = buildSocketPath(runtime, opts.personality, cfg.sockDir)
  if (sessionAlive(sock, identity)) return { ok: true, identity, socketPath: sock, woke: false, runtime }
  const woke = await wakeOrSpawn({ personality: opts.personality, runtime, task: '', resume: true }, { cfg, env })
  if (woke.status === 'FAILED') return { ok: false, reason: woke.reason ?? 'wake failed' }
  return { ok: true, identity, socketPath: sock, woke: true, runtime }
}