@agfpd/iapeer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/iapeer +25 -0
- package/package.json +37 -0
- package/src/cli/cli.test.ts +130 -0
- package/src/cli/index.ts +608 -0
- package/src/cli/listTui.test.ts +70 -0
- package/src/cli/listTui.ts +165 -0
- package/src/codec/codec.test.ts +271 -0
- package/src/codec/index.ts +217 -0
- package/src/core/constants.test.ts +21 -0
- package/src/core/constants.ts +180 -0
- package/src/core/errors.ts +20 -0
- package/src/core/index.ts +3 -0
- package/src/core/normalize.test.ts +98 -0
- package/src/core/normalize.ts +89 -0
- package/src/core/socket.ts +63 -0
- package/src/create/create.test.ts +143 -0
- package/src/create/index.ts +178 -0
- package/src/daemon/daemon-http.test.ts +114 -0
- package/src/daemon/daemon.test.ts +103 -0
- package/src/daemon/index.ts +439 -0
- package/src/daemon/main.test.ts +194 -0
- package/src/daemon/main.ts +230 -0
- package/src/enable/enable.test.ts +92 -0
- package/src/enable/index.ts +381 -0
- package/src/identity/identity.test.ts +262 -0
- package/src/identity/index.ts +603 -0
- package/src/index.ts +27 -0
- package/src/init/index.ts +408 -0
- package/src/init/init.test.ts +171 -0
- package/src/init/runtime-resolve.test.ts +49 -0
- package/src/install/index.ts +84 -0
- package/src/install/install.test.ts +31 -0
- package/src/launch/adapters/claude.ts +250 -0
- package/src/launch/adapters/codex.ts +329 -0
- package/src/launch/adapters/notifier.ts +90 -0
- package/src/launch/adapters/telegram.ts +130 -0
- package/src/launch/bootstrap.test.ts +56 -0
- package/src/launch/composeSystemPrompt.layers.test.ts +319 -0
- package/src/launch/composeSystemPrompt.test.ts +98 -0
- package/src/launch/composeSystemPrompt.ts +261 -0
- package/src/launch/index.ts +253 -0
- package/src/launch/launch.test.ts +233 -0
- package/src/launch/launchd.test.ts +363 -0
- package/src/launch/launchd.ts +375 -0
- package/src/launch/launchdRun.ts +168 -0
- package/src/launch/sockdir.test.ts +70 -0
- package/src/launch/types.ts +300 -0
- package/src/lifecycle/index.ts +840 -0
- package/src/lifecycle/lifecycle.test.ts +496 -0
- package/src/onboard/index.ts +135 -0
- package/src/onboard/onboard.test.ts +39 -0
- package/src/provision/index.ts +170 -0
- package/src/provision/provision.test.ts +104 -0
- package/src/registry/index.ts +453 -0
- package/src/registry/registry.test.ts +400 -0
- package/src/runtime/deploy.ts +230 -0
- package/src/runtime/index.ts +191 -0
- package/src/runtime/runtime.test.ts +226 -0
- package/src/storage/index.ts +331 -0
- package/src/storage/peers-home.test.ts +34 -0
- package/src/storage/storage.test.ts +65 -0
- package/src/transport/index.ts +522 -0
- package/tsconfig.json +17 -0
|
@@ -0,0 +1,840 @@
|
|
|
1
|
+
// Lifecycle — wake-on-miss / supervise / reap. The warm-on-demand core: a dead
|
|
2
|
+
// peer is woken (spawned) on demand, its first message delivered, and idle
|
|
3
|
+
// sessions are reaped. Consolidated from Spawned-Peer spawner.ts (performSpawn)
|
|
4
|
+
// + watcher.ts (boot / ready-gate / idle phases), but with the detached
|
|
5
|
+
// per-session watcher COLLAPSED into the daemon: wakeOrSpawn runs boot + ready
|
|
6
|
+
// inline (the daemon awaits it) and a single superviseTick drives idle-reap.
|
|
7
|
+
//
|
|
8
|
+
// HARD SAFETY (H4): the daemon NEVER wakes / reaps / respawns / sweeps a peer
|
|
9
|
+
// that has a launchd plist (~/Library/LaunchAgents/com.iapeer.<p>.plist). Such a
|
|
10
|
+
// peer is launchd-managed (KeepAlive owns its lifecycle); the daemon touching it
|
|
11
|
+
// would race launchd on the live fleet. isLaunchdManaged() is checked FIRST,
|
|
12
|
+
// before any wake or reap. wakeOrSpawn refuses a launchd peer; superviseTick and
|
|
13
|
+
// sweepZombies skip it. Only daemon-owned (no-plist) peers are managed here.
|
|
14
|
+
|
|
15
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, rmSync, writeFileSync } from 'fs'
|
|
16
|
+
import { homedir } from 'os'
|
|
17
|
+
import { join } from 'path'
|
|
18
|
+
import { spawnSync } from 'child_process'
|
|
19
|
+
import * as lockfile from 'proper-lockfile'
|
|
20
|
+
import {
|
|
21
|
+
STATE_DIR,
|
|
22
|
+
LOGS_DIR,
|
|
23
|
+
isRuntime,
|
|
24
|
+
resolveSockDir,
|
|
25
|
+
type Runtime,
|
|
26
|
+
} from '../core/constants.ts'
|
|
27
|
+
import { buildProcessAddress, buildSocketPath } from '../core/socket.ts'
|
|
28
|
+
import { err, ok, type Result } from '../core/errors.ts'
|
|
29
|
+
import { resolveGlobalRoot } from '../storage/index.ts'
|
|
30
|
+
import { readPeerProfile, resolveIdentity } from '../identity/index.ts'
|
|
31
|
+
import { findPeer, publicPeerSummary, readPeersIndex, type PeerRecord, type PublicPeerSummary } from '../registry/index.ts'
|
|
32
|
+
// Ф3: launch = HOW to bring up ONE session (runtime-agnostic primitive + adapter).
|
|
33
|
+
// lifecycle decides WHEN/HOW-MANY and delegates the bring-up to launch.
|
|
34
|
+
import {
|
|
35
|
+
getAdapter,
|
|
36
|
+
launch,
|
|
37
|
+
launchAgentsDir,
|
|
38
|
+
launchdLabel,
|
|
39
|
+
type LaunchConfig,
|
|
40
|
+
type LaunchSpec,
|
|
41
|
+
} from '../launch/index.ts'
|
|
42
|
+
import { composeSystemPrompt, gatherPromptInput } from '../launch/composeSystemPrompt.ts'
|
|
43
|
+
|
|
44
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
45
|
+
// Config
|
|
46
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
47
|
+
|
|
48
|
+
/**
 * Resolved settings for the lifecycle layer. Built by loadLifecycleConfig from
 * environment variables with built-in defaults; every helper in this module
 * takes it as `cfg`.
 */
export interface LifecycleConfig {
  /** claude binary (IAPEER_CLAUDE_BIN, default `<home>/.local/bin/claude`). */
  claudeBin: string
  /** codex binary (IAPEER_CODEX_BIN, default `codex`). */
  codexBin: string
  /** Socket directory, as returned by resolveSockDir(env). */
  sockDir: string
  /** Directory holding session state, stop flags, /new marks, wake locks and composed prompts. */
  stateDir: string // ~/.iapeer/state/lifecycle
  logDir: string // ~/.iapeer/logs/lifecycle
  /** IAPEER_BOOT_DEADLINE_SECS, default 240 (seconds — consumer not visible in this section). */
  bootDeadlineSecs: number
  /** IAPEER_READY_GATE_SECS, default 120 (seconds — consumer not visible in this section). */
  readyGateSecs: number
  /** IAPEER_IDLE_SECS, default 3600 (seconds — presumably the idle-reap threshold; confirm against superviseTick). */
  idleSecs: number
  /** IAPEER_MAX_AGE_SECS, default 14400 (seconds — consumer not visible in this section). */
  maxAgeSecs: number
}
|
|
59
|
+
|
|
60
|
+
/**
 * Build the lifecycle configuration from `env`, falling back to built-in
 * defaults for anything unset.
 *
 * - HOME (blank → os.homedir()) anchors the default claude binary path.
 * - IAPEER_CLAUDE_BIN / IAPEER_CODEX_BIN override the runtime binaries. A blank
 *   (empty or whitespace-only) value is treated as unset: an empty binary path
 *   can never be launched, and HOME already follows the same blank-is-unset rule.
 * - IAPEER_BOOT_DEADLINE_SECS / IAPEER_READY_GATE_SECS / IAPEER_IDLE_SECS /
 *   IAPEER_MAX_AGE_SECS are parsed as base-10 integers; a missing, non-numeric
 *   or non-positive value falls back to the default.
 *
 * @param env Environment to read (defaults to `process.env`; injectable for tests).
 * @returns The fully populated LifecycleConfig.
 */
export function loadLifecycleConfig(env: NodeJS.ProcessEnv = process.env): LifecycleConfig {
  const home = env.HOME?.trim() || homedir()
  const root = resolveGlobalRoot(env)
  // Blank override → default. A non-blank value is passed through untouched.
  const bin = (raw: string | undefined, dflt: string): string =>
    raw !== undefined && raw.trim() !== '' ? raw : dflt
  // Positive integer or the default (parseInt yields NaN for '' / garbage).
  const num = (raw: string | undefined, dflt: number): number => {
    const n = parseInt(raw ?? '', 10)
    return Number.isFinite(n) && n > 0 ? n : dflt
  }
  return {
    claudeBin: bin(env.IAPEER_CLAUDE_BIN, join(home, '.local', 'bin', 'claude')),
    codexBin: bin(env.IAPEER_CODEX_BIN, 'codex'),
    sockDir: resolveSockDir(env),
    stateDir: join(root, STATE_DIR, 'lifecycle'),
    logDir: join(root, LOGS_DIR, 'lifecycle'),
    bootDeadlineSecs: num(env.IAPEER_BOOT_DEADLINE_SECS, 240),
    readyGateSecs: num(env.IAPEER_READY_GATE_SECS, 120),
    idleSecs: num(env.IAPEER_IDLE_SECS, 3600),
    maxAgeSecs: num(env.IAPEER_MAX_AGE_SECS, 14400),
  }
}
|
|
79
|
+
|
|
80
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
81
|
+
// H4 — launchd-managed detector (checked FIRST, before any wake/reap)
|
|
82
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
83
|
+
|
|
84
|
+
/**
 * Report whether `personality` is launchd-managed, i.e. whether the plist
 * `<launchdLabel(personality)>.plist` exists in the LaunchAgents directory.
 *
 * A launchd peer's lifecycle belongs to launchd (KeepAlive), so the daemon must
 * stay READ-ONLY for it: it may deliver to a live one, but must never wake,
 * reap, respawn or sweep it. This is the hard guard (H4) against racing launchd.
 *
 * @param personality Peer personality name.
 * @param env Environment handed to launchAgentsDir (the original notes that
 *   IAPEER_LAUNCHAGENTS_DIR redirects the directory for tests).
 */
export function isLaunchdManaged(personality: string, env: NodeJS.ProcessEnv = process.env): boolean {
  // Directory and label both come from the launch/ helpers, so this detector
  // and the plist generator always agree on the file name.
  const agentsDir = launchAgentsDir(env)
  const plistName = `${launchdLabel(personality)}.plist`
  const plistPath = join(agentsDir, plistName)
  return existsSync(plistPath)
}
|
|
98
|
+
|
|
99
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
100
|
+
// Session state — what the supervisor walks (daemon-owned sessions only)
|
|
101
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
102
|
+
|
|
103
|
+
/**
 * Record of one daemon-owned session, persisted as JSON in
 * `<stateDir>/<identity>.session` and read back by readSessionStates.
 */
export interface SessionState {
  /** Process address of the session (built from runtime + personality); also the state file's base name. */
  identity: string
  /** Runtime the session was brought up with. */
  runtime: Runtime
  /** Peer personality name. */
  personality: string
  /** Working directory of the peer (taken from the registry record on wake). */
  cwd: string
  /** `Date.now()` timestamp (ms) recorded when the state was written. */
  wokeAt: number
}
|
|
110
|
+
|
|
111
|
+
/** Location of the session-state file for `identity`: `<stateDir>/<identity>.session`. */
function sessionStatePath(cfg: LifecycleConfig, identity: string): string {
  const fileName = `${identity}.session`
  return join(cfg.stateDir, fileName)
}
|
|
114
|
+
|
|
115
|
+
/**
 * Persist the supervisor's record of a session to
 * `<stateDir>/<identity>.session` as JSON (file mode 0o600, directory 0o700).
 *
 * Best-effort by contract: every filesystem failure is swallowed so that
 * session bookkeeping can never block or fail a wake.
 *
 * @param cfg Lifecycle configuration (supplies `stateDir`).
 * @param state Session record to write; `state.identity` names the file.
 */
function writeSessionState(cfg: LifecycleConfig, state: SessionState): void {
  try {
    // mkdir sits INSIDE the try: a failure to create the state directory is
    // just as non-fatal as a failed write and must not escape to the caller.
    mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
    writeFileSync(sessionStatePath(cfg, state.identity), JSON.stringify(state), { mode: 0o600 })
  } catch {
    /* best-effort — supervision degrades to liveness scan, never blocks a wake */
  }
}
|
|
123
|
+
|
|
124
|
+
/**
 * Delete the session-state file of `identity`. A missing file is not an error
 * (`force: true`), and any other failure is deliberately ignored.
 */
function removeSessionState(cfg: LifecycleConfig, identity: string): void {
  try {
    const target = sessionStatePath(cfg, identity)
    rmSync(target, { force: true })
  } catch {
    /* nothing to remove, or removal failed — ignored either way */
  }
}
|
|
131
|
+
|
|
132
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
133
|
+
// C1 — durable stopped flag (warm-on-demand stop/start; contract ЖЦ §stop/start,
|
|
134
|
+
// Демон §stopped). `stop <peer>` on a warm runtime kills the session AND drops this
|
|
135
|
+
// flag → the daemon REFUSES to wake the peer (a DELIBERATE operator halt, not a
|
|
136
|
+
// fault — no message queue, the sender gets an explicit "stopped" error). `start`
|
|
137
|
+
// clears it (wakeable again). Distinct from idle-reap (temporary; the daemon DOES
|
|
138
|
+
// wake on the next message). Lives next to the session-state, in state/lifecycle —
|
|
139
|
+
// daemon-owned, durable across restarts. Keyed on IDENTITY (runtime-personality):
|
|
140
|
+
// `stop <peer> <runtime>` halts one runtime; the flag is per-runtime presence.
|
|
141
|
+
// always-on (launchd) peers are NOT stopped this way — their stop is launchctl
|
|
142
|
+
// bootout (ЖЦ); a launchd peer never carries this flag (and the daemon is H4
|
|
143
|
+
// read-only for it regardless).
|
|
144
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
145
|
+
|
|
146
|
+
/** Location of the durable stop flag for `identity`: `<stateDir>/<identity>.stopped`. */
function stoppedFlagPath(cfg: LifecycleConfig, identity: string): string {
  const flagName = `${identity}.stopped`
  return join(cfg.stateDir, flagName)
}
|
|
149
|
+
|
|
150
|
+
/**
 * Whether `identity` carries the durable stop flag. While the flag file exists
 * the daemon must refuse to wake the peer.
 */
export function isStopped(cfg: LifecycleConfig, identity: string): boolean {
  const flag = stoppedFlagPath(cfg, identity)
  return existsSync(flag)
}
|
|
154
|
+
|
|
155
|
+
/**
 * Place the durable stop flag for `identity` (done by the `stop` verb after the
 * session is killed). The flag file holds the ISO timestamp of the stop; the
 * state directory is created on demand. Filesystem errors propagate.
 */
export function setStopped(cfg: LifecycleConfig, identity: string): void {
  mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
  const flag = stoppedFlagPath(cfg, identity)
  const stamp = `${new Date().toISOString()}\n`
  writeFileSync(flag, stamp, { mode: 0o600 })
}
|
|
160
|
+
|
|
161
|
+
/**
 * Remove the durable stop flag for `identity` (done by the `start` verb), making
 * the peer wakeable again. A missing flag is fine; removal errors are ignored.
 */
export function clearStopped(cfg: LifecycleConfig, identity: string): void {
  try {
    const flag = stoppedFlagPath(cfg, identity)
    rmSync(flag, { force: true })
  } catch {
    /* flag absent or not removable — ignored */
  }
}
|
|
169
|
+
|
|
170
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
171
|
+
// C4 — /new graceful mark (contract ЖЦ §/new). The AGENT, on an owner /new,
|
|
172
|
+
// writes a handoff to durable memory, drops THIS mark, and self-kills. The daemon
|
|
173
|
+
// detects the mark and re-launches EAGERLY as FRESH + initial_prompt (contrast:
|
|
174
|
+
// idle-reap is markless → lazy resume on the next message). The mark is consumed on
|
|
175
|
+
// that fresh re-launch. The mark TEXT/agent-side (doctrine /new instruction) is a
|
|
176
|
+
// separate deploy artifact; THIS is only the daemon side (detect → fresh + seed).
|
|
177
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
178
|
+
|
|
179
|
+
/** Location of the /new graceful mark for `identity`: `<stateDir>/<identity>.new`. */
function newMarkPath(cfg: LifecycleConfig, identity: string): string {
  const markName = `${identity}.new`
  return join(cfg.stateDir, markName)
}
|
|
182
|
+
|
|
183
|
+
/**
 * Whether `identity` carries a /new graceful mark. When present, the next wake
 * is an eager FRESH re-launch seeded with the initial prompt.
 */
export function hasNewMark(cfg: LifecycleConfig, identity: string): boolean {
  const mark = newMarkPath(cfg, identity)
  return existsSync(mark)
}
|
|
187
|
+
|
|
188
|
+
/**
 * Place the /new graceful mark for `identity` (part of the agent's self-kill
 * ritual, done before it exits). The mark file holds the ISO timestamp; the
 * state directory is created on demand. Filesystem errors propagate.
 */
export function setNewMark(cfg: LifecycleConfig, identity: string): void {
  mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
  const mark = newMarkPath(cfg, identity)
  const stamp = `${new Date().toISOString()}\n`
  writeFileSync(mark, stamp, { mode: 0o600 })
}
|
|
193
|
+
|
|
194
|
+
/**
 * Consume the /new graceful mark for `identity` (the daemon does this on the
 * eager fresh re-launch). A missing mark is fine; removal errors are ignored.
 */
export function clearNewMark(cfg: LifecycleConfig, identity: string): void {
  try {
    const mark = newMarkPath(cfg, identity)
    rmSync(mark, { force: true })
  } catch {
    /* mark absent or not removable — ignored */
  }
}
|
|
202
|
+
|
|
203
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
204
|
+
// resolveWakeMode (C3a + C4a) — the resume-vs-fresh decision, contract ЖЦ
|
|
205
|
+
// §resume/fresh. Pure but for the /new-mark consume (a wake side-effect); takes the
|
|
206
|
+
// adapter's resolveResume as a parameter so it is unit-testable without a runtime.
|
|
207
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
208
|
+
|
|
209
|
+
/** Outcome of the resume-vs-fresh decision made by resolveWakeMode. */
export interface WakeMode {
  /** true → resume an existing conversation; false → launch fresh. */
  resume: boolean
  /** Reference returned by the adapter's resume preflight (`ref`); only set when resuming. */
  resumeRef?: string
  /** Set ONLY for an EXPLICIT resume request that found nothing to resume — the
   * caller must fail loud (never a silent fresh fallback). */
  failReason?: string
}
|
|
216
|
+
|
|
217
|
+
/**
 * Pick RESUME or FRESH for a wake. Rules, highest priority first:
 *   1. A /new mark exists → FRESH, and the mark is consumed here (the one side
 *      effect of this otherwise pure function).
 *   2. `argsResume === false` (explicit fresh) → FRESH, no preflight.
 *   3. `argsResume === true` (explicit resume, e.g. attach) → RESUME if the
 *      preflight succeeds; otherwise `failReason` is set so the caller fails
 *      loud rather than silently falling back to fresh.
 *   4. `argsResume === undefined` (default) → RESUME if the preflight succeeds,
 *      else FRESH (a first-ever launch has nothing to resume; not an error).
 *
 * @param cfg Lifecycle configuration (locates the /new mark).
 * @param identity Process address of the session being woken.
 * @param cwd Peer working directory, handed to `resolveResume`.
 * @param argsResume The caller's explicit resume/fresh request, if any.
 * @param resolveResume The adapter's resume preflight, injected so the decision
 *   is unit-testable without a runtime.
 */
export function resolveWakeMode(
  cfg: LifecycleConfig,
  identity: string,
  cwd: string,
  argsResume: boolean | undefined,
  resolveResume: (cwd: string) => { ok: boolean; ref?: string; reason?: string },
): WakeMode {
  // Rule 1 — graceful /new re-launch: consume the mark and start fresh.
  if (hasNewMark(cfg, identity)) {
    clearNewMark(cfg, identity)
    return { resume: false }
  }

  // Rule 2 — explicit fresh never runs the preflight.
  if (argsResume === false) return { resume: false }

  // Rules 3 and 4 share the single preflight; they differ only on a miss.
  const preflight = resolveResume(cwd)
  if (preflight.ok) return { resume: true, resumeRef: preflight.ref }

  if (argsResume === true) {
    return { resume: false, failReason: preflight.reason ?? 'resume requested but nothing to resume' }
  }
  return { resume: false }
}
|
|
247
|
+
|
|
248
|
+
/**
 * Load every valid session record from `cfg.stateDir`.
 *
 * Only `*.session` files are considered. A file is skipped when it cannot be
 * read or parsed, or when its content fails validation: `identity` and `cwd`
 * must be non-empty STRINGS and `runtime` must pass isRuntime. (A truthy
 * non-string such as a number or object used to pass and would then be handed
 * to callers typed as `string`.)
 *
 * @returns The valid records; an empty array when the state directory cannot
 *   be listed (e.g. it does not exist yet).
 */
export function readSessionStates(cfg: LifecycleConfig): SessionState[] {
  let files: string[]
  try {
    files = readdirSync(cfg.stateDir)
  } catch {
    return []
  }
  const out: SessionState[] = []
  for (const f of files) {
    if (!f.endsWith('.session')) continue
    try {
      // The cast is only a shape hint — the fields are verified right below.
      const s = JSON.parse(readFileSync(join(cfg.stateDir, f), 'utf8')) as SessionState
      const valid =
        Boolean(s) &&
        typeof s.identity === 'string' &&
        s.identity !== '' &&
        typeof s.cwd === 'string' &&
        s.cwd !== '' &&
        isRuntime(s.runtime)
      if (valid) out.push(s)
    } catch {
      /* skip garbage */
    }
  }
  return out
}
|
|
267
|
+
|
|
268
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
269
|
+
// H5 — wake-runtime resolution (registry-based, NO live-socket scan)
|
|
270
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
271
|
+
|
|
272
|
+
/**
 * Choose the runtime to wake on a miss using the registry record alone — live
 * sockets are never scanned, since a dead peer has none.
 *
 * Resolution order:
 *   1. `requested`, when given: it must be a valid runtime AND be declared for
 *      the peer (equal to `peer.runtime` or listed in `peer.runtimes`).
 *   2. `peer.runtime` (the registry default).
 *   3. The first entry of `peer.runtimes`.
 *   4. Otherwise an error asking the caller to specify a runtime.
 *
 * @param requested Runtime explicitly asked for by the caller, if any.
 * @param peer Registry record of the peer being woken.
 * @returns `ok(runtime)` or `err(message)`.
 */
export function resolveWakeRuntime(
  requested: string | undefined,
  peer: PeerRecord,
): Result<Runtime> {
  if (requested) {
    if (!isRuntime(requested)) return err(`invalid runtime "${requested}"`)
    const declared = peer.runtime === requested || peer.runtimes.includes(requested)
    if (declared) return ok(requested)
    return err(`runtime "${requested}" is not declared for "${peer.personality}"`)
  }

  // No explicit request: fall back to the registry, default first.
  if (peer.runtime) return ok(peer.runtime)
  if (peer.runtimes.length > 0) return ok(peer.runtimes[0])
  return err(`cannot pick a runtime to wake "${peer.personality}"; specify runtime`)
}
|
|
293
|
+
|
|
294
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
295
|
+
// Wake lock — serialize wake per identity (idempotent; concurrent = one spawn)
|
|
296
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
297
|
+
|
|
298
|
+
/**
 * Execute `fn` under an exclusive per-identity advisory lock, so concurrent
 * sends to the same dead peer result in exactly ONE spawn: the loser waits for
 * the lock and then hits the has-session fast path.
 *
 * The lock target is `<stateDir>/<identity>.wake.lock`, held through
 * proper-lockfile. The lock is released in a `finally`, so it is dropped
 * whether `fn` resolves or rejects.
 *
 * @param cfg Lifecycle configuration (supplies `stateDir`).
 * @param identity Process address to serialize on.
 * @param fn Work to run while the lock is held.
 * @returns Whatever `fn` resolves to.
 */
export async function withWakeLock<T>(
  cfg: LifecycleConfig,
  identity: string,
  fn: () => Promise<T>,
): Promise<T> {
  mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
  const lockFile = join(cfg.stateDir, `${identity}.wake.lock`)
  // Append-mode write of nothing: makes sure the lock target exists without
  // truncating an existing one.
  writeFileSync(lockFile, '', { flag: 'a', mode: 0o600 })

  const unlock = await lockfile.lock(lockFile, {
    realpath: false,
    stale: 60_000,
    update: 5_000,
    retries: { retries: 30, factor: 1.3, minTimeout: 100, maxTimeout: 1_000 },
  })
  try {
    const outcome = await fn()
    return outcome
  } finally {
    await unlock()
  }
}
|
|
324
|
+
|
|
325
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
326
|
+
// tmux helpers
|
|
327
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
328
|
+
|
|
329
|
+
/**
 * Run `tmux -S <sock> <args...>` synchronously.
 *
 * @returns `ok` — true only on exit status 0; `out` / `err` — captured stdout /
 *   stderr as UTF-8 text ('' when nothing was captured, e.g. the binary could
 *   not be started).
 */
function tmux(sock: string, ...args: string[]): { ok: boolean; out: string; err: string } {
  const argv = ['-S', sock].concat(args)
  const { status, stdout, stderr } = spawnSync('tmux', argv, { encoding: 'utf8' })
  return { ok: status === 0, out: stdout ?? '', err: stderr ?? '' }
}
|
|
333
|
+
/** Whether a tmux session named `identity` exists on the server at socket `sock` (`tmux has-session`). */
function sessionAlive(sock: string, identity: string): boolean {
  const probe = tmux(sock, 'has-session', '-t', identity)
  return probe.ok
}
|
|
336
|
+
|
|
337
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
338
|
+
// System-prompt composition for a woken peer (delegates the jq doctrine-merge to
|
|
339
|
+
// launch/composeSystemPrompt). The tmux launch + boot/ready + activity-proxy all
|
|
340
|
+
// moved to launch/ (Ф3); lifecycle only gathers the inputs and decides when.
|
|
341
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
342
|
+
|
|
343
|
+
/**
 * Collect the host facts that feed the system prompt.
 *
 * - `platform`  — `process.platform` as a string.
 * - `osVersion` — `sw_vers -productVersion` on darwin, VERSION_ID from
 *   /etc/os-release on linux, otherwise (or on any failure) 'unknown'.
 * - `user`      — trimmed `env.USER`, else the output of `id -un`, else 'unknown'.
 * - `hostname`  — output of `hostname -s`, else 'unknown'.
 * - `today`     — the current LOCAL date as YYYY-MM-DD.
 *
 * @param env Environment to read USER from.
 */
function gatherSystemFacts(env: NodeJS.ProcessEnv): {
  platform: string
  osVersion: string
  user: string
  hostname: string
  today: string
} {
  const platform: string = String(process.platform)

  const detectOsVersion = (): string => {
    switch (platform) {
      case 'darwin': {
        const sw = spawnSync('sw_vers', ['-productVersion'], { encoding: 'utf8' })
        if (sw.status !== 0) return 'unknown'
        return (sw.stdout ?? '').trim() || 'unknown'
      }
      case 'linux':
        try {
          const hit = readFileSync('/etc/os-release', 'utf8').match(/^VERSION_ID="?([^"\n]+)/m)
          return hit?.[1] ?? 'unknown'
        } catch {
          return 'unknown'
        }
      default:
        return 'unknown'
    }
  }
  const osVersion = detectOsVersion()

  const hostProbe = spawnSync('hostname', ['-s'], { encoding: 'utf8' })
  const shortHost = hostProbe.status === 0 ? (hostProbe.stdout ?? '').trim() : ''
  const hostname = shortHost || 'unknown'

  // USER from the environment wins; `id -un` is consulted only when it is blank.
  const envUser = env.USER?.trim()
  const user = envUser
    ? envUser
    : (spawnSync('id', ['-un'], { encoding: 'utf8' }).stdout ?? '').trim() || 'unknown'

  const now = new Date()
  const two = (n: number): string => String(n).padStart(2, '0')
  const today = [now.getFullYear(), two(now.getMonth() + 1), two(now.getDate())].join('-')

  return { platform, osVersion, user, hostname, today }
}
|
|
380
|
+
|
|
381
|
+
/**
 * Build the merged system prompt for a doctrine-carrying peer, write it to
 * `<stateDir>/<identity>.system-prompt.md` (mode 0o600) and return that path,
 * for use with --system-prompt-file / model_instructions_file.
 *
 * Channel A, all four layers (per the project's system-prompt assembly doc):
 * 1 YAML facts, 2 IAPEER.md (global + local), 3 the normalized registry,
 * 4 every other <DOMAIN>.md (global + local). Filesystem discovery is done by
 * gatherPromptInput; composeSystemPrompt lays out the bytes. Layers 3 and 4
 * contribute nothing when there are no peers and no extra domains, so a bare
 * doctrine still produces golden-identical output.
 *
 * BARE-SESSION GATE: a peer without a local `<cwd>/.iapeer/IAPEER.md` yields
 * `undefined` (a throwaway test peer launches bare). The local doctrine is what
 * marks a peer as configured.
 *
 * @param peer Registry record (personality + description go into the prompt).
 * @param cwd Peer working directory.
 * @param identity Process address; names the output file.
 * @param cfg Lifecycle configuration (supplies `stateDir`).
 * @param env Environment, forwarded to fact gathering and prompt input.
 * @param peers Registry summaries already loaded by the caller.
 * @returns Path of the written prompt file, or `undefined` for a bare session.
 */
function composePeerPrompt(
  peer: PeerRecord,
  cwd: string,
  identity: string,
  cfg: LifecycleConfig,
  env: NodeJS.ProcessEnv,
  peers: PublicPeerSummary[],
): string | undefined {
  const hasLocalDoctrine = existsSync(join(cwd, '.iapeer', 'IAPEER.md'))
  if (!hasLocalDoctrine) return undefined

  const systemFacts = gatherSystemFacts(env)
  // `peers` is the registry the wake path already read for findPeer; passing it
  // through keeps gatherPromptInput from re-reading and re-parsing
  // peers-profiles.json on this hot path (and keeps a corrupt-registry failure
  // at that single read).
  const promptInput = gatherPromptInput({
    personality: peer.personality,
    description: peer.description,
    cwd,
    ...systemFacts,
    env,
    peers,
  })
  const promptText = composeSystemPrompt(promptInput)

  mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
  const outPath = join(cfg.stateDir, `${identity}.system-prompt.md`)
  writeFileSync(outPath, promptText, { mode: 0o600 })
  return outPath
}
|
|
424
|
+
|
|
425
|
+
/**
 * C2 — build the first message handed to a woken session.
 *
 * For a FRESH wake the peer's `initial_prompt` (the launch seed from its local
 * profile) is placed before the routed `task`, so the agent reads its opening
 * directive first and the message it must answer second. On a resume, or when
 * no seed is available, the task is delivered as-is. Reading the profile is
 * best-effort: a read error simply means "no seed" and never blocks the wake.
 *
 * @param cwd Peer working directory (where the profile is read from).
 * @param task The routed message; may be empty on an eager /new re-launch.
 * @param fresh true for a fresh launch, false for a resume.
 * @returns `seed`, `seed + blank line + task`, or `task` alone.
 */
export function composeFirstMessage(cwd: string, task: string, fresh: boolean): string {
  if (fresh) {
    let seed: string | undefined
    try {
      seed = readPeerProfile(cwd)?.initial_prompt
    } catch {
      /* invalid or absent profile → proceed without a seed */
    }
    if (seed) {
      // Seed first, then the routed message. With no incoming message (eager
      // /new re-launch, C4b) the seed stands on its own.
      return task ? `${seed}\n\n${task}` : seed
    }
  }
  return task
}
|
|
446
|
+
|
|
447
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
448
|
+
// wakeOrSpawn — the WakeFn (= performSpawn consolidated, boot+ready inline)
|
|
449
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
450
|
+
|
|
451
|
+
/** Request handed to wakeOrSpawn. */
export interface WakeArgs {
  /** Personality of the peer to wake; looked up in the peers registry. */
  personality: string
  /** Explicitly requested runtime; when omitted the registry decides (see resolveWakeRuntime). */
  runtime?: string
  /** NOTE(review): not read in the portion of wakeOrSpawn visible here — confirm its use. */
  topic?: string
  /** First message delivered to the woken session (the routed envelope). */
  task: string
  /** true = explicit resume, false = explicit fresh, omitted = default (see resolveWakeMode). */
  resume?: boolean
}
|
|
459
|
+
|
|
460
|
+
/** Outcome of wakeOrSpawn. */
export interface WakeResult {
  /** READY — a live session is available; FAILED — the wake was refused or did not succeed. */
  status: 'READY' | 'FAILED'
  /** false on every refusal and on the live-session fast path; presumably true only after an actual spawn — confirm. */
  woke: boolean
  /** Resolved runtime, once runtime resolution has succeeded. */
  runtime?: Runtime
  /** Process address (identity) of the session, set on READY. */
  process_address?: string
  /** Human-readable explanation accompanying a FAILED status. */
  reason?: string
  /** C1: the wake was refused because the peer carries a durable stop flag (a
   * deliberate halt, distinct from offline/wake-failure). The sender is told the
   * peer is stopped, not that delivery failed transiently. */
  stopped?: boolean
}
|
|
471
|
+
|
|
472
|
+
/** Optional injection points for wakeOrSpawn (primarily for tests). */
export interface WakeDeps {
  /** Environment to use; wakeOrSpawn falls back to `process.env`. */
  env?: NodeJS.ProcessEnv
  /** Pre-built configuration; wakeOrSpawn falls back to loadLifecycleConfig(env). */
  cfg?: LifecycleConfig
}
|
|
476
|
+
|
|
477
|
+
/**
|
|
478
|
+
* Wake (or, idempotently, reuse) a peer session and deliver `task` as its first
|
|
479
|
+
* message; resolve to READY only after the model has produced its first turn
|
|
480
|
+
* (transcript mtime advances past baseline). Serialized per-identity by
|
|
481
|
+
* withWakeLock. Refuses a launchd-managed peer (H4). Ф2 claude path; codex is a
|
|
482
|
+
* follow-up (the structure generalizes).
|
|
483
|
+
*/
|
|
484
|
+
export async function wakeOrSpawn(args: WakeArgs, deps: WakeDeps = {}): Promise<WakeResult> {
|
|
485
|
+
const env = deps.env ?? process.env
|
|
486
|
+
const cfg = deps.cfg ?? loadLifecycleConfig(env)
|
|
487
|
+
|
|
488
|
+
// Heal strays before launching — the sweep-at-spawn-start. This is the SAME
|
|
489
|
+
// H4-guarded superviseTick the daemon timer runs, so both reap entry points
|
|
490
|
+
// (timer + wake) go through one guarded path. Best-effort: never block a wake.
|
|
491
|
+
try {
|
|
492
|
+
superviseTick(cfg, { env })
|
|
493
|
+
} catch {
|
|
494
|
+
/* supervision must never affect this wake's outcome */
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
const peersIndex = readPeersIndex({ env })
|
|
498
|
+
const peer = findPeer(peersIndex, args.personality)
|
|
499
|
+
if (!peer) return { status: 'FAILED', woke: false, reason: `unknown peer "${args.personality}"` }
|
|
500
|
+
|
|
501
|
+
// H4 — never wake a launchd-managed peer (launchd KeepAlive owns it).
|
|
502
|
+
if (isLaunchdManaged(args.personality, env)) {
|
|
503
|
+
return {
|
|
504
|
+
status: 'FAILED',
|
|
505
|
+
woke: false,
|
|
506
|
+
reason: `"${args.personality}" is launchd-managed; the daemon does not wake it (launchd KeepAlive owns its lifecycle)`,
|
|
507
|
+
}
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
const runtimeResult = resolveWakeRuntime(args.runtime, peer)
|
|
511
|
+
if (!runtimeResult.ok) return { status: 'FAILED', woke: false, reason: runtimeResult.error.message }
|
|
512
|
+
const runtime = runtimeResult.value
|
|
513
|
+
|
|
514
|
+
// Resolve the per-runtime adapter (launch = HOW). getAdapter throws only for an
|
|
515
|
+
// unregistered runtime (claude/codex/telegram/notifier are all registered);
|
|
516
|
+
// surface that as FAILED rather than letting it escape the wake.
|
|
517
|
+
let adapter
|
|
518
|
+
try {
|
|
519
|
+
adapter = getAdapter(runtime)
|
|
520
|
+
} catch (e) {
|
|
521
|
+
return { status: 'FAILED', woke: false, runtime, reason: e instanceof Error ? e.message : String(e) }
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
const identity = buildProcessAddress(runtime, args.personality)
|
|
525
|
+
const sock = buildSocketPath(runtime, args.personality, cfg.sockDir)
|
|
526
|
+
const cwd = peer.cwd
|
|
527
|
+
|
|
528
|
+
// C1 — durable stopped flag: a DELIBERATELY stopped peer is NOT woken (contract
|
|
529
|
+
// ЖЦ §stop, Демон §stopped). Unlike idle-reap (temporary), `stop` is an operator
|
|
530
|
+
// halt: refuse with stopped:true so the sender gets an explicit "stopped" error,
|
|
531
|
+
// not a generic "offline" — and no message is queued. `start` clears the flag.
|
|
532
|
+
if (isStopped(cfg, identity)) {
|
|
533
|
+
return {
|
|
534
|
+
status: 'FAILED',
|
|
535
|
+
woke: false,
|
|
536
|
+
runtime,
|
|
537
|
+
stopped: true,
|
|
538
|
+
reason: `"${args.personality}" (${runtime}) is stopped and not accepting messages; start it to resume`,
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
return withWakeLock(cfg, identity, async () => {
|
|
543
|
+
// Re-check the refusal gates INSIDE the lock (audit #3/#11): a `stop` (C1 flag) or a
|
|
544
|
+
// plist install that completed AFTER the pre-lock check but before the lock was
|
|
545
|
+
// acquired must still be honored — else a concurrently stopped / launchd-claimed peer
|
|
546
|
+
// could be spawned live-but-flagged. These re-checks are fail-SAFE (they only add a
|
|
547
|
+
// refusal). A stop racing DURING the spawn is a narrower window the wake-lock does not
|
|
548
|
+
// cover (stop does not take this lock).
|
|
549
|
+
if (isStopped(cfg, identity)) {
|
|
550
|
+
return { status: 'FAILED', woke: false, runtime, stopped: true, reason: `"${args.personality}" (${runtime}) is stopped and not accepting messages; start it to resume` }
|
|
551
|
+
}
|
|
552
|
+
if (isLaunchdManaged(args.personality, env)) {
|
|
553
|
+
return { status: 'FAILED', woke: false, runtime, reason: `"${args.personality}" became launchd-managed mid-wake; the daemon does not wake it` }
|
|
554
|
+
}
|
|
555
|
+
// Idempotent fast path inside the lock: a live session wins (a concurrent
|
|
556
|
+
// wake already brought it up) — no second spawn.
|
|
557
|
+
if (sessionAlive(sock, identity)) {
|
|
558
|
+
writeSessionState(cfg, { identity, runtime, personality: args.personality, cwd, wokeAt: Date.now() })
|
|
559
|
+
return { status: 'READY', woke: false, runtime, process_address: identity }
|
|
560
|
+
}
|
|
561
|
+
if (!existsSync(cwd)) {
|
|
562
|
+
return { status: 'FAILED', woke: false, runtime, reason: `peer cwd does not exist: ${cwd}` }
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
// C3a + C4a — resolve resume vs fresh (extracted resolveWakeMode, contract ЖЦ
|
|
566
|
+
// §resume/fresh). An EXPLICIT resume that finds nothing to resume fails loud.
|
|
567
|
+
const mode = resolveWakeMode(cfg, identity, cwd, args.resume, c => adapter.resolveResume(c))
|
|
568
|
+
if (mode.failReason) return { status: 'FAILED', woke: false, runtime, reason: mode.failReason }
|
|
569
|
+
const resume = mode.resume
|
|
570
|
+
const resumeRef = mode.resumeRef
|
|
571
|
+
const fresh = !resume
|
|
572
|
+
|
|
573
|
+
// Compose the system prompt when the peer carries a doctrine (tui runtimes);
|
|
574
|
+
// a doctrine-less peer (throwaway) → undefined → a bare session.
|
|
575
|
+
const systemPromptFile = adapter.usesDoctrine
|
|
576
|
+
? composePeerPrompt(peer, cwd, identity, cfg, env, peersIndex.peers.map(publicPeerSummary))
|
|
577
|
+
: undefined
|
|
578
|
+
|
|
579
|
+
// Hand the fully-resolved spec to the launch primitive (HOW). lifecycle has
|
|
580
|
+
// made every WHEN/HOW-MANY decision (lock, registry, H4, runtime, resume).
|
|
581
|
+
const spec: LaunchSpec = {
|
|
582
|
+
personality: args.personality,
|
|
583
|
+
runtime,
|
|
584
|
+
cwd,
|
|
585
|
+
identity,
|
|
586
|
+
socketPath: sock,
|
|
587
|
+
systemPromptFile,
|
|
588
|
+
resume, // RESOLVED resume/fresh (C3a), not the raw caller flag
|
|
589
|
+
resumeRef,
|
|
590
|
+
extraArgs: [],
|
|
591
|
+
// Carry the peer's nature so the launch primitive can enforce an adapter's
|
|
592
|
+
// intelligence gate (telegram requires natural). From the registry record.
|
|
593
|
+
intelligence: peer.intelligence,
|
|
594
|
+
}
|
|
595
|
+
const launchCfg: LaunchConfig = {
|
|
596
|
+
claudeBin: cfg.claudeBin,
|
|
597
|
+
codexBin: cfg.codexBin,
|
|
598
|
+
sockDir: cfg.sockDir,
|
|
599
|
+
bootDeadlineSecs: cfg.bootDeadlineSecs,
|
|
600
|
+
readyGateSecs: cfg.readyGateSecs,
|
|
601
|
+
maxAgeSecs: cfg.maxAgeSecs,
|
|
602
|
+
logDir: cfg.logDir,
|
|
603
|
+
env,
|
|
604
|
+
}
|
|
605
|
+
// C2 — initial_prompt (launch-seed): on a FRESH wake, seed the first turn with
|
|
606
|
+
// the peer's initial_prompt BEFORE the routed message — the agent sees the
|
|
607
|
+
// opening directive, then the IAP message (with its from-* to reply to). NOT on
|
|
608
|
+
// resume (a resumed session already holds its context). Best-effort read: a
|
|
609
|
+
// profile read hiccup must never block the wake (the seed is optional).
|
|
610
|
+
const firstMessage = composeFirstMessage(cwd, args.task, fresh)
|
|
611
|
+
const result = await launch(spec, adapter, firstMessage, launchCfg)
|
|
612
|
+
if (result.status === 'FAILED') {
|
|
613
|
+
return { status: 'FAILED', woke: false, runtime, reason: result.reason }
|
|
614
|
+
}
|
|
615
|
+
// The session is up and the message delivered. Recording supervise-state must not
|
|
616
|
+
// turn a successful wake into a failure (audit #18): on a write hiccup the session is
|
|
617
|
+
// still bounded by its tmux self-TTL — log loudly rather than throw past a live spawn.
|
|
618
|
+
try {
|
|
619
|
+
writeSessionState(cfg, { identity, runtime, personality: args.personality, cwd, wokeAt: Date.now() })
|
|
620
|
+
} catch (e) {
|
|
621
|
+
process.stderr.write(
|
|
622
|
+
`[iapeer] WARN session-state write failed for ${identity} — session is live + TTL-bounded but not idle-reap-supervised: ${e instanceof Error ? e.message : String(e)}\n`,
|
|
623
|
+
)
|
|
624
|
+
}
|
|
625
|
+
return { status: 'READY', woke: true, runtime, process_address: identity }
|
|
626
|
+
})
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
630
|
+
// Reap — kill a session (used by idle-reap / supervise; H4-guarded by callers)
|
|
631
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
632
|
+
|
|
633
|
+
/**
 * Kill the tmux session named `identity` on the server behind socket `sock`.
 *
 * After the kill, the server's remaining sessions are listed; when the listing
 * comes back empty the server itself is shut down and the socket file removed.
 * Removing the socket is best-effort: a failure there is deliberately ignored.
 *
 * @param sock     Path of the tmux socket the session lives on.
 * @param identity Name of the tmux session to kill.
 */
export function killSession(sock: string, identity: string): void {
  tmux(sock, 'kill-session', '-t', identity)

  const remaining = tmux(sock, 'list-sessions', '-F', '#{session_name}').out.trim()
  if (remaining) return

  // Nothing left on this server: stop it and clear its socket file.
  tmux(sock, 'kill-server')
  try {
    rmSync(sock, { force: true })
  } catch {
    /* best-effort */
  }
}
|
|
645
|
+
|
|
646
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
647
|
+
// superviseTick — the SINGLE H4-guarded reap pass (idle + zombie-gone)
|
|
648
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
649
|
+
//
|
|
650
|
+
// Walks only daemon-owned sessions (those wakeOrSpawn recorded a .session for —
|
|
651
|
+
// a launchd peer never has one). For each candidate the launchd-plist check is
|
|
652
|
+
// FIRST: a launchd-managed peer is skipped untouched (H4 — the daemon is
|
|
653
|
+
// read-only for it; reaping it would fight launchd KeepAlive on the live fleet).
|
|
654
|
+
// In this consolidation the idle-reap and the zombie-sweep are ONE guarded path,
|
|
655
|
+
// so no reap can bypass H4. Called by the daemon's supervise timer AND at the
|
|
656
|
+
// start of every wakeOrSpawn (heal strays before launching).
|
|
657
|
+
|
|
658
|
+
/** What one superviseTick pass concluded about a single recorded session. */
export interface SuperviseOutcome {
  /** Identity taken from the session-state record this outcome describes. */
  identity: string
  /** The verdict / action taken for that session during the pass. */
  action:
    | 'reaped-idle'
    | 'reaped-gone'
    | 'skipped-launchd'
    | 'alive'
    | 'needs-eager-fresh'
  /** Optional human-readable detail accompanying the action. */
  reason?: string
  /**
   * For 'needs-eager-fresh' (C4b): the peer to EAGERLY re-launch fresh (its session
   * died carrying a /new-mark). The daemon timer drives the async relaunch.
   */
  personality?: string
  /** Runtime of the peer to re-launch; set together with `personality`. */
  runtime?: Runtime
}
|
|
667
|
+
|
|
668
|
+
/** Optional injection points for superviseTick (both default when omitted). */
export interface SuperviseDeps {
  /** Environment to consult instead of `process.env`. */
  env?: NodeJS.ProcessEnv
  /** "Now" in epoch milliseconds, used instead of `Date.now()`. */
  nowMs?: number
}
|
|
672
|
+
|
|
673
|
+
/**
 * Run one supervise pass over every recorded session state and report, per
 * session, what was done.
 *
 * Per session, in order:
 *  1. launchd-managed peer → 'skipped-launchd' (nothing is touched).
 *  2. session no longer live → its state record is removed; the outcome is
 *     'needs-eager-fresh' when a /new-mark exists for the identity, otherwise
 *     'reaped-gone'.
 *  3. live session idle for more than `cfg.idleSecs` → killed, state removed,
 *     'reaped-idle'.
 *  4. otherwise → 'alive'.
 *
 * @param cfg  Lifecycle configuration (`sockDir`, `idleSecs`, state location).
 * @param deps Optional overrides for the environment and the current time.
 * @returns One outcome per recorded session, in iteration order.
 */
export function superviseTick(cfg: LifecycleConfig, deps: SuperviseDeps = {}): SuperviseOutcome[] {
  const env = deps.env ?? process.env
  const nowMs = deps.nowMs ?? Date.now()
  const out: SuperviseOutcome[] = []
  for (const s of readSessionStates(cfg)) {
    // H4 — FIRST, before any reap. A launchd-managed peer is read-only.
    if (isLaunchdManaged(s.personality, env)) {
      out.push({ identity: s.identity, action: 'skipped-launchd' })
      continue
    }
    const sock = buildSocketPath(s.runtime, s.personality, cfg.sockDir)
    if (!sessionAlive(sock, s.identity)) {
      removeSessionState(cfg, s.identity)
      // C4b — a session that died carrying a /new-mark is a GRACEFUL termination by
      // the owner: re-launch EAGERLY as fresh (not lazily on the next message, the
      // way a markless idle-reap death resumes). The mark is LEFT for the relaunch's
      // resolveWakeMode to consume; the daemon timer drives the async wakeOrSpawn.
      if (hasNewMark(cfg, s.identity)) {
        out.push({
          identity: s.identity,
          action: 'needs-eager-fresh',
          reason: '/new graceful mark — eager fresh re-launch',
          personality: s.personality,
          runtime: s.runtime,
        })
        continue
      }
      out.push({ identity: s.identity, action: 'reaped-gone', reason: 'session no longer live' })
      continue
    }
    // Idle accounting via the runtime adapter's activity proxy, falling back to
    // wokeAt when the adapter has no reading or no adapter exists for the runtime.
    let lastActivityMs = s.wokeAt
    try {
      const proxyMs = getAdapter(s.runtime).newestActivityMtime(s.cwd)
      // The proxy may predate the current wake (e.g. an old transcript of a session
      // that was just resumed and has not produced a turn yet). The wake itself
      // counts as activity, so never let the proxy pull the timestamp below wokeAt —
      // otherwise a just-woken session could be reaped on the very next tick.
      if (proxyMs != null) lastActivityMs = Math.max(proxyMs, s.wokeAt)
    } catch {
      /* no adapter for this runtime yet → wokeAt fallback */
    }
    const ageSecs = Math.floor((nowMs - lastActivityMs) / 1000)
    if (ageSecs > cfg.idleSecs) {
      killSession(sock, s.identity)
      removeSessionState(cfg, s.identity)
      out.push({ identity: s.identity, action: 'reaped-idle', reason: `idle ${ageSecs}s` })
    } else {
      out.push({ identity: s.identity, action: 'alive' })
    }
  }
  return out
}
|
|
724
|
+
|
|
725
|
+
/**
 * C4b — perform the EAGER fresh re-launch for every outcome that superviseTick
 * marked 'needs-eager-fresh' (a session that died carrying a /new graceful mark).
 *
 * Relaunches run one after another through wakeOrSpawn with an empty task, so the
 * first turn consists of the peer's seed (initial_prompt) only — a /new has no
 * incoming message. A wakeOrSpawn call that throws is converted into a FAILED
 * result instead of aborting the remaining relaunches. Outcomes with another
 * action, or without both `personality` and `runtime`, are skipped and produce no
 * entry in the returned list.
 *
 * NB: a /new'd peer is expected to carry an initial_prompt (the report directive);
 * without one the seed is empty and the first turn delivers nothing.
 *
 * @param cfg      Lifecycle configuration handed to wakeOrSpawn.
 * @param outcomes Outcomes of a superviseTick pass.
 * @param deps     Wake dependencies; only `env` is forwarded.
 * @returns One WakeResult per attempted relaunch, in outcome order.
 */
export async function processEagerRelaunches(
  cfg: LifecycleConfig,
  outcomes: SuperviseOutcome[],
  deps: WakeDeps = {},
): Promise<WakeResult[]> {
  const relaunched: WakeResult[] = []
  for (const { action, personality, runtime } of outcomes) {
    if (action !== 'needs-eager-fresh') continue
    if (!personality || !runtime) continue

    let attempt: WakeResult
    try {
      attempt = await wakeOrSpawn({ personality, runtime, task: '' }, { cfg, env: deps.env })
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err)
      attempt = { status: 'FAILED', woke: false, reason }
    }
    relaunched.push(attempt)
  }
  return relaunched
}
|
|
753
|
+
|
|
754
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
755
|
+
// folderLaunch / attachPeer — operator verbs (contract: Lifecycle [ЖЦ] §Folder launch [Запуск из папки], §attach;
|
|
756
|
+
// Primitives [Примитивы] §Verb map [Карта verbs]). Both reuse wakeOrSpawn (one bring-up path); the difference
|
|
757
|
+
// is resume vs fresh and which runtime.
|
|
758
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
759
|
+
|
|
760
|
+
/** Inputs of folderLaunch. */
export interface FolderLaunchOptions {
  /** Folder whose profile supplies the peer's identity. */
  cwd: string
  /** Runtime to launch; defaults to the runtime of the resolved identity. */
  runtime?: string
  /** Environment to use instead of `process.env`. */
  env?: NodeJS.ProcessEnv
  /** Lifecycle configuration; loaded from the environment when omitted. */
  cfg?: LifecycleConfig
}
|
|
766
|
+
|
|
767
|
+
/**
 * `iapeer <runtime>` (launch) — bring up the peer of the CURRENT cwd, ALWAYS FRESH
 * (contract: folder-launch never resumes). personality/runtime come from the cwd's
 * profile (resolveIdentity), not an arg; an explicit `opts.runtime` overrides the
 * profile's runtime.
 *
 * Goes through wakeOrSpawn (resume:false) so H4 / the wake-lock / the intelligence
 * gate all apply — incl. H4 refusal for a launchd-managed peer (a fresh
 * folder-launch alongside its launchd session would collide on the identity).
 *
 * @param opts Folder, optional runtime override, optional env / config overrides.
 * @returns The WakeResult reported by wakeOrSpawn.
 */
export async function folderLaunch(opts: FolderLaunchOptions): Promise<WakeResult> {
  const env = opts.env ?? process.env
  const cfg = opts.cfg ?? loadLifecycleConfig(env)
  const identity = resolveIdentity({ cwd: opts.cwd, env })
  const runtime = opts.runtime ?? identity.runtime
  // The task is left empty on purpose: on a fresh wake wakeOrSpawn itself runs
  // composeFirstMessage(cwd, task, fresh), which puts the peer's initial_prompt in
  // front of the task. Passing a pre-composed seed as the task would hand the
  // initial_prompt to the session twice.
  // NOTE(review): wakeOrSpawn reads the seed from the registered peer cwd —
  // presumably the same folder as opts.cwd; confirm.
  return wakeOrSpawn({ personality: identity.personality, runtime, task: '', resume: false }, { cfg, env })
}
|
|
784
|
+
|
|
785
|
+
/**
 * Pick, among the peer's runtimes, the one whose adapter reports the most recent
 * activity mtime for the peer's cwd (contract: attach resolves the runtime by
 * last-active transcript-mtime, NOT the profile default).
 *
 * A runtime whose adapter lookup or mtime probe throws is ignored, as is one that
 * reports no activity. On equal mtimes the runtime listed first wins.
 *
 * @param peer Registry record providing `runtimes` and `cwd`.
 * @param cfg  Lifecycle configuration (currently not consulted).
 * @returns The freshest runtime, or undefined when none has any activity
 *          (a never-run peer).
 */
export function lastActiveRuntime(peer: PeerRecord, cfg: LifecycleConfig): Runtime | undefined {
  // Activity probe that treats "no adapter / no proxy for this runtime" as "no activity".
  const activityOf = (rt: Runtime) => {
    try {
      return getAdapter(rt).newestActivityMtime(peer.cwd)
    } catch {
      return null
    }
  }

  let winner: Runtime | undefined
  let newest = -1
  for (const candidate of peer.runtimes) {
    const stamp = activityOf(candidate)
    if (stamp !== null && stamp > newest) {
      newest = stamp
      winner = candidate
    }
  }
  return winner
}
|
|
806
|
+
|
|
807
|
+
/** Inputs of attachPeer. */
export interface AttachOptions {
  /** Personality (registry name) of the peer to attach to. */
  personality: string
  /** Explicit runtime; when omitted the last-active runtime is preferred. */
  runtime?: string
  /** Environment to use instead of `process.env`. */
  env?: NodeJS.ProcessEnv
  /** Lifecycle configuration; loaded from the environment when omitted. */
  cfg?: LifecycleConfig
}
|
|
813
|
+
/**
 * Result of attachPeer, discriminated on `ok`:
 *  - success carries the tmux identity and socket to attach to, whether the
 *    session had to be woken, and the runtime that was used;
 *  - failure carries a human-readable reason.
 */
export type AttachResult =
  | {
      ok: true
      identity: string
      socketPath: string
      woke: boolean
      runtime: Runtime
    }
  | {
      ok: false
      reason: string
    }
|
|
816
|
+
|
|
817
|
+
/**
 * `iapeer attach <peer> [runtime]` — ensure the peer is live, then hand back the
 * socket/identity for the caller to `tmux attach`. ALWAYS RESUME (contract: attach
 * never starts fresh).
 *
 * Runtime selection: the explicit argument (rejected when resolveWakeRuntime
 * refuses it), else the LAST-ACTIVE runtime by transcript mtime, else the profile
 * default. A session that is already live is returned directly; otherwise the peer
 * is woken through wakeOrSpawn with resume:true and an empty task.
 *
 * @param opts Peer personality plus optional runtime / env / config overrides.
 * @returns `ok: true` with identity, socket path, runtime and whether THIS call
 *          spawned the session; `ok: false` with a reason when the peer is not
 *          registered, the runtime is rejected, or the wake fails.
 */
export async function attachPeer(opts: AttachOptions): Promise<AttachResult> {
  const env = opts.env ?? process.env
  const cfg = opts.cfg ?? loadLifecycleConfig(env)
  const peer = findPeer(readPeersIndex({ env }), opts.personality)
  if (!peer) return { ok: false, reason: `peer "${opts.personality}" is not registered` }
  const runtimeResult = resolveWakeRuntime(opts.runtime, peer)
  // Only an EXPLICIT runtime can fail the attach; without one the fallbacks below decide.
  if (opts.runtime && !runtimeResult.ok) return { ok: false, reason: runtimeResult.error.message }
  // last-active (by mtime) wins over the profile default when no runtime is given.
  // NOTE(review): the raw opts.runtime string is used here, not a value taken from
  // runtimeResult — confirm resolveWakeRuntime does not normalize its input.
  const runtime = opts.runtime ?? lastActiveRuntime(peer, cfg) ?? peer.runtime
  const identity = buildProcessAddress(runtime, opts.personality)
  const sock = buildSocketPath(runtime, opts.personality, cfg.sockDir)
  if (sessionAlive(sock, identity)) return { ok: true, identity, socketPath: sock, woke: false, runtime }
  const wake = await wakeOrSpawn({ personality: opts.personality, runtime, task: '', resume: true }, { cfg, env })
  if (wake.status === 'FAILED') return { ok: false, reason: wake.reason ?? 'wake failed' }
  // Propagate wakeOrSpawn's own flag: it reports READY with woke:false when a
  // concurrent wake had already brought the session up inside the wake-lock, so
  // claiming woke:true unconditionally would misreport who spawned the session.
  return { ok: true, identity, socketPath: sock, woke: wake.woke, runtime }
}
|