@agfpd/iapeer 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/index.ts +32 -1
- package/src/daemon/main.ts +2 -2
- package/src/lifecycle/index.ts +238 -52
- package/src/lifecycle/lifecycle.test.ts +199 -61
package/package.json
CHANGED
package/src/cli/index.ts
CHANGED
|
@@ -19,7 +19,7 @@ import {
|
|
|
19
19
|
type Intelligence,
|
|
20
20
|
type Runtime,
|
|
21
21
|
} from '../core/constants.ts'
|
|
22
|
-
import { buildProcessAddress, buildSocketPath } from '../core/socket.ts'
|
|
22
|
+
import { buildProcessAddress, buildSocketPath, parseSessionName } from '../core/socket.ts'
|
|
23
23
|
import { ensureGlobalIapScaffold } from '../storage/index.ts'
|
|
24
24
|
import { findPeer, readPeersIndex, type PeerRecord } from '../registry/index.ts'
|
|
25
25
|
import { isPeerLive, routeControl, routeSend, type WakeFn } from '../transport/index.ts'
|
|
@@ -31,6 +31,7 @@ import {
|
|
|
31
31
|
isStopped,
|
|
32
32
|
killSession,
|
|
33
33
|
loadLifecycleConfig,
|
|
34
|
+
setNewEager,
|
|
34
35
|
setStopped,
|
|
35
36
|
wakeOrSpawn,
|
|
36
37
|
} from '../lifecycle/index.ts'
|
|
@@ -314,6 +315,7 @@ const USAGE = `usage: iapeer <verb> [args]
|
|
|
314
315
|
attach <peer> [runtime] ensure-live + resume, then tmux attach
|
|
315
316
|
interrupt <peer> [runtime] interrupt the current turn (Escape) — context intact
|
|
316
317
|
compact <peer> [runtime] compact the peer's context (/compact)
|
|
318
|
+
self-fresh (agent self-call) mark /new eager-fresh + self-kill — the daemon relaunches fresh
|
|
317
319
|
`
|
|
318
320
|
|
|
319
321
|
export async function runCli(argv: string[], env: NodeJS.ProcessEnv = process.env): Promise<number> {
|
|
@@ -543,6 +545,35 @@ export async function runCli(argv: string[], env: NodeJS.ProcessEnv = process.en
|
|
|
543
545
|
if (!positionals[0] || !positionals[1]) return usage(errOut)
|
|
544
546
|
return await runAlwaysOn(positionals[0], positionals[1], process.cwd())
|
|
545
547
|
}
|
|
548
|
+
case 'self-fresh': {
|
|
549
|
+
// /new AGENT-FACING TRIGGER (TARGET redesign). Run BY the agent itself as the
|
|
550
|
+
// FINAL step of a /new graceful wind-down (the owner triggers it via a per-peer
|
|
551
|
+
// telegram alias: "write a handoff to durable memory, then run iapeer self-fresh"
|
|
552
|
+
// — the alias text is telegram-owned, NOT global doctrine). It: resolves the
|
|
553
|
+
// caller identity from PEER_IDENTITY (<runtime>-<personality>), writes the
|
|
554
|
+
// .new-eager mark, then self-kills the caller's OWN tmux session. The daemon's
|
|
555
|
+
// superviseTick then sees the dead session carrying .new-eager → eager fresh
|
|
556
|
+
// relaunch (with initial_prompt) so the agent reports it is back up.
|
|
557
|
+
const identity = env.PEER_IDENTITY?.trim()
|
|
558
|
+
if (!identity) {
|
|
559
|
+
errOut('self-fresh: PEER_IDENTITY is not set — this verb is an agent self-call from inside a session\n')
|
|
560
|
+
return 1
|
|
561
|
+
}
|
|
562
|
+
const addr = parseSessionName(identity)
|
|
563
|
+
if (!addr) {
|
|
564
|
+
errOut(`self-fresh: invalid PEER_IDENTITY "${identity}" — expected <runtime>-<personality>\n`)
|
|
565
|
+
return 1
|
|
566
|
+
}
|
|
567
|
+
const cfg = loadLifecycleConfig(env)
|
|
568
|
+
// Mark FIRST, kill SECOND: if the kill races ahead of the mark the daemon would
|
|
569
|
+
// see a dead session with no .new-eager → a plain reaped-gone (lazy fresh on the
|
|
570
|
+
// next message), not the eager relaunch — degrade gracefully, never lose the mark.
|
|
571
|
+
setNewEager(cfg, identity)
|
|
572
|
+
out(`self-fresh: marked ${identity} for eager fresh re-launch; self-killing session\n`)
|
|
573
|
+
const sock = buildSocketPath(addr.runtime, addr.personality, cfg.sockDir)
|
|
574
|
+
killSession(sock, identity)
|
|
575
|
+
return 0
|
|
576
|
+
}
|
|
546
577
|
case 'interrupt':
|
|
547
578
|
case 'compact': {
|
|
548
579
|
// In-session control (Ф-E, clean-slash namespace): interrupt a stuck/raving
|
package/src/daemon/main.ts
CHANGED
|
@@ -99,8 +99,8 @@ export async function startConfiguredDaemon(opts: ConfiguredDaemonOptions = {}):
|
|
|
99
99
|
wake: makeWakeFn(cfg, env),
|
|
100
100
|
supervise: {
|
|
101
101
|
intervalMs: opts.superviseIntervalMs ?? DEFAULT_SUPERVISE_INTERVAL_MS,
|
|
102
|
-
// idle-reap / zombie-sweep, THEN
|
|
103
|
-
// session died carrying a
|
|
102
|
+
// idle-reap / zombie-sweep, THEN the eager fresh re-launch for any peer whose
|
|
103
|
+
// session died carrying a .new-eager mark (owner /new; async, best-effort).
|
|
104
104
|
tick: async () => {
|
|
105
105
|
const outcomes = superviseTick(cfg, { env })
|
|
106
106
|
await processEagerRelaunches(cfg, outcomes, { env })
|
package/src/lifecycle/index.ts
CHANGED
|
@@ -55,6 +55,10 @@ export interface LifecycleConfig {
|
|
|
55
55
|
readyGateSecs: number
|
|
56
56
|
idleSecs: number
|
|
57
57
|
maxAgeSecs: number
|
|
58
|
+
/** Crash-loop guard: refuse to (re)launch after this many deaths within the window. */
|
|
59
|
+
crashLoopMax: number
|
|
60
|
+
/** Crash-loop guard: the sliding window (seconds) the death count is measured over. */
|
|
61
|
+
crashLoopWindowSecs: number
|
|
58
62
|
}
|
|
59
63
|
|
|
60
64
|
export function loadLifecycleConfig(env: NodeJS.ProcessEnv = process.env): LifecycleConfig {
|
|
@@ -74,6 +78,8 @@ export function loadLifecycleConfig(env: NodeJS.ProcessEnv = process.env): Lifec
|
|
|
74
78
|
readyGateSecs: num(env.IAPEER_READY_GATE_SECS, 120),
|
|
75
79
|
idleSecs: num(env.IAPEER_IDLE_SECS, 3600),
|
|
76
80
|
maxAgeSecs: num(env.IAPEER_MAX_AGE_SECS, 14400),
|
|
81
|
+
crashLoopMax: num(env.IAPEER_CRASHLOOP_MAX, 3),
|
|
82
|
+
crashLoopWindowSecs: num(env.IAPEER_CRASHLOOP_WINDOW_SECS, 300),
|
|
77
83
|
}
|
|
78
84
|
}
|
|
79
85
|
|
|
@@ -168,42 +174,155 @@ export function clearStopped(cfg: LifecycleConfig, identity: string): void {
|
|
|
168
174
|
}
|
|
169
175
|
|
|
170
176
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
171
|
-
//
|
|
172
|
-
//
|
|
173
|
-
//
|
|
174
|
-
//
|
|
175
|
-
//
|
|
176
|
-
//
|
|
177
|
+
// Lifecycle markers — the DAEMON decides fresh-vs-resume by the DEATH CAUSE it
|
|
178
|
+
// tracks itself (TARGET redesign). Plain files in state/lifecycle/<identity>.* :
|
|
179
|
+
//
|
|
180
|
+
// .idle-reaped : written ONLY when the daemon idle-reaps the session (the only
|
|
181
|
+
// death the daemon initiates). Presence on the next wake = session was parked
|
|
182
|
+
// cleanly = RESUME-eligible. ABSENT on a dead session = it died on its own
|
|
183
|
+
// (crash / self-close) = FRESH. (resolver branch 3.)
|
|
184
|
+
// .new-eager : set when /new is invoked (owner reset, via `iapeer self-fresh`).
|
|
185
|
+
// Presence on a dead session = the daemon EAGERLY relaunches FRESH (does NOT
|
|
186
|
+
// wait for a message) and injects initial_prompt. Consumed on the relaunch.
|
|
187
|
+
// .deaths : crash-loop guard — a small JSON ring of recent death epoch-ms.
|
|
188
|
+
// .topic : the topic tag of the current/last session (executor fresh-vs-
|
|
189
|
+
// resume discriminator).
|
|
190
|
+
//
|
|
191
|
+
// Boolean markers carry an ISO timestamp line (audit-friendly); .deaths is a JSON
|
|
192
|
+
// array; .topic is the raw topic string.
|
|
177
193
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
178
194
|
|
|
179
|
-
function
|
|
180
|
-
return join(cfg.stateDir, `${identity}.
|
|
195
|
+
function idleReapedPath(cfg: LifecycleConfig, identity: string): string {
|
|
196
|
+
return join(cfg.stateDir, `${identity}.idle-reaped`)
|
|
181
197
|
}
|
|
182
198
|
|
|
183
|
-
/** True iff the identity
|
|
184
|
-
export function
|
|
185
|
-
return existsSync(
|
|
199
|
+
/** True iff the identity was idle-reaped by the daemon (→ RESUME-eligible). */
|
|
200
|
+
export function hasIdleReaped(cfg: LifecycleConfig, identity: string): boolean {
|
|
201
|
+
return existsSync(idleReapedPath(cfg, identity))
|
|
186
202
|
}
|
|
187
203
|
|
|
188
|
-
/**
|
|
189
|
-
export function
|
|
204
|
+
/** Write the idle-reaped marker — ONLY the idle-reap path in superviseTick does this. */
|
|
205
|
+
export function setIdleReaped(cfg: LifecycleConfig, identity: string): void {
|
|
190
206
|
mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
|
|
191
|
-
writeFileSync(
|
|
207
|
+
writeFileSync(idleReapedPath(cfg, identity), `${new Date().toISOString()}\n`, { mode: 0o600 })
|
|
192
208
|
}
|
|
193
209
|
|
|
194
|
-
/** Consume the
|
|
195
|
-
export function
|
|
210
|
+
/** Consume the idle-reaped marker (the resolver does this on the resume decision). */
|
|
211
|
+
export function clearIdleReaped(cfg: LifecycleConfig, identity: string): void {
|
|
196
212
|
try {
|
|
197
|
-
rmSync(
|
|
213
|
+
rmSync(idleReapedPath(cfg, identity), { force: true })
|
|
198
214
|
} catch {
|
|
199
215
|
/* already gone */
|
|
200
216
|
}
|
|
201
217
|
}
|
|
202
218
|
|
|
219
|
+
function newEagerPath(cfg: LifecycleConfig, identity: string): string {
|
|
220
|
+
return join(cfg.stateDir, `${identity}.new-eager`)
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
/** True iff the identity carries a /new eager-fresh mark (→ eager fresh + seed). */
|
|
224
|
+
export function hasNewEager(cfg: LifecycleConfig, identity: string): boolean {
|
|
225
|
+
return existsSync(newEagerPath(cfg, identity))
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
/** Set the /new eager-fresh mark (the `self-fresh` verb does this before self-kill). */
|
|
229
|
+
export function setNewEager(cfg: LifecycleConfig, identity: string): void {
|
|
230
|
+
mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
|
|
231
|
+
writeFileSync(newEagerPath(cfg, identity), `${new Date().toISOString()}\n`, { mode: 0o600 })
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
/** Consume the /new eager-fresh mark (the daemon does this on the eager relaunch). */
|
|
235
|
+
export function clearNewEager(cfg: LifecycleConfig, identity: string): void {
|
|
236
|
+
try {
|
|
237
|
+
rmSync(newEagerPath(cfg, identity), { force: true })
|
|
238
|
+
} catch {
|
|
239
|
+
/* already gone */
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
function deathsPath(cfg: LifecycleConfig, identity: string): string {
|
|
244
|
+
return join(cfg.stateDir, `${identity}.deaths`)
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
/** Read the crash-loop death ring (epoch-ms timestamps). Garbage → empty. */
|
|
248
|
+
export function readDeaths(cfg: LifecycleConfig, identity: string): number[] {
|
|
249
|
+
try {
|
|
250
|
+
const arr = JSON.parse(readFileSync(deathsPath(cfg, identity), 'utf8'))
|
|
251
|
+
return Array.isArray(arr) ? arr.filter((n): n is number => typeof n === 'number' && Number.isFinite(n)) : []
|
|
252
|
+
} catch {
|
|
253
|
+
return []
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
/** Append a death epoch-ms to the crash-loop ring (best-effort, bounded). */
|
|
258
|
+
export function recordDeath(cfg: LifecycleConfig, identity: string, nowMs: number = Date.now()): void {
|
|
259
|
+
mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
|
|
260
|
+
// Keep the ring small — only the most recent matter for the window check.
|
|
261
|
+
const next = [...readDeaths(cfg, identity), nowMs].slice(-16)
|
|
262
|
+
try {
|
|
263
|
+
writeFileSync(deathsPath(cfg, identity), JSON.stringify(next), { mode: 0o600 })
|
|
264
|
+
} catch {
|
|
265
|
+
/* best-effort accounting; never block a reap */
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
/** Count deaths within `windowSecs` of `nowMs` (crash-loop guard input). */
|
|
270
|
+
export function countRecentDeaths(
|
|
271
|
+
cfg: LifecycleConfig,
|
|
272
|
+
identity: string,
|
|
273
|
+
windowSecs: number,
|
|
274
|
+
nowMs: number = Date.now(),
|
|
275
|
+
): number {
|
|
276
|
+
const cutoff = nowMs - windowSecs * 1000
|
|
277
|
+
return readDeaths(cfg, identity).filter(t => t >= cutoff).length
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
/** Trim the death ring to the window (called on a successful wake to reset the loop). */
|
|
281
|
+
export function trimDeaths(
|
|
282
|
+
cfg: LifecycleConfig,
|
|
283
|
+
identity: string,
|
|
284
|
+
windowSecs: number,
|
|
285
|
+
nowMs: number = Date.now(),
|
|
286
|
+
): void {
|
|
287
|
+
const cutoff = nowMs - windowSecs * 1000
|
|
288
|
+
const kept = readDeaths(cfg, identity).filter(t => t >= cutoff)
|
|
289
|
+
try {
|
|
290
|
+
if (kept.length === 0) rmSync(deathsPath(cfg, identity), { force: true })
|
|
291
|
+
else writeFileSync(deathsPath(cfg, identity), JSON.stringify(kept), { mode: 0o600 })
|
|
292
|
+
} catch {
|
|
293
|
+
/* best-effort */
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
function topicPath(cfg: LifecycleConfig, identity: string): string {
|
|
298
|
+
return join(cfg.stateDir, `${identity}.topic`)
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
/** Read the stored topic of the current/last session (executor discriminator). '' if none. */
|
|
302
|
+
export function readTopic(cfg: LifecycleConfig, identity: string): string {
|
|
303
|
+
try {
|
|
304
|
+
return readFileSync(topicPath(cfg, identity), 'utf8').trim()
|
|
305
|
+
} catch {
|
|
306
|
+
return ''
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
/** Store the incoming topic for the established session (raw string, best-effort). */
|
|
311
|
+
export function writeTopic(cfg: LifecycleConfig, identity: string, topic: string): void {
|
|
312
|
+
mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
|
|
313
|
+
try {
|
|
314
|
+
writeFileSync(topicPath(cfg, identity), topic, { mode: 0o600 })
|
|
315
|
+
} catch {
|
|
316
|
+
/* best-effort — topic is a discriminator hint, never blocks a wake */
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
|
|
203
320
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
204
|
-
// resolveWakeMode
|
|
205
|
-
//
|
|
206
|
-
//
|
|
321
|
+
// resolveWakeMode — the resume-vs-fresh decision (TARGET redesign). The DAEMON
|
|
322
|
+
// decides by the DEATH CAUSE it tracks (.idle-reaped marker), plus peer-type /
|
|
323
|
+
// topic — NOT an agent-dropped fresh mark. Takes the adapter's resolveResume as a
|
|
324
|
+
// parameter so it is unit-testable without a runtime. The .idle-reaped marker is
|
|
325
|
+
// CONSUMED when the default branch acts on it (a wake side-effect).
|
|
207
326
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
208
327
|
|
|
209
328
|
export interface WakeMode {
|
|
@@ -215,14 +334,33 @@ export interface WakeMode {
|
|
|
215
334
|
}
|
|
216
335
|
|
|
217
336
|
/**
|
|
218
|
-
*
|
|
219
|
-
*
|
|
220
|
-
*
|
|
221
|
-
*
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
337
|
+
* True iff the peer of `cwd` is a human-conversational peer — i.e. its local
|
|
338
|
+
* profile declares an `interfaces.telegram` binding (a telegram-fronted dialogue).
|
|
339
|
+
* Such a peer NEVER auto-freshes a resume-eligible wake; only an explicit /new
|
|
340
|
+
* (eager) resets it. A profile read hiccup → not-human (safe default: an executor).
|
|
341
|
+
*/
|
|
342
|
+
function isHumanConversational(cwd: string): boolean {
|
|
343
|
+
try {
|
|
344
|
+
const ifaces = readPeerProfile(cwd)?.interfaces
|
|
345
|
+
return !!(ifaces && ifaces.telegram != null)
|
|
346
|
+
} catch {
|
|
347
|
+
return false
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
/**
|
|
352
|
+
* Decide resume vs fresh on a wake (TARGET redesign). Branch order:
|
|
353
|
+
* 1. argsResume === false (folder-launch `iapeer <runtime>`) → FRESH.
|
|
354
|
+
* 2. argsResume === true (attach) → RESUME; FAIL-LOUD via resolveResume if there
|
|
355
|
+
* is nothing to resume (failReason set; never a silent fresh fallback).
|
|
356
|
+
* 3. default (argsResume undefined — a message woke a dead/asleep peer):
|
|
357
|
+
* a. NOT hasIdleReaped → FRESH. (It died on its own: crash / self-close. A
|
|
358
|
+
* crash needs a CLEAN fresh, not a re-crashing resume of a broken context;
|
|
359
|
+
* durable handoff carries continuity.)
|
|
360
|
+
* b. hasIdleReaped (CONSUME the marker now) → resume-eligible, then by type:
|
|
361
|
+
* - human-conversational (interfaces.telegram present) → RESUME.
|
|
362
|
+
* - executor: incomingTopic non-empty AND differs from stored .topic →
|
|
363
|
+
* FRESH (new work); else (same topic, or no incoming topic) → RESUME.
|
|
226
364
|
*/
|
|
227
365
|
export function resolveWakeMode(
|
|
228
366
|
cfg: LifecycleConfig,
|
|
@@ -230,17 +368,30 @@ export function resolveWakeMode(
|
|
|
230
368
|
cwd: string,
|
|
231
369
|
argsResume: boolean | undefined,
|
|
232
370
|
resolveResume: (cwd: string) => { ok: boolean; ref?: string; reason?: string },
|
|
371
|
+
incomingTopic?: string,
|
|
233
372
|
): WakeMode {
|
|
234
|
-
|
|
235
|
-
clearNewMark(cfg, identity) // consume the graceful mark on the fresh re-launch
|
|
236
|
-
return { resume: false }
|
|
237
|
-
}
|
|
373
|
+
// 1. folder-launch → always fresh.
|
|
238
374
|
if (argsResume === false) return { resume: false }
|
|
375
|
+
// 2. attach → always resume, fail-loud if nothing to resume.
|
|
239
376
|
if (argsResume === true) {
|
|
240
377
|
const r = resolveResume(cwd)
|
|
241
378
|
if (!r.ok) return { resume: false, failReason: r.reason ?? 'resume requested but nothing to resume' }
|
|
242
379
|
return { resume: true, resumeRef: r.ref }
|
|
243
380
|
}
|
|
381
|
+
// 3. default (a message woke a dead/asleep peer): decide by the death cause.
|
|
382
|
+
// 3a. NOT idle-reaped → it died on its own (crash / self-close) → clean FRESH.
|
|
383
|
+
if (!hasIdleReaped(cfg, identity)) return { resume: false }
|
|
384
|
+
// 3b. idle-reaped → resume-eligible. Consume the marker now (it has done its job).
|
|
385
|
+
clearIdleReaped(cfg, identity)
|
|
386
|
+
// human-conversational dialogue never auto-freshes; only an explicit /new resets it.
|
|
387
|
+
if (isHumanConversational(cwd)) {
|
|
388
|
+
const r = resolveResume(cwd)
|
|
389
|
+
return r.ok ? { resume: true, resumeRef: r.ref } : { resume: false }
|
|
390
|
+
}
|
|
391
|
+
// executor: a NEW topic (non-empty and differing from the stored one) means new
|
|
392
|
+
// work → FRESH; same topic, or no incoming topic → continue the work → RESUME.
|
|
393
|
+
const topic = incomingTopic?.trim() ?? ''
|
|
394
|
+
if (topic && topic !== readTopic(cfg, identity)) return { resume: false }
|
|
244
395
|
const r = resolveResume(cwd)
|
|
245
396
|
return r.ok ? { resume: true, resumeRef: r.ref } : { resume: false }
|
|
246
397
|
}
|
|
@@ -562,9 +713,25 @@ export async function wakeOrSpawn(args: WakeArgs, deps: WakeDeps = {}): Promise<
|
|
|
562
713
|
return { status: 'FAILED', woke: false, runtime, reason: `peer cwd does not exist: ${cwd}` }
|
|
563
714
|
}
|
|
564
715
|
|
|
565
|
-
//
|
|
566
|
-
//
|
|
567
|
-
|
|
716
|
+
// Crash-loop guard — BEFORE launching: if the peer has died crashLoopMax times
|
|
717
|
+
// within crashLoopWindowSecs, refuse to (re)launch and leave it asleep (a clear
|
|
718
|
+
// FAILED reason, not a silent fresh that re-crashes). A successful wake below
|
|
719
|
+
// trims the ring, so the guard only fires on a genuine tight loop.
|
|
720
|
+
const recentDeaths = countRecentDeaths(cfg, identity, cfg.crashLoopWindowSecs, Date.now())
|
|
721
|
+
if (recentDeaths >= cfg.crashLoopMax) {
|
|
722
|
+
return {
|
|
723
|
+
status: 'FAILED',
|
|
724
|
+
woke: false,
|
|
725
|
+
runtime,
|
|
726
|
+
reason: `crash-loop guard: ${recentDeaths} deaths in ${cfg.crashLoopWindowSecs}s, leaving asleep`,
|
|
727
|
+
}
|
|
728
|
+
}
|
|
729
|
+
|
|
730
|
+
// Resolve resume vs fresh (TARGET redesign resolveWakeMode): the daemon decides
|
|
731
|
+
// by the death cause (.idle-reaped marker) + peer-type/topic. An EXPLICIT resume
|
|
732
|
+
// that finds nothing to resume fails loud. incomingTopic (args.topic) is the
|
|
733
|
+
// executor discriminator.
|
|
734
|
+
const mode = resolveWakeMode(cfg, identity, cwd, args.resume, c => adapter.resolveResume(c), args.topic)
|
|
568
735
|
if (mode.failReason) return { status: 'FAILED', woke: false, runtime, reason: mode.failReason }
|
|
569
736
|
const resume = mode.resume
|
|
570
737
|
const resumeRef = mode.resumeRef
|
|
@@ -622,6 +789,10 @@ export async function wakeOrSpawn(args: WakeArgs, deps: WakeDeps = {}): Promise<
|
|
|
622
789
|
`[iapeer] WARN session-state write failed for ${identity} — session is live + TTL-bounded but not idle-reap-supervised: ${e instanceof Error ? e.message : String(e)}\n`,
|
|
623
790
|
)
|
|
624
791
|
}
|
|
792
|
+
// Establish the session's topic (executor discriminator) and reset the crash-loop
|
|
793
|
+
// ring — a successful wake means this is NOT a tight crash loop. Best-effort.
|
|
794
|
+
writeTopic(cfg, identity, args.topic?.trim() ?? '')
|
|
795
|
+
trimDeaths(cfg, identity, cfg.crashLoopWindowSecs, Date.now())
|
|
625
796
|
return { status: 'READY', woke: true, runtime, process_address: identity }
|
|
626
797
|
})
|
|
627
798
|
}
|
|
@@ -659,8 +830,8 @@ export interface SuperviseOutcome {
|
|
|
659
830
|
identity: string
|
|
660
831
|
action: 'reaped-idle' | 'reaped-gone' | 'skipped-launchd' | 'alive' | 'needs-eager-fresh'
|
|
661
832
|
reason?: string
|
|
662
|
-
/** For 'needs-eager-fresh'
|
|
663
|
-
*
|
|
833
|
+
/** For 'needs-eager-fresh': the peer to EAGERLY re-launch fresh (its session died
|
|
834
|
+
* carrying a .new-eager mark). The daemon timer drives the async relaunch. */
|
|
664
835
|
personality?: string
|
|
665
836
|
runtime?: Runtime
|
|
666
837
|
}
|
|
@@ -682,21 +853,26 @@ export function superviseTick(cfg: LifecycleConfig, deps: SuperviseDeps = {}): S
|
|
|
682
853
|
}
|
|
683
854
|
const sock = buildSocketPath(s.runtime, s.personality, cfg.sockDir)
|
|
684
855
|
if (!sessionAlive(sock, s.identity)) {
|
|
856
|
+
// A dead session: record a death for crash-loop accounting, then branch on the
|
|
857
|
+
// .new-eager mark. This death was NOT daemon-initiated (the daemon only initiates
|
|
858
|
+
// the idle-reap below) → it died on its own → do NOT write .idle-reaped here.
|
|
859
|
+
recordDeath(cfg, s.identity, nowMs)
|
|
685
860
|
removeSessionState(cfg, s.identity)
|
|
686
|
-
//
|
|
687
|
-
//
|
|
688
|
-
//
|
|
689
|
-
|
|
690
|
-
if (hasNewMark(cfg, s.identity)) {
|
|
861
|
+
// A session that died carrying a .new-eager mark is an owner /new: re-launch
|
|
862
|
+
// EAGERLY as fresh (not lazily on the next message). The mark is LEFT for the
|
|
863
|
+
// eager relaunch (processEagerRelaunches) to consume; the daemon timer drives it.
|
|
864
|
+
if (hasNewEager(cfg, s.identity)) {
|
|
691
865
|
out.push({
|
|
692
866
|
identity: s.identity,
|
|
693
867
|
action: 'needs-eager-fresh',
|
|
694
|
-
reason: '/new
|
|
868
|
+
reason: '/new eager mark — eager fresh re-launch',
|
|
695
869
|
personality: s.personality,
|
|
696
870
|
runtime: s.runtime,
|
|
697
871
|
})
|
|
698
872
|
continue
|
|
699
873
|
}
|
|
874
|
+
// Crash / self-close: NO marker written, NO eager relaunch — the peer stays
|
|
875
|
+
// asleep and wakes FRESH lazily on the next message (resolveWakeMode branch 3a).
|
|
700
876
|
out.push({ identity: s.identity, action: 'reaped-gone', reason: 'session no longer live' })
|
|
701
877
|
continue
|
|
702
878
|
}
|
|
@@ -712,7 +888,12 @@ export function superviseTick(cfg: LifecycleConfig, deps: SuperviseDeps = {}): S
|
|
|
712
888
|
}
|
|
713
889
|
const ageSecs = Math.floor((nowMs - mt) / 1000)
|
|
714
890
|
if (ageSecs > cfg.idleSecs) {
|
|
891
|
+
// THE ONLY place .idle-reaped is written: this is the one death the daemon
|
|
892
|
+
// INITIATES. Its presence on the next wake = the session was parked cleanly =
|
|
893
|
+
// RESUME-eligible (resolveWakeMode branch 3b). A crash/self-close (the dead
|
|
894
|
+
// branch above) never writes it → that wakes FRESH (branch 3a).
|
|
715
895
|
killSession(sock, s.identity)
|
|
896
|
+
setIdleReaped(cfg, s.identity)
|
|
716
897
|
removeSessionState(cfg, s.identity)
|
|
717
898
|
out.push({ identity: s.identity, action: 'reaped-idle', reason: `idle ${ageSecs}s` })
|
|
718
899
|
} else {
|
|
@@ -723,14 +904,15 @@ export function superviseTick(cfg: LifecycleConfig, deps: SuperviseDeps = {}): S
|
|
|
723
904
|
}
|
|
724
905
|
|
|
725
906
|
/**
|
|
726
|
-
*
|
|
727
|
-
*
|
|
728
|
-
*
|
|
729
|
-
*
|
|
730
|
-
*
|
|
731
|
-
*
|
|
732
|
-
*
|
|
733
|
-
*
|
|
907
|
+
* Drive the EAGER fresh re-launch for peers superviseTick flagged 'needs-eager-fresh'
|
|
908
|
+
* (their session died carrying a .new-eager mark — an owner /new). Async + best-effort:
|
|
909
|
+
* task='' so the seed (initial_prompt) is self-sufficient (a /new has no incoming message
|
|
910
|
+
* — the agent auto-reports "I'm up" from the seed). The relaunch is FRESH BY CONSTRUCTION:
|
|
911
|
+
* we CONSUME .new-eager here and pass resume:false so wakeOrSpawn's resolveWakeMode takes
|
|
912
|
+
* the folder-launch fresh branch WITHOUT consulting the death-cause markers. The mark is
|
|
913
|
+
* consumed BEFORE the relaunch so a relaunch failure does not loop on the same eager mark
|
|
914
|
+
* (it then fresh-wakes lazily on its next message — branch 3a — never lost). NB: a /new'd
|
|
915
|
+
* peer is expected to carry an initial_prompt; without one the first turn delivers nothing.
|
|
734
916
|
*/
|
|
735
917
|
export async function processEagerRelaunches(
|
|
736
918
|
cfg: LifecycleConfig,
|
|
@@ -740,9 +922,13 @@ export async function processEagerRelaunches(
|
|
|
740
922
|
const results: WakeResult[] = []
|
|
741
923
|
for (const o of outcomes) {
|
|
742
924
|
if (o.action !== 'needs-eager-fresh' || !o.personality || !o.runtime) continue
|
|
925
|
+
clearNewEager(cfg, o.identity) // consume the eager mark — the relaunch is fresh by construction
|
|
743
926
|
try {
|
|
744
927
|
results.push(
|
|
745
|
-
await wakeOrSpawn(
|
|
928
|
+
await wakeOrSpawn(
|
|
929
|
+
{ personality: o.personality, runtime: o.runtime, task: '', resume: false },
|
|
930
|
+
{ cfg, env: deps.env },
|
|
931
|
+
),
|
|
746
932
|
)
|
|
747
933
|
} catch (e) {
|
|
748
934
|
results.push({ status: 'FAILED', woke: false, reason: e instanceof Error ? e.message : String(e) })
|
|
@@ -4,22 +4,29 @@ import { tmpdir } from 'os'
|
|
|
4
4
|
import { join } from 'path'
|
|
5
5
|
import {
|
|
6
6
|
attachPeer,
|
|
7
|
-
|
|
7
|
+
clearNewEager,
|
|
8
8
|
clearStopped,
|
|
9
9
|
composeFirstMessage,
|
|
10
|
+
countRecentDeaths,
|
|
10
11
|
folderLaunch,
|
|
11
|
-
|
|
12
|
+
hasIdleReaped,
|
|
13
|
+
hasNewEager,
|
|
12
14
|
isLaunchdManaged,
|
|
13
15
|
isStopped,
|
|
14
16
|
lastActiveRuntime,
|
|
15
17
|
loadLifecycleConfig,
|
|
18
|
+
readDeaths,
|
|
19
|
+
readTopic,
|
|
20
|
+
recordDeath,
|
|
16
21
|
resolveWakeMode,
|
|
17
22
|
resolveWakeRuntime,
|
|
18
|
-
|
|
23
|
+
setIdleReaped,
|
|
24
|
+
setNewEager,
|
|
19
25
|
setStopped,
|
|
20
26
|
superviseTick,
|
|
21
27
|
wakeOrSpawn,
|
|
22
28
|
withWakeLock,
|
|
29
|
+
writeTopic,
|
|
23
30
|
type LifecycleConfig,
|
|
24
31
|
} from './index.ts'
|
|
25
32
|
import { upsertPeer, type PeerRecord } from '../registry/index.ts'
|
|
@@ -72,10 +79,12 @@ describe('resolveWakeRuntime (H5)', () => {
|
|
|
72
79
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
73
80
|
|
|
74
81
|
describe('isLaunchdManaged (H4 detector, live read-only)', () => {
|
|
75
|
-
test('a launchd-managed peer (
|
|
76
|
-
//
|
|
77
|
-
//
|
|
78
|
-
|
|
82
|
+
test('a launchd-managed peer (timer has com.iapeer.timer.plist) → true', () => {
|
|
83
|
+
// Post foundation-migration the always-on INFRA peers (timer/watcher/arthur) keep
|
|
84
|
+
// their com.iapeer.<p>.plist; the daemon must treat them READ-ONLY (never
|
|
85
|
+
// wake/reap). This proves the detector fires on the live fleet. (boris and the
|
|
86
|
+
// agent peers became warm-on-demand — plist relocated — so they are NOT managed.)
|
|
87
|
+
expect(isLaunchdManaged('timer')).toBe(true)
|
|
79
88
|
})
|
|
80
89
|
|
|
81
90
|
test('a made-up daemon-owned name (no plist) → false', () => {
|
|
@@ -338,54 +347,119 @@ describe('C2 initial_prompt (composeFirstMessage)', () => {
|
|
|
338
347
|
})
|
|
339
348
|
|
|
340
349
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
341
|
-
//
|
|
350
|
+
// resolveWakeMode — TARGET redesign (daemon decides fresh-vs-resume by DEATH CAUSE
|
|
351
|
+
// = .idle-reaped marker, plus peer-type/topic; NO agent-dropped fresh mark).
|
|
342
352
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
343
353
|
|
|
344
|
-
|
|
354
|
+
/** A temp cwd with a peer-profile; interfaces.telegram present → human-conversational. */
|
|
355
|
+
function profileCwd(human: boolean): string {
|
|
356
|
+
const cwd = mkdtempSync(join(tmpdir(), 'iapeer-wm-cwd-'))
|
|
357
|
+
mkdirSync(join(cwd, '.iapeer'), { recursive: true })
|
|
358
|
+
writeFileSync(
|
|
359
|
+
join(cwd, '.iapeer', 'peer-profile.json'),
|
|
360
|
+
JSON.stringify({
|
|
361
|
+
personality: 'p',
|
|
362
|
+
runtime: 'claude',
|
|
363
|
+
runtimes: ['claude'],
|
|
364
|
+
intelligence: human ? 'natural' : 'artificial',
|
|
365
|
+
...(human ? { interfaces: { telegram: { user_id: 1 } } } : {}),
|
|
366
|
+
}),
|
|
367
|
+
)
|
|
368
|
+
return cwd
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
describe('resolveWakeMode (TARGET: death-cause + peer-type/topic)', () => {
|
|
345
372
|
let stateDir: string
|
|
373
|
+
let cwds: string[]
|
|
346
374
|
beforeEach(() => {
|
|
347
375
|
stateDir = mkdtempSync(join(tmpdir(), 'iapeer-wakemode-'))
|
|
376
|
+
cwds = []
|
|
348
377
|
})
|
|
349
378
|
afterEach(() => {
|
|
350
379
|
rmSync(stateDir, { recursive: true, force: true })
|
|
380
|
+
for (const c of cwds) rmSync(c, { recursive: true, force: true })
|
|
351
381
|
})
|
|
352
382
|
const cfg = () => ({ stateDir } as LifecycleConfig)
|
|
383
|
+
const cwd = (human = false) => {
|
|
384
|
+
const c = profileCwd(human)
|
|
385
|
+
cwds.push(c)
|
|
386
|
+
return c
|
|
387
|
+
}
|
|
353
388
|
const hasTranscript = () => ({ ok: true, ref: 'uuid-1' })
|
|
354
389
|
const noTranscript = () => ({ ok: false, reason: 'no transcript to resume' })
|
|
355
390
|
|
|
356
|
-
|
|
357
|
-
|
|
391
|
+
// ── branch 1/2: explicit fresh / explicit resume (unchanged) ────────────────
|
|
392
|
+
test('argsResume=false (folder-launch) → FRESH', () => {
|
|
393
|
+
expect(resolveWakeMode(cfg(), 'claude-p', cwd(), false, hasTranscript)).toEqual({ resume: false })
|
|
358
394
|
})
|
|
359
|
-
test('
|
|
360
|
-
expect(resolveWakeMode(cfg(), 'claude-p',
|
|
395
|
+
test('argsResume=true (attach) + transcript → RESUME', () => {
|
|
396
|
+
expect(resolveWakeMode(cfg(), 'claude-p', cwd(), true, hasTranscript)).toEqual({ resume: true, resumeRef: 'uuid-1' })
|
|
361
397
|
})
|
|
362
|
-
test('
|
|
363
|
-
const m = resolveWakeMode(cfg(), 'claude-p',
|
|
398
|
+
test('argsResume=true + nothing to resume → FAIL-LOUD (failReason, no silent fresh)', () => {
|
|
399
|
+
const m = resolveWakeMode(cfg(), 'claude-p', cwd(), true, noTranscript)
|
|
364
400
|
expect(m.resume).toBe(false)
|
|
365
401
|
expect(m.failReason).toMatch(/nothing to resume|no transcript/)
|
|
366
402
|
})
|
|
367
|
-
|
|
368
|
-
|
|
403
|
+
|
|
404
|
+
// ── branch 3a: default + NOT idle-reaped → it died on its own → FRESH ────────
|
|
405
|
+
test('DEFAULT + NOT idle-reaped (crash/self-close) → FRESH even when a transcript exists', () => {
|
|
406
|
+
// INVERSION of the old polarity: absence of the daemon's idle-reaped marker = died
|
|
407
|
+
// on its own = clean fresh, NOT a resume of a possibly-broken context.
|
|
408
|
+
expect(resolveWakeMode(cfg(), 'claude-p', cwd(), undefined, hasTranscript)).toEqual({ resume: false })
|
|
409
|
+
})
|
|
410
|
+
|
|
411
|
+
// ── branch 3b: default + idle-reaped → resume-eligible, CONSUME the marker ───
|
|
412
|
+
test('DEFAULT + idle-reaped + human-conversational (interfaces.telegram) → RESUME, marker consumed', () => {
|
|
413
|
+
const c = cfg()
|
|
414
|
+
setIdleReaped(c, 'claude-p')
|
|
415
|
+
const human = cwd(true)
|
|
416
|
+
expect(resolveWakeMode(c, 'claude-p', human, undefined, hasTranscript)).toEqual({ resume: true, resumeRef: 'uuid-1' })
|
|
417
|
+
expect(hasIdleReaped(c, 'claude-p')).toBe(false) // consumed
|
|
418
|
+
})
|
|
419
|
+
test('DEFAULT + idle-reaped + executor + NO incoming topic → RESUME (continue the work)', () => {
|
|
420
|
+
const c = cfg()
|
|
421
|
+
setIdleReaped(c, 'claude-p')
|
|
422
|
+
expect(resolveWakeMode(c, 'claude-p', cwd(false), undefined, hasTranscript)).toEqual({ resume: true, resumeRef: 'uuid-1' })
|
|
423
|
+
})
|
|
424
|
+
test('DEFAULT + idle-reaped + executor + SAME topic → RESUME', () => {
|
|
425
|
+
const c = cfg()
|
|
426
|
+
setIdleReaped(c, 'claude-p')
|
|
427
|
+
writeTopic(c, 'claude-p', 'deploy')
|
|
428
|
+
expect(resolveWakeMode(c, 'claude-p', cwd(false), undefined, hasTranscript, 'deploy')).toEqual({ resume: true, resumeRef: 'uuid-1' })
|
|
369
429
|
})
|
|
370
|
-
test('
|
|
430
|
+
test('DEFAULT + idle-reaped + executor + DIFFERENT topic → FRESH (new work), marker consumed', () => {
|
|
371
431
|
const c = cfg()
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
expect(resolveWakeMode(c, 'claude-p',
|
|
375
|
-
expect(
|
|
432
|
+
setIdleReaped(c, 'claude-p')
|
|
433
|
+
writeTopic(c, 'claude-p', 'deploy')
|
|
434
|
+
expect(resolveWakeMode(c, 'claude-p', cwd(false), undefined, hasTranscript, 'unrelated-bug')).toEqual({ resume: false })
|
|
435
|
+
expect(hasIdleReaped(c, 'claude-p')).toBe(false) // consumed even on the fresh executor branch
|
|
436
|
+
})
|
|
437
|
+
})
|
|
438
|
+
|
|
439
|
+
describe('idle-reaped marker round-trip', () => {
|
|
440
|
+
test('set/has/clear', () => {
|
|
441
|
+
const stateDir = mkdtempSync(join(tmpdir(), 'iapeer-idlereap-'))
|
|
442
|
+
const cfg = { stateDir } as LifecycleConfig
|
|
443
|
+
try {
|
|
444
|
+
expect(hasIdleReaped(cfg, 'claude-y')).toBe(false)
|
|
445
|
+
setIdleReaped(cfg, 'claude-y')
|
|
446
|
+
expect(hasIdleReaped(cfg, 'claude-y')).toBe(true)
|
|
447
|
+
} finally {
|
|
448
|
+
rmSync(stateDir, { recursive: true, force: true })
|
|
449
|
+
}
|
|
376
450
|
})
|
|
377
451
|
})
|
|
378
452
|
|
|
379
|
-
describe('
|
|
453
|
+
describe('new-eager marker round-trip', () => {
|
|
380
454
|
test('set/has/clear', () => {
|
|
381
|
-
const stateDir = mkdtempSync(join(tmpdir(), 'iapeer-
|
|
455
|
+
const stateDir = mkdtempSync(join(tmpdir(), 'iapeer-neweager-'))
|
|
382
456
|
const cfg = { stateDir } as LifecycleConfig
|
|
383
457
|
try {
|
|
384
|
-
expect(
|
|
385
|
-
|
|
386
|
-
expect(
|
|
387
|
-
|
|
388
|
-
expect(
|
|
458
|
+
expect(hasNewEager(cfg, 'claude-y')).toBe(false)
|
|
459
|
+
setNewEager(cfg, 'claude-y')
|
|
460
|
+
expect(hasNewEager(cfg, 'claude-y')).toBe(true)
|
|
461
|
+
clearNewEager(cfg, 'claude-y')
|
|
462
|
+
expect(hasNewEager(cfg, 'claude-y')).toBe(false)
|
|
389
463
|
} finally {
|
|
390
464
|
rmSync(stateDir, { recursive: true, force: true })
|
|
391
465
|
}
|
|
@@ -393,55 +467,54 @@ describe('C4a /new-mark round-trip', () => {
|
|
|
393
467
|
})
|
|
394
468
|
|
|
395
469
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
396
|
-
//
|
|
470
|
+
// superviseTick — death-cause accounting (TARGET redesign)
|
|
471
|
+
// • idle-reap is the ONLY place .idle-reaped is written
|
|
472
|
+
// • a crash/self-close death writes NO marker (lazy fresh on next message)
|
|
473
|
+
// • a dead session carrying .new-eager → needs-eager-fresh (mark LEFT for relaunch)
|
|
474
|
+
// • every dead session records a death (crash-loop accounting)
|
|
397
475
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
398
476
|
|
|
399
|
-
describe('
|
|
400
|
-
|
|
401
|
-
const root = mkdtempSync(join(tmpdir(), 'iapeer-
|
|
402
|
-
const laDir = mkdtempSync(join(tmpdir(), 'iapeer-
|
|
477
|
+
describe('superviseTick death-cause accounting (TARGET)', () => {
|
|
478
|
+
function deadSessionEnv(personality: string): { env: NodeJS.ProcessEnv; cfg: LifecycleConfig; root: string; laDir: string } {
|
|
479
|
+
const root = mkdtempSync(join(tmpdir(), 'iapeer-sup-tgt-root-'))
|
|
480
|
+
const laDir = mkdtempSync(join(tmpdir(), 'iapeer-sup-tgt-la-')) // empty → not launchd-managed
|
|
481
|
+
const env = { ...process.env, IAPEER_ROOT: root, IAPEER_LAUNCHAGENTS_DIR: laDir, IAPEER_SOCK_DIR: join(root, 'socks') }
|
|
482
|
+
const cfg = loadLifecycleConfig(env)
|
|
483
|
+
mkdirSync(cfg.stateDir, { recursive: true })
|
|
484
|
+
writeFileSync(
|
|
485
|
+
join(cfg.stateDir, `claude-${personality}.session`),
|
|
486
|
+
JSON.stringify({ identity: `claude-${personality}`, runtime: 'claude', personality, cwd: `/tmp/${personality}`, wokeAt: Date.now() }),
|
|
487
|
+
)
|
|
488
|
+
return { env, cfg, root, laDir }
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
test('a DEAD session carrying .new-eager → needs-eager-fresh (mark LEFT for relaunch), death recorded', () => {
|
|
492
|
+
const { env, cfg, root, laDir } = deadSessionEnv('z')
|
|
403
493
|
try {
|
|
404
|
-
|
|
405
|
-
...process.env,
|
|
406
|
-
IAPEER_ROOT: root,
|
|
407
|
-
IAPEER_LAUNCHAGENTS_DIR: laDir,
|
|
408
|
-
IAPEER_SOCK_DIR: join(root, 'socks'), // isolated, no live session here → dead
|
|
409
|
-
}
|
|
410
|
-
const cfg = loadLifecycleConfig(env)
|
|
411
|
-
mkdirSync(cfg.stateDir, { recursive: true })
|
|
412
|
-
// a session-state for a peer whose session is NOT live (dead)
|
|
413
|
-
writeFileSync(
|
|
414
|
-
join(cfg.stateDir, 'claude-z.session'),
|
|
415
|
-
JSON.stringify({ identity: 'claude-z', runtime: 'claude', personality: 'z', cwd: '/tmp/z', wokeAt: Date.now() }),
|
|
416
|
-
)
|
|
417
|
-
setNewMark(cfg, 'claude-z')
|
|
494
|
+
setNewEager(cfg, 'claude-z')
|
|
418
495
|
const out = superviseTick(cfg, { env })
|
|
419
496
|
const o = out.find(x => x.identity === 'claude-z')
|
|
420
497
|
expect(o?.action).toBe('needs-eager-fresh')
|
|
421
498
|
expect(o?.personality).toBe('z')
|
|
422
499
|
expect(o?.runtime).toBe('claude')
|
|
423
|
-
// the
|
|
424
|
-
|
|
425
|
-
|
|
500
|
+
// the eager mark is LEFT for processEagerRelaunches to consume
|
|
501
|
+
expect(hasNewEager(cfg, 'claude-z')).toBe(true)
|
|
502
|
+
// every dead session records a death for the crash-loop ring
|
|
503
|
+
expect(readDeaths(cfg, 'claude-z').length).toBe(1)
|
|
426
504
|
} finally {
|
|
427
505
|
rmSync(root, { recursive: true, force: true })
|
|
428
506
|
rmSync(laDir, { recursive: true, force: true })
|
|
429
507
|
}
|
|
430
508
|
})
|
|
431
509
|
|
|
432
|
-
test('a DEAD session with NO
|
|
433
|
-
const root =
|
|
434
|
-
const laDir = mkdtempSync(join(tmpdir(), 'iapeer-c4b2-la-'))
|
|
510
|
+
test('a DEAD session with NO .new-eager → reaped-gone, NO .idle-reaped written (crash leaves no marker)', () => {
|
|
511
|
+
const { env, cfg, root, laDir } = deadSessionEnv('w')
|
|
435
512
|
try {
|
|
436
|
-
const env = { ...process.env, IAPEER_ROOT: root, IAPEER_LAUNCHAGENTS_DIR: laDir, IAPEER_SOCK_DIR: join(root, 'socks') }
|
|
437
|
-
const cfg = loadLifecycleConfig(env)
|
|
438
|
-
mkdirSync(cfg.stateDir, { recursive: true })
|
|
439
|
-
writeFileSync(
|
|
440
|
-
join(cfg.stateDir, 'claude-w.session'),
|
|
441
|
-
JSON.stringify({ identity: 'claude-w', runtime: 'claude', personality: 'w', cwd: '/tmp/w', wokeAt: Date.now() }),
|
|
442
|
-
)
|
|
443
513
|
const out = superviseTick(cfg, { env })
|
|
444
514
|
expect(out.find(x => x.identity === 'claude-w')?.action).toBe('reaped-gone')
|
|
515
|
+
// a crash/self-close is NOT daemon-initiated → no idle-reaped marker → next wake FRESH
|
|
516
|
+
expect(hasIdleReaped(cfg, 'claude-w')).toBe(false)
|
|
517
|
+
expect(readDeaths(cfg, 'claude-w').length).toBe(1)
|
|
445
518
|
} finally {
|
|
446
519
|
rmSync(root, { recursive: true, force: true })
|
|
447
520
|
rmSync(laDir, { recursive: true, force: true })
|
|
@@ -449,6 +522,71 @@ describe('C4b eager fresh re-launch (superviseTick detection)', () => {
|
|
|
449
522
|
})
|
|
450
523
|
})
|
|
451
524
|
|
|
525
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
526
|
+
// Crash-loop guard — wakeOrSpawn refuses to (re)launch after N deaths in the window
|
|
527
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
528
|
+
|
|
529
|
+
describe('crash-loop guard', () => {
|
|
530
|
+
test('countRecentDeaths windows correctly; recordDeath rings', () => {
|
|
531
|
+
const stateDir = mkdtempSync(join(tmpdir(), 'iapeer-deaths-'))
|
|
532
|
+
const cfg = { stateDir } as LifecycleConfig
|
|
533
|
+
try {
|
|
534
|
+
const now = 1_000_000
|
|
535
|
+
recordDeath(cfg, 'claude-d', now - 400_000) // outside a 300s window
|
|
536
|
+
recordDeath(cfg, 'claude-d', now - 10_000)
|
|
537
|
+
recordDeath(cfg, 'claude-d', now - 5_000)
|
|
538
|
+
expect(countRecentDeaths(cfg, 'claude-d', 300, now)).toBe(2)
|
|
539
|
+
expect(countRecentDeaths(cfg, 'claude-d', 600, now)).toBe(3)
|
|
540
|
+
} finally {
|
|
541
|
+
rmSync(stateDir, { recursive: true, force: true })
|
|
542
|
+
}
|
|
543
|
+
})
|
|
544
|
+
|
|
545
|
+
test('wakeOrSpawn REFUSES after crashLoopMax deaths within the window (FAILED, no launch)', async () => {
|
|
546
|
+
const root = mkdtempSync(join(tmpdir(), 'iapeer-clg-root-'))
|
|
547
|
+
const laDir = mkdtempSync(join(tmpdir(), 'iapeer-clg-la-')) // empty → not launchd-managed
|
|
548
|
+
const peerCwd = mkdtempSync(join(tmpdir(), 'iapeer-clg-cwd-')) // REAL cwd so the cwd-existence check passes and the guard is what fires
|
|
549
|
+
try {
|
|
550
|
+
await upsertPeer(
|
|
551
|
+
{ personality: 'clg', runtime: 'claude', cwd: peerCwd, intelligence: 'artificial' },
|
|
552
|
+
{ rootDir: root },
|
|
553
|
+
)
|
|
554
|
+
const env = { ...process.env, IAPEER_ROOT: root, IAPEER_LAUNCHAGENTS_DIR: laDir, IAPEER_SOCK_DIR: join(root, 'socks'), IAPEER_CRASHLOOP_MAX: '3', IAPEER_CRASHLOOP_WINDOW_SECS: '300' }
|
|
555
|
+
const cfg = loadLifecycleConfig(env)
|
|
556
|
+
const now = Date.now()
|
|
557
|
+
recordDeath(cfg, 'claude-clg', now)
|
|
558
|
+
recordDeath(cfg, 'claude-clg', now)
|
|
559
|
+
recordDeath(cfg, 'claude-clg', now)
|
|
560
|
+
const r = await wakeOrSpawn({ personality: 'clg', runtime: 'claude', task: 'must not launch' }, { env })
|
|
561
|
+
expect(r.status).toBe('FAILED')
|
|
562
|
+
expect(r.woke).toBe(false)
|
|
563
|
+
expect(r.reason).toMatch(/crash-loop guard/)
|
|
564
|
+
} finally {
|
|
565
|
+
rmSync(root, { recursive: true, force: true })
|
|
566
|
+
rmSync(laDir, { recursive: true, force: true })
|
|
567
|
+
rmSync(peerCwd, { recursive: true, force: true })
|
|
568
|
+
}
|
|
569
|
+
})
|
|
570
|
+
})
|
|
571
|
+
|
|
572
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
573
|
+
// .topic — executor discriminator round-trip
|
|
574
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
575
|
+
|
|
576
|
+
describe('topic marker round-trip', () => {
|
|
577
|
+
test('writeTopic/readTopic', () => {
|
|
578
|
+
const stateDir = mkdtempSync(join(tmpdir(), 'iapeer-topic-'))
|
|
579
|
+
const cfg = { stateDir } as LifecycleConfig
|
|
580
|
+
try {
|
|
581
|
+
expect(readTopic(cfg, 'claude-t')).toBe('')
|
|
582
|
+
writeTopic(cfg, 'claude-t', 'deploy-pipeline')
|
|
583
|
+
expect(readTopic(cfg, 'claude-t')).toBe('deploy-pipeline')
|
|
584
|
+
} finally {
|
|
585
|
+
rmSync(stateDir, { recursive: true, force: true })
|
|
586
|
+
}
|
|
587
|
+
})
|
|
588
|
+
})
|
|
589
|
+
|
|
452
590
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
453
591
|
// Ф-D launch / attach — operator verbs (error paths; success paths are live-verified)
|
|
454
592
|
// ─────────────────────────────────────────────────────────────────────────────
|