@agfpd/iapeer 0.2.10 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli/index.ts +24 -1
- package/src/daemon/daemon.test.ts +116 -3
- package/src/daemon/index.ts +58 -8
- package/src/daemon/main.test.ts +112 -0
- package/src/daemon/main.ts +87 -1
- package/src/index.ts +3 -0
- package/src/lifecycle/index.ts +174 -4
- package/src/lifecycle/lifecycle.test.ts +134 -0
- package/src/lifecycle/queue.test.ts +185 -0
- package/src/lifecycle/queue.ts +159 -0
- package/src/onboard/memory.test.ts +157 -0
- package/src/onboard/memory.ts +124 -0
- package/src/provision/index.ts +21 -0
- package/src/provision/provision.test.ts +57 -1
- package/src/status/index.ts +119 -0
- package/src/status/status.test.ts +125 -0
- package/src/transport/index.ts +39 -0
package/src/lifecycle/index.ts
CHANGED
|
@@ -24,10 +24,16 @@ import {
|
|
|
24
24
|
resolveSockDir,
|
|
25
25
|
type Runtime,
|
|
26
26
|
} from '../core/constants.ts'
|
|
27
|
-
import { buildProcessAddress, buildSocketPath } from '../core/socket.ts'
|
|
27
|
+
import { buildProcessAddress, buildSocketPath, parseSessionName } from '../core/socket.ts'
|
|
28
28
|
import { err, ok, type Result } from '../core/errors.ts'
|
|
29
29
|
import { resolveGlobalRoot } from '../storage/index.ts'
|
|
30
30
|
import { readPeerProfile, resolveIdentity } from '../identity/index.ts'
|
|
31
|
+
import {
|
|
32
|
+
ephemeralQueueDepth,
|
|
33
|
+
listQueuedIdentities,
|
|
34
|
+
peekEphemeralTask,
|
|
35
|
+
removeEphemeralTask,
|
|
36
|
+
} from './queue.ts'
|
|
31
37
|
import { findPeer, publicPeerSummary, readPeersIndex, type PeerRecord, type PublicPeerSummary } from '../registry/index.ts'
|
|
32
38
|
// Ф3: launch = HOW to bring up ONE session (runtime-agnostic primitive + adapter).
|
|
33
39
|
// lifecycle decides WHEN/HOW-MANY and delegates the bring-up to launch.
|
|
@@ -65,6 +71,13 @@ export interface LifecycleConfig {
|
|
|
65
71
|
crashLoopMax: number
|
|
66
72
|
/** Crash-loop guard: the sliding window (seconds) the death count is measured over. */
|
|
67
73
|
crashLoopWindowSecs: number
|
|
74
|
+
/** wake_policy:ephemeral M2 — the quiet window (seconds of activity-proxy silence)
|
|
75
|
+
* after which an ARMED ephemeral session (it has sent its outbound reply) is
|
|
76
|
+
* reaped. Long enough for the post-reply housekeeping writes (operative notes)
|
|
77
|
+
* to keep resetting it; far below idleSecs. ASSUMPTION (boris, by design): the
|
|
78
|
+
* transcript mtime is a LIVENESS proxy — "no longer writing" — not a semantic
|
|
79
|
+
* "done" signal. */
|
|
80
|
+
ephemeralQuietSecs: number
|
|
68
81
|
}
|
|
69
82
|
|
|
70
83
|
export function loadLifecycleConfig(env: NodeJS.ProcessEnv = process.env): LifecycleConfig {
|
|
@@ -87,6 +100,7 @@ export function loadLifecycleConfig(env: NodeJS.ProcessEnv = process.env): Lifec
|
|
|
87
100
|
maxAgeSecs: num(env.IAPEER_MAX_AGE_SECS, 14400),
|
|
88
101
|
crashLoopMax: num(env.IAPEER_CRASHLOOP_MAX, 3),
|
|
89
102
|
crashLoopWindowSecs: num(env.IAPEER_CRASHLOOP_WINDOW_SECS, 300),
|
|
103
|
+
ephemeralQuietSecs: num(env.IAPEER_EPHEMERAL_QUIET_SECS, 20),
|
|
90
104
|
}
|
|
91
105
|
}
|
|
92
106
|
|
|
@@ -194,6 +208,14 @@ export function clearStopped(cfg: LifecycleConfig, identity: string): void {
|
|
|
194
208
|
// .deaths : crash-loop guard — a small JSON ring of recent death epoch-ms.
|
|
195
209
|
// .topic : the topic tag of the current/last session (executor fresh-vs-
|
|
196
210
|
// resume discriminator).
|
|
211
|
+
// .ephemeral-armed : wake_policy:ephemeral M2 — set by the DAEMON when it routes
|
|
212
|
+
// an OUTBOUND send from an ephemeral worker (its single final reply, ADR-006:
|
|
213
|
+
// workers send no intermediate messages, so outbound ⇒ the task is answered).
|
|
214
|
+
// An ARMED live session is reaped by superviseTick after a quiet window
|
|
215
|
+
// (die-after-reply). The marker belongs to the session that sent the outbound:
|
|
216
|
+
// it is cleared on the quiet-reap, on that session's own death, and on the
|
|
217
|
+
// next successful launch — it NEVER survives into a successor session (else a
|
|
218
|
+
// stray marker would quiet-reap the next task before its answer).
|
|
197
219
|
//
|
|
198
220
|
// Boolean markers carry an ISO timestamp line (audit-friendly); .deaths is a JSON
|
|
199
221
|
// array; .topic is the raw topic string.
|
|
@@ -247,6 +269,32 @@ export function clearNewEager(cfg: LifecycleConfig, identity: string): void {
|
|
|
247
269
|
}
|
|
248
270
|
}
|
|
249
271
|
|
|
272
|
+
function ephemeralArmedPath(cfg: LifecycleConfig, identity: string): string {
|
|
273
|
+
return join(cfg.stateDir, `${identity}.ephemeral-armed`)
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
/** True iff the identity's live ephemeral session has sent its outbound reply
|
|
277
|
+
* (→ quiet-reap candidate). */
|
|
278
|
+
export function hasEphemeralArmed(cfg: LifecycleConfig, identity: string): boolean {
|
|
279
|
+
return existsSync(ephemeralArmedPath(cfg, identity))
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
/** Arm the die-after-reply reap — ONLY the daemon's outbound seam does this
|
|
283
|
+
* (an ephemeral caller's send_to_peer was routed ok). */
|
|
284
|
+
export function setEphemeralArmed(cfg: LifecycleConfig, identity: string): void {
|
|
285
|
+
mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
|
|
286
|
+
writeFileSync(ephemeralArmedPath(cfg, identity), `${new Date().toISOString()}\n`, { mode: 0o600 })
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
/** Clear the armed mark (quiet-reap done / the armed session died / a new launch). */
|
|
290
|
+
export function clearEphemeralArmed(cfg: LifecycleConfig, identity: string): void {
|
|
291
|
+
try {
|
|
292
|
+
rmSync(ephemeralArmedPath(cfg, identity), { force: true })
|
|
293
|
+
} catch {
|
|
294
|
+
/* already gone */
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
|
|
250
298
|
function deathsPath(cfg: LifecycleConfig, identity: string): string {
|
|
251
299
|
return join(cfg.stateDir, `${identity}.deaths`)
|
|
252
300
|
}
|
|
@@ -367,7 +415,7 @@ function isHumanConversational(cwd: string): boolean {
|
|
|
367
415
|
* set, ephemeral WINS in resolveWakeMode (explicit policy beats the inferred human
|
|
368
416
|
* type) — provision warns on that combination; it should not occur for real workers.
|
|
369
417
|
*/
|
|
370
|
-
function isEphemeralPeer(cwd: string): boolean {
|
|
418
|
+
export function isEphemeralPeer(cwd: string): boolean {
|
|
371
419
|
try {
|
|
372
420
|
return readPeerProfile(cwd)?.wake_policy === 'ephemeral'
|
|
373
421
|
} catch {
|
|
@@ -855,6 +903,11 @@ export async function wakeOrSpawn(args: WakeArgs, deps: WakeDeps = {}): Promise<
|
|
|
855
903
|
`[iapeer] WARN session-state write failed for ${identity} — session is live + TTL-bounded but not idle-reap-supervised: ${e instanceof Error ? e.message : String(e)}\n`,
|
|
856
904
|
)
|
|
857
905
|
}
|
|
906
|
+
// A NEWLY launched session starts UNARMED by definition (no outbound yet) — clear
|
|
907
|
+
// any stray .ephemeral-armed so it can never quiet-reap this session before its
|
|
908
|
+
// reply. ONLY here on the actual launch path, NEVER on the live-session fast path
|
|
909
|
+
// above (a live session may be legitimately armed mid-quiet-window).
|
|
910
|
+
clearEphemeralArmed(cfg, identity)
|
|
858
911
|
// Establish the session's topic (executor discriminator) and reset the crash-loop
|
|
859
912
|
// ring — a successful wake means this is NOT a tight crash loop. Best-effort.
|
|
860
913
|
writeTopic(cfg, identity, args.topic?.trim() ?? '')
|
|
@@ -894,10 +947,12 @@ export function killSession(sock: string, identity: string): void {
|
|
|
894
947
|
|
|
895
948
|
export interface SuperviseOutcome {
|
|
896
949
|
identity: string
|
|
897
|
-
action: 'reaped-idle' | 'reaped-gone' | 'skipped-launchd' | 'alive' | 'needs-eager-fresh'
|
|
950
|
+
action: 'reaped-idle' | 'reaped-gone' | 'reaped-ephemeral' | 'skipped-launchd' | 'alive' | 'needs-eager-fresh'
|
|
898
951
|
reason?: string
|
|
899
952
|
/** For 'needs-eager-fresh': the peer to EAGERLY re-launch fresh (its session died
|
|
900
|
-
* carrying a .new-eager mark). The daemon timer drives the async relaunch.
|
|
953
|
+
* carrying a .new-eager mark). The daemon timer drives the async relaunch.
|
|
954
|
+
* Also set on 'reaped-ephemeral' — the M3 queue-drain hook needs the peer to
|
|
955
|
+
* wake fresh for the next queued task. */
|
|
901
956
|
personality?: string
|
|
902
957
|
runtime?: Runtime
|
|
903
958
|
}
|
|
@@ -932,6 +987,10 @@ export function superviseTick(cfg: LifecycleConfig, deps: SuperviseDeps = {}): S
|
|
|
932
987
|
// the idle-reap below) → it died on its own → do NOT write .idle-reaped here.
|
|
933
988
|
recordDeath(cfg, s.identity, nowMs)
|
|
934
989
|
removeSessionState(cfg, s.identity)
|
|
990
|
+
// The .ephemeral-armed mark belongs to THIS (now dead) session — it armed on its
|
|
991
|
+
// outbound reply. Clear it with the session, so a successor session can never be
|
|
992
|
+
// quiet-reaped on a stale mark before answering its own task. No-op otherwise.
|
|
993
|
+
clearEphemeralArmed(cfg, s.identity)
|
|
935
994
|
// A session that died carrying a .new-eager mark is an owner /new: re-launch
|
|
936
995
|
// EAGERLY as fresh (not lazily on the next message). The mark is LEFT for the
|
|
937
996
|
// eager relaunch (processEagerRelaunches) to consume; the daemon timer drives it.
|
|
@@ -963,6 +1022,29 @@ export function superviseTick(cfg: LifecycleConfig, deps: SuperviseDeps = {}): S
|
|
|
963
1022
|
/* no adapter for this runtime yet → wokeAt fallback */
|
|
964
1023
|
}
|
|
965
1024
|
const ageSecs = Math.floor((nowMs - mt) / 1000)
|
|
1025
|
+
// wake_policy:ephemeral M2 — die-after-reply: an ARMED ephemeral session (the
|
|
1026
|
+
// daemon routed its outbound reply) is reaped after a QUIET window, checked
|
|
1027
|
+
// BEFORE the idle branch (quiet ≪ idle). Quiet = the activity proxy stayed
|
|
1028
|
+
// silent for ephemeralQuietSecs — post-reply housekeeping (operative-note
|
|
1029
|
+
// writes) keeps resetting it, so the worker finishes its bookkeeping first.
|
|
1030
|
+
// NOT armed (still mid-task, e.g. a long silent tool run) → the ordinary
|
|
1031
|
+
// idle bound below is its only reaper. Deliberate, policy-driven death:
|
|
1032
|
+
// NO .idle-reaped (an ephemeral peer never resumes) and NO recordDeath
|
|
1033
|
+
// (the crash-loop ring counts faults, not policy reaps).
|
|
1034
|
+
if (isEphemeralPeer(s.cwd) && hasEphemeralArmed(cfg, s.identity) && ageSecs > cfg.ephemeralQuietSecs) {
|
|
1035
|
+
killSession(sock, s.identity)
|
|
1036
|
+
clearEphemeralArmed(cfg, s.identity)
|
|
1037
|
+
removeSessionState(cfg, s.identity)
|
|
1038
|
+
out.push({
|
|
1039
|
+
identity: s.identity,
|
|
1040
|
+
action: 'reaped-ephemeral',
|
|
1041
|
+
reason: `armed, quiet ${ageSecs}s`,
|
|
1042
|
+
personality: s.personality,
|
|
1043
|
+
runtime: s.runtime,
|
|
1044
|
+
})
|
|
1045
|
+
trace({ identity: s.identity, action: 'reaped-ephemeral', age: `${ageSecs}s`, outcome: 'ephemeral-done' })
|
|
1046
|
+
continue
|
|
1047
|
+
}
|
|
966
1048
|
if (ageSecs > cfg.idleSecs) {
|
|
967
1049
|
// THE ONLY place .idle-reaped is written: this is the one death the daemon
|
|
968
1050
|
// INITIATES. Its presence on the next wake = the session was parked cleanly =
|
|
@@ -1015,6 +1097,94 @@ export async function processEagerRelaunches(
|
|
|
1015
1097
|
return results
|
|
1016
1098
|
}
|
|
1017
1099
|
|
|
1100
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
1101
|
+
// wake_policy:"ephemeral" M3 — the serial-queue DRAIN. Deliveries to an ephemeral
|
|
1102
|
+
// target are always ENQUEUED (transport's injected ephemeral seam → queue.ts);
|
|
1103
|
+
// this is the consumer side: feed the worker ONE task per fresh session.
|
|
1104
|
+
// Re-export the queue API so consumers (daemon main, tests) reach it through the
|
|
1105
|
+
// module index.
|
|
1106
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
1107
|
+
|
|
1108
|
+
export {
|
|
1109
|
+
enqueueEphemeralTask,
|
|
1110
|
+
ephemeralQueueDepth,
|
|
1111
|
+
ephemeralQueueDir,
|
|
1112
|
+
listQueuedIdentities,
|
|
1113
|
+
peekEphemeralTask,
|
|
1114
|
+
removeEphemeralTask,
|
|
1115
|
+
type EphemeralQueueItem,
|
|
1116
|
+
type PeekedQueueItem,
|
|
1117
|
+
} from './queue.ts'
|
|
1118
|
+
|
|
1119
|
+
export interface DrainDeps {
|
|
1120
|
+
env?: NodeJS.ProcessEnv
|
|
1121
|
+
/** Injectable wake (tests); default wakeOrSpawn. */
|
|
1122
|
+
wakeFn?: (args: WakeArgs, deps: WakeDeps) => Promise<WakeResult>
|
|
1123
|
+
}
|
|
1124
|
+
|
|
1125
|
+
/**
|
|
1126
|
+
* Feed ONE queued task to an ephemeral worker IFF it has no live session:
|
|
1127
|
+
* peek → wake FRESH (the task is the boot first-message; resolveWakeMode takes
|
|
1128
|
+
* the ephemeral-policy branch) → remove the item ONLY on READY. A FAILED wake
|
|
1129
|
+
* LEAVES the item at the head — the next supervise-tick drain retries it (the
|
|
1130
|
+
* crash-loop guard bounds a tight failure loop, its refusals land in
|
|
1131
|
+
* lifecycle.log). Returns null when there is nothing to do (empty queue, or a
|
|
1132
|
+
* session is still live — invariant: ≤1 live session = exactly one task).
|
|
1133
|
+
* Serialization: concurrent drains converge on wakeOrSpawn's per-identity
|
|
1134
|
+
* wake.lock — the loser takes the idempotent live-session fast path and the
|
|
1135
|
+
* item is removed once, delivered once.
|
|
1136
|
+
*/
|
|
1137
|
+
export async function drainEphemeralQueue(
|
|
1138
|
+
cfg: LifecycleConfig,
|
|
1139
|
+
personality: string,
|
|
1140
|
+
runtime: Runtime,
|
|
1141
|
+
deps: DrainDeps = {},
|
|
1142
|
+
): Promise<WakeResult | null> {
|
|
1143
|
+
const env = deps.env ?? process.env
|
|
1144
|
+
const identity = buildProcessAddress(runtime, personality)
|
|
1145
|
+
const sock = buildSocketPath(runtime, personality, cfg.sockDir)
|
|
1146
|
+
if (sessionAlive(sock, identity)) return null // one task per session — wait for its reap
|
|
1147
|
+
const item = peekEphemeralTask(cfg, identity)
|
|
1148
|
+
if (!item) return null
|
|
1149
|
+
// Durable drain trace (boris acceptance (a)): which item, how deep the queue.
|
|
1150
|
+
appendLifecycleEvent(
|
|
1151
|
+
cfg.eventLogDir,
|
|
1152
|
+
{ ev: 'ephemeral-drain', identity, seq: item.seq, depth: ephemeralQueueDepth(cfg, identity) },
|
|
1153
|
+
{ env },
|
|
1154
|
+
)
|
|
1155
|
+
const wake = deps.wakeFn ?? wakeOrSpawn
|
|
1156
|
+
const result = await wake({ personality, runtime, topic: item.topic, task: item.task }, { cfg, env })
|
|
1157
|
+
if (result.status === 'READY') removeEphemeralTask(cfg, identity, item.seq)
|
|
1158
|
+
return result
|
|
1159
|
+
}
|
|
1160
|
+
|
|
1161
|
+
/**
|
|
1162
|
+
* Drain every identity with a non-empty queue and no live session — the daemon's
|
|
1163
|
+
* supervise-tick hook. ONE mechanism is the whole M3 delivery loop: the inline
|
|
1164
|
+
* kick after an enqueue covers the cold/empty case, and this periodic scan covers
|
|
1165
|
+
* (a) the next task after a reaped-ephemeral (same tick that reaped it),
|
|
1166
|
+
* (b) drain-on-start (the queue is durable across daemon restarts), and
|
|
1167
|
+
* (c) the RETRY of a failed wake (the item was left at the head). H4-guarded:
|
|
1168
|
+
* a launchd-managed peer is never woken by a drain (it should never have a
|
|
1169
|
+
* queue, but the guard is structural, not situational).
|
|
1170
|
+
*/
|
|
1171
|
+
export async function drainAllEphemeralQueues(cfg: LifecycleConfig, deps: DrainDeps = {}): Promise<WakeResult[]> {
|
|
1172
|
+
const env = deps.env ?? process.env
|
|
1173
|
+
const results: WakeResult[] = []
|
|
1174
|
+
for (const identity of listQueuedIdentities(cfg)) {
|
|
1175
|
+
const parsed = parseSessionName(identity)
|
|
1176
|
+
if (!parsed) continue
|
|
1177
|
+
if (isLaunchdManaged(parsed.personality, env)) continue
|
|
1178
|
+
try {
|
|
1179
|
+
const r = await drainEphemeralQueue(cfg, parsed.personality, parsed.runtime, deps)
|
|
1180
|
+
if (r) results.push(r)
|
|
1181
|
+
} catch (e) {
|
|
1182
|
+
results.push({ status: 'FAILED', woke: false, reason: e instanceof Error ? e.message : String(e) })
|
|
1183
|
+
}
|
|
1184
|
+
}
|
|
1185
|
+
return results
|
|
1186
|
+
}
|
|
1187
|
+
|
|
1018
1188
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
1019
1189
|
// folderLaunch / attachPeer — operator verbs (contract ЖЦ §Запуск из папки, §attach;
|
|
1020
1190
|
// Примитивы §Карта verbs). Both reuse wakeOrSpawn (one bring-up path); the difference
|
|
@@ -4,13 +4,16 @@ import { tmpdir } from 'os'
|
|
|
4
4
|
import { join } from 'path'
|
|
5
5
|
import {
|
|
6
6
|
attachPeer,
|
|
7
|
+
clearEphemeralArmed,
|
|
7
8
|
clearNewEager,
|
|
8
9
|
clearStopped,
|
|
9
10
|
composeFirstMessage,
|
|
10
11
|
countRecentDeaths,
|
|
11
12
|
folderLaunch,
|
|
13
|
+
hasEphemeralArmed,
|
|
12
14
|
hasIdleReaped,
|
|
13
15
|
hasNewEager,
|
|
16
|
+
isEphemeralPeer,
|
|
14
17
|
isLaunchdManaged,
|
|
15
18
|
isStopped,
|
|
16
19
|
lastActiveRuntime,
|
|
@@ -20,6 +23,7 @@ import {
|
|
|
20
23
|
recordDeath,
|
|
21
24
|
resolveWakeMode,
|
|
22
25
|
resolveWakeRuntime,
|
|
26
|
+
setEphemeralArmed,
|
|
23
27
|
setIdleReaped,
|
|
24
28
|
setNewEager,
|
|
25
29
|
setStopped,
|
|
@@ -29,6 +33,7 @@ import {
|
|
|
29
33
|
writeTopic,
|
|
30
34
|
type LifecycleConfig,
|
|
31
35
|
} from './index.ts'
|
|
36
|
+
import { spawnSync } from 'child_process'
|
|
32
37
|
import { upsertPeer, type PeerRecord } from '../registry/index.ts'
|
|
33
38
|
|
|
34
39
|
function peer(over: Partial<PeerRecord>): PeerRecord {
|
|
@@ -557,6 +562,135 @@ describe('superviseTick death-cause accounting (TARGET)', () => {
|
|
|
557
562
|
rmSync(laDir, { recursive: true, force: true })
|
|
558
563
|
}
|
|
559
564
|
})
|
|
565
|
+
|
|
566
|
+
test('a DEAD session clears a stale .ephemeral-armed (the mark dies with its session)', () => {
|
|
567
|
+
// The mark armed on the dead session's outbound; were it to survive, the NEXT
|
|
568
|
+
// session would be quiet-reap eligible BEFORE answering its own task.
|
|
569
|
+
const { env, cfg, root, laDir } = deadSessionEnv('q')
|
|
570
|
+
try {
|
|
571
|
+
setEphemeralArmed(cfg, 'claude-q')
|
|
572
|
+
const out = superviseTick(cfg, { env })
|
|
573
|
+
expect(out.find(x => x.identity === 'claude-q')?.action).toBe('reaped-gone')
|
|
574
|
+
expect(hasEphemeralArmed(cfg, 'claude-q')).toBe(false)
|
|
575
|
+
} finally {
|
|
576
|
+
rmSync(root, { recursive: true, force: true })
|
|
577
|
+
rmSync(laDir, { recursive: true, force: true })
|
|
578
|
+
}
|
|
579
|
+
})
|
|
580
|
+
})
|
|
581
|
+
|
|
582
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
583
|
+
// wake_policy:ephemeral M2 — armed marker + quiet-reap (die-after-reply)
|
|
584
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
585
|
+
|
|
586
|
+
describe('ephemeral-armed marker + config', () => {
|
|
587
|
+
test('set/has/clear round-trip; clear is idempotent', () => {
|
|
588
|
+
const stateDir = mkdtempSync(join(tmpdir(), 'iapeer-eph-mark-'))
|
|
589
|
+
const cfg = { stateDir } as LifecycleConfig
|
|
590
|
+
try {
|
|
591
|
+
expect(hasEphemeralArmed(cfg, 'claude-e')).toBe(false)
|
|
592
|
+
setEphemeralArmed(cfg, 'claude-e')
|
|
593
|
+
expect(hasEphemeralArmed(cfg, 'claude-e')).toBe(true)
|
|
594
|
+
clearEphemeralArmed(cfg, 'claude-e')
|
|
595
|
+
clearEphemeralArmed(cfg, 'claude-e') // idempotent
|
|
596
|
+
expect(hasEphemeralArmed(cfg, 'claude-e')).toBe(false)
|
|
597
|
+
} finally {
|
|
598
|
+
rmSync(stateDir, { recursive: true, force: true })
|
|
599
|
+
}
|
|
600
|
+
})
|
|
601
|
+
|
|
602
|
+
test('ephemeralQuietSecs: default 20, env-tunable', () => {
|
|
603
|
+
expect(loadLifecycleConfig({ HOME: '/tmp' } as NodeJS.ProcessEnv).ephemeralQuietSecs).toBe(20)
|
|
604
|
+
expect(
|
|
605
|
+
loadLifecycleConfig({ HOME: '/tmp', IAPEER_EPHEMERAL_QUIET_SECS: '45' } as NodeJS.ProcessEnv)
|
|
606
|
+
.ephemeralQuietSecs,
|
|
607
|
+
).toBe(45)
|
|
608
|
+
})
|
|
609
|
+
|
|
610
|
+
test('isEphemeralPeer keys on the cwd profile; read hiccup → false (safe default)', () => {
|
|
611
|
+
const eph = profileCwd(false, true)
|
|
612
|
+
const plain = profileCwd(false, false)
|
|
613
|
+
try {
|
|
614
|
+
expect(isEphemeralPeer(eph)).toBe(true)
|
|
615
|
+
expect(isEphemeralPeer(plain)).toBe(false)
|
|
616
|
+
expect(isEphemeralPeer('/tmp/definitely-no-such-peer-cwd')).toBe(false)
|
|
617
|
+
} finally {
|
|
618
|
+
rmSync(eph, { recursive: true, force: true })
|
|
619
|
+
rmSync(plain, { recursive: true, force: true })
|
|
620
|
+
}
|
|
621
|
+
})
|
|
622
|
+
})
|
|
623
|
+
|
|
624
|
+
describe('superviseTick quiet-reap (M2 die-after-reply, real tmux)', () => {
|
|
625
|
+
const tmuxAvailable = spawnSync('tmux', ['-V'], { stdio: 'ignore' }).status === 0
|
|
626
|
+
|
|
627
|
+
test.if(tmuxAvailable)(
|
|
628
|
+
'ARMED + quiet → reaped-ephemeral (killed, marks cleared, NO death/idle-reaped); unarmed/not-quiet → alive',
|
|
629
|
+
() => {
|
|
630
|
+
const root = mkdtempSync(join(tmpdir(), 'iapeer-eq-root-'))
|
|
631
|
+
const laDir = mkdtempSync(join(tmpdir(), 'iapeer-eq-la-')) // empty → not launchd-managed
|
|
632
|
+
const cwd = profileCwd(false, true) // ephemeral worker profile
|
|
633
|
+
const env = {
|
|
634
|
+
...process.env,
|
|
635
|
+
IAPEER_ROOT: root,
|
|
636
|
+
IAPEER_LAUNCHAGENTS_DIR: laDir,
|
|
637
|
+
IAPEER_SOCK_DIR: join(root, 'socks'),
|
|
638
|
+
}
|
|
639
|
+
const cfg = loadLifecycleConfig(env) // ephemeralQuietSecs 20 ≪ idleSecs 3600
|
|
640
|
+
const identity = 'claude-eq'
|
|
641
|
+
const sock = join(root, 'socks', 'tmux-iap-claude-eq.sock')
|
|
642
|
+
const writeState = (wokeAt: number) => {
|
|
643
|
+
mkdirSync(cfg.stateDir, { recursive: true })
|
|
644
|
+
writeFileSync(
|
|
645
|
+
join(cfg.stateDir, `${identity}.session`),
|
|
646
|
+
JSON.stringify({ identity, runtime: 'claude', personality: 'eq', cwd, wokeAt }),
|
|
647
|
+
)
|
|
648
|
+
}
|
|
649
|
+
const alive = () => spawnSync('tmux', ['-S', sock, 'has-session', '-t', identity]).status === 0
|
|
650
|
+
try {
|
|
651
|
+
mkdirSync(join(root, 'socks'), { recursive: true })
|
|
652
|
+
spawnSync('tmux', ['-S', sock, 'new-session', '-d', '-s', identity, 'sleep', '300'])
|
|
653
|
+
expect(alive()).toBe(true)
|
|
654
|
+
// no transcript in the temp cwd → activity proxy = wokeAt fallback:
|
|
655
|
+
// quiet age is fully controlled by the .session wokeAt below.
|
|
656
|
+
|
|
657
|
+
// 1) NOT armed + quiet-aged → alive (a silent long tool-run is NOT reaped:
|
|
658
|
+
// sleep-180 protection; only the ordinary idle bound applies).
|
|
659
|
+
writeState(Date.now() - 60_000) // age ~60s > quiet 20s, ≪ idle 3600s
|
|
660
|
+
expect(superviseTick(cfg, { env }).find(x => x.identity === identity)?.action).toBe('alive')
|
|
661
|
+
expect(alive()).toBe(true)
|
|
662
|
+
|
|
663
|
+
// 2) ARMED but NOT quiet → alive (post-reply housekeeping keeps it alive).
|
|
664
|
+
setEphemeralArmed(cfg, identity)
|
|
665
|
+
writeState(Date.now()) // age ~0 < quiet
|
|
666
|
+
expect(superviseTick(cfg, { env }).find(x => x.identity === identity)?.action).toBe('alive')
|
|
667
|
+
expect(alive()).toBe(true)
|
|
668
|
+
|
|
669
|
+
// 3) ARMED + quiet → reaped-ephemeral, with the M3 drain fields.
|
|
670
|
+
writeState(Date.now() - 60_000)
|
|
671
|
+
const o = superviseTick(cfg, { env }).find(x => x.identity === identity)
|
|
672
|
+
expect(o?.action).toBe('reaped-ephemeral')
|
|
673
|
+
expect(o?.personality).toBe('eq')
|
|
674
|
+
expect(o?.runtime).toBe('claude')
|
|
675
|
+
expect(alive()).toBe(false) // session killed
|
|
676
|
+
expect(hasEphemeralArmed(cfg, identity)).toBe(false) // mark consumed
|
|
677
|
+
expect(existsSync(join(cfg.stateDir, `${identity}.session`))).toBe(false)
|
|
678
|
+
// deliberate policy death: never resume-eligible, never a crash-loop count
|
|
679
|
+
expect(hasIdleReaped(cfg, identity)).toBe(false)
|
|
680
|
+
expect(readDeaths(cfg, identity).length).toBe(0)
|
|
681
|
+
// durable decision trace
|
|
682
|
+
const logged = readFileSync(join(cfg.eventLogDir, 'lifecycle.log'), 'utf8')
|
|
683
|
+
expect(logged).toContain(`action=reaped-ephemeral`)
|
|
684
|
+
expect(logged).toContain('outcome=ephemeral-done')
|
|
685
|
+
} finally {
|
|
686
|
+
spawnSync('tmux', ['-S', sock, 'kill-server'], { stdio: 'ignore' })
|
|
687
|
+
rmSync(root, { recursive: true, force: true })
|
|
688
|
+
rmSync(laDir, { recursive: true, force: true })
|
|
689
|
+
rmSync(cwd, { recursive: true, force: true })
|
|
690
|
+
}
|
|
691
|
+
},
|
|
692
|
+
30000,
|
|
693
|
+
)
|
|
560
694
|
})
|
|
561
695
|
|
|
562
696
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
// Ephemeral serial queue (M3) — FIFO primitives + the drain consumer.
|
|
2
|
+
// Retry semantics (boris acceptance (b)) are pinned here: a FAILED wake leaves
|
|
3
|
+
// the item at the head and the NEXT drain call retries the SAME task; only a
|
|
4
|
+
// READY wake consumes it. Strict FIFO order is asserted across drains.
|
|
5
|
+
|
|
6
|
+
import { afterEach, describe, expect, test } from 'bun:test'
|
|
7
|
+
import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'fs'
|
|
8
|
+
import { tmpdir } from 'os'
|
|
9
|
+
import { join } from 'path'
|
|
10
|
+
import { spawnSync } from 'child_process'
|
|
11
|
+
import {
|
|
12
|
+
drainAllEphemeralQueues,
|
|
13
|
+
drainEphemeralQueue,
|
|
14
|
+
enqueueEphemeralTask,
|
|
15
|
+
ephemeralQueueDepth,
|
|
16
|
+
ephemeralQueueDir,
|
|
17
|
+
listQueuedIdentities,
|
|
18
|
+
peekEphemeralTask,
|
|
19
|
+
removeEphemeralTask,
|
|
20
|
+
type LifecycleConfig,
|
|
21
|
+
type WakeArgs,
|
|
22
|
+
type WakeResult,
|
|
23
|
+
} from './index.ts'
|
|
24
|
+
|
|
25
|
+
const dirs: string[] = []
|
|
26
|
+
function mkTmp(): string {
|
|
27
|
+
const d = mkdtempSync(join(tmpdir(), 'iapeer-equeue-'))
|
|
28
|
+
dirs.push(d)
|
|
29
|
+
return d
|
|
30
|
+
}
|
|
31
|
+
afterEach(() => {
|
|
32
|
+
while (dirs.length) rmSync(dirs.pop()!, { recursive: true, force: true })
|
|
33
|
+
})
|
|
34
|
+
|
|
35
|
+
function mkCfg(): LifecycleConfig {
|
|
36
|
+
const root = mkTmp()
|
|
37
|
+
return {
|
|
38
|
+
stateDir: join(root, 'state'),
|
|
39
|
+
sockDir: join(root, 'socks'),
|
|
40
|
+
eventLogDir: join(root, 'logs'),
|
|
41
|
+
} as LifecycleConfig
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
describe('ephemeral queue primitives (FIFO)', () => {
|
|
45
|
+
test('enqueue returns depth; peek is non-destructive; remove consumes; strict FIFO', () => {
|
|
46
|
+
const cfg = mkCfg()
|
|
47
|
+
expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(0)
|
|
48
|
+
expect(peekEphemeralTask(cfg, 'claude-w')).toBeNull()
|
|
49
|
+
|
|
50
|
+
expect(enqueueEphemeralTask(cfg, 'claude-w', { task: 'first', topic: 't1' })).toBe(1)
|
|
51
|
+
expect(enqueueEphemeralTask(cfg, 'claude-w', { task: 'second' })).toBe(2)
|
|
52
|
+
expect(enqueueEphemeralTask(cfg, 'claude-w', { task: 'third', topic: 't3' })).toBe(3)
|
|
53
|
+
|
|
54
|
+
const head = peekEphemeralTask(cfg, 'claude-w')
|
|
55
|
+
expect(head?.task).toBe('first')
|
|
56
|
+
expect(head?.topic).toBe('t1')
|
|
57
|
+
expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(3) // peek did not consume
|
|
58
|
+
|
|
59
|
+
removeEphemeralTask(cfg, 'claude-w', head!.seq)
|
|
60
|
+
removeEphemeralTask(cfg, 'claude-w', head!.seq) // idempotent
|
|
61
|
+
expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(2)
|
|
62
|
+
expect(peekEphemeralTask(cfg, 'claude-w')?.task).toBe('second')
|
|
63
|
+
expect(peekEphemeralTask(cfg, 'claude-w')?.topic).toBeUndefined()
|
|
64
|
+
})
|
|
65
|
+
|
|
66
|
+
test('enqueue skips taken seq names (exclusive-create) — no overwrite of a pending task', () => {
|
|
67
|
+
const cfg = mkCfg()
|
|
68
|
+
enqueueEphemeralTask(cfg, 'claude-w', { task: 'one' })
|
|
69
|
+
// simulate a competitor that already claimed the next seq
|
|
70
|
+
writeFileSync(join(ephemeralQueueDir(cfg, 'claude-w'), '000002'), JSON.stringify({ task: 'competitor' }))
|
|
71
|
+
expect(enqueueEphemeralTask(cfg, 'claude-w', { task: 'three' })).toBe(3)
|
|
72
|
+
// all three distinct tasks live side-by-side
|
|
73
|
+
const dir = ephemeralQueueDir(cfg, 'claude-w')
|
|
74
|
+
expect(JSON.parse(readFileSync(join(dir, '000002'), 'utf8')).task).toBe('competitor')
|
|
75
|
+
expect(JSON.parse(readFileSync(join(dir, '000003'), 'utf8')).task).toBe('three')
|
|
76
|
+
})
|
|
77
|
+
|
|
78
|
+
test('a poison head (corrupt JSON) is dropped, not wedging the queue', () => {
|
|
79
|
+
const cfg = mkCfg()
|
|
80
|
+
mkdirSync(ephemeralQueueDir(cfg, 'claude-w'), { recursive: true })
|
|
81
|
+
writeFileSync(join(ephemeralQueueDir(cfg, 'claude-w'), '000001'), 'NOT JSON {{{')
|
|
82
|
+
enqueueEphemeralTask(cfg, 'claude-w', { task: 'good' })
|
|
83
|
+
const head = peekEphemeralTask(cfg, 'claude-w')
|
|
84
|
+
expect(head?.task).toBe('good')
|
|
85
|
+
expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(1) // poison slot dropped
|
|
86
|
+
})
|
|
87
|
+
|
|
88
|
+
test('listQueuedIdentities: only non-empty queues, sorted', () => {
|
|
89
|
+
const cfg = mkCfg()
|
|
90
|
+
enqueueEphemeralTask(cfg, 'claude-b', { task: 'x' })
|
|
91
|
+
enqueueEphemeralTask(cfg, 'claude-a', { task: 'y' })
|
|
92
|
+
mkdirSync(ephemeralQueueDir(cfg, 'claude-empty'), { recursive: true }) // empty dir → excluded
|
|
93
|
+
expect(listQueuedIdentities(cfg)).toEqual(['claude-a', 'claude-b'])
|
|
94
|
+
})
|
|
95
|
+
})
|
|
96
|
+
|
|
97
|
+
describe('drainEphemeralQueue (peek → wake → rm-on-READY)', () => {
|
|
98
|
+
function fakeWake(
|
|
99
|
+
script: Array<'READY' | 'FAILED'>,
|
|
100
|
+
calls: WakeArgs[],
|
|
101
|
+
): (args: WakeArgs) => Promise<WakeResult> {
|
|
102
|
+
return async args => {
|
|
103
|
+
calls.push(args)
|
|
104
|
+
const status = script[Math.min(calls.length - 1, script.length - 1)]!
|
|
105
|
+
return { status, woke: status === 'READY', runtime: 'claude' }
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
test('empty queue → null, wake NOT called', async () => {
|
|
110
|
+
const cfg = mkCfg()
|
|
111
|
+
const calls: WakeArgs[] = []
|
|
112
|
+
expect(await drainEphemeralQueue(cfg, 'w', 'claude', { wakeFn: fakeWake(['READY'], calls) })).toBeNull()
|
|
113
|
+
expect(calls).toEqual([])
|
|
114
|
+
})
|
|
115
|
+
|
|
116
|
+
test('READY consumes the head; successive drains feed tasks in STRICT FIFO order', async () => {
|
|
117
|
+
const cfg = mkCfg()
|
|
118
|
+
enqueueEphemeralTask(cfg, 'claude-w', { task: 'task-A', topic: 'ta' })
|
|
119
|
+
enqueueEphemeralTask(cfg, 'claude-w', { task: 'task-B' })
|
|
120
|
+
const calls: WakeArgs[] = []
|
|
121
|
+
const deps = { wakeFn: fakeWake(['READY'], calls) }
|
|
122
|
+
|
|
123
|
+
expect((await drainEphemeralQueue(cfg, 'w', 'claude', deps))?.status).toBe('READY')
|
|
124
|
+
expect(calls[0]).toMatchObject({ personality: 'w', runtime: 'claude', task: 'task-A', topic: 'ta' })
|
|
125
|
+
expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(1) // A consumed
|
|
126
|
+
|
|
127
|
+
expect((await drainEphemeralQueue(cfg, 'w', 'claude', deps))?.status).toBe('READY')
|
|
128
|
+
expect(calls[1]).toMatchObject({ task: 'task-B' }) // FIFO: B strictly after A
|
|
129
|
+
expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(0)
|
|
130
|
+
expect(await drainEphemeralQueue(cfg, 'w', 'claude', deps)).toBeNull() // drained dry
|
|
131
|
+
|
|
132
|
+
// durable drain trace (acceptance (a)): ev=ephemeral-drain with depth
|
|
133
|
+
const logged = readFileSync(join(cfg.eventLogDir, 'lifecycle.log'), 'utf8')
|
|
134
|
+
expect(logged).toContain('ev=ephemeral-drain')
|
|
135
|
+
expect(logged).toContain('identity=claude-w')
|
|
136
|
+
expect(logged).toContain('depth=2')
|
|
137
|
+
})
|
|
138
|
+
|
|
139
|
+
test('FAILED wake LEAVES the item at the head — the next drain RETRIES the same task (acceptance (b))', async () => {
|
|
140
|
+
const cfg = mkCfg()
|
|
141
|
+
enqueueEphemeralTask(cfg, 'claude-w', { task: 'flaky-task' })
|
|
142
|
+
const calls: WakeArgs[] = []
|
|
143
|
+
const deps = { wakeFn: fakeWake(['FAILED', 'READY'], calls) }
|
|
144
|
+
|
|
145
|
+
expect((await drainEphemeralQueue(cfg, 'w', 'claude', deps))?.status).toBe('FAILED')
|
|
146
|
+
expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(1) // NOT consumed on failure
|
|
147
|
+
|
|
148
|
+
expect((await drainEphemeralQueue(cfg, 'w', 'claude', deps))?.status).toBe('READY')
|
|
149
|
+
expect(calls.length).toBe(2)
|
|
150
|
+
expect(calls[1]?.task).toBe('flaky-task') // the SAME task, retried
|
|
151
|
+
expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(0)
|
|
152
|
+
})
|
|
153
|
+
|
|
154
|
+
const tmuxAvailable = spawnSync('tmux', ['-V'], { stdio: 'ignore' }).status === 0
|
|
155
|
+
test.if(tmuxAvailable)('a LIVE session blocks the drain (one task per session invariant)', async () => {
|
|
156
|
+
const cfg = mkCfg()
|
|
157
|
+
mkdirSync(cfg.sockDir, { recursive: true })
|
|
158
|
+
const sock = join(cfg.sockDir, 'tmux-iap-claude-w.sock')
|
|
159
|
+
enqueueEphemeralTask(cfg, 'claude-w', { task: 'queued-while-busy' })
|
|
160
|
+
const calls: WakeArgs[] = []
|
|
161
|
+
try {
|
|
162
|
+
spawnSync('tmux', ['-S', sock, 'new-session', '-d', '-s', 'claude-w', 'sleep', '60'])
|
|
163
|
+
expect(await drainEphemeralQueue(cfg, 'w', 'claude', { wakeFn: fakeWake(['READY'], calls) })).toBeNull()
|
|
164
|
+
expect(calls).toEqual([]) // no wake while the session lives
|
|
165
|
+
expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(1) // task waits for the reap
|
|
166
|
+
} finally {
|
|
167
|
+
spawnSync('tmux', ['-S', sock, 'kill-server'], { stdio: 'ignore' })
|
|
168
|
+
}
|
|
169
|
+
})
|
|
170
|
+
|
|
171
|
+
test('drainAllEphemeralQueues: scans every queued identity, H4-skips launchd-managed', async () => {
|
|
172
|
+
const cfg = mkCfg()
|
|
173
|
+
const laDir = mkTmp()
|
|
174
|
+
const env = { ...process.env, IAPEER_LAUNCHAGENTS_DIR: laDir } as NodeJS.ProcessEnv
|
|
175
|
+
enqueueEphemeralTask(cfg, 'claude-free', { task: 'x' })
|
|
176
|
+
enqueueEphemeralTask(cfg, 'claude-held', { task: 'y' })
|
|
177
|
+
writeFileSync(join(laDir, 'com.iapeer.held.plist'), '') // 'held' is launchd-managed
|
|
178
|
+
const calls: WakeArgs[] = []
|
|
179
|
+
const results = await drainAllEphemeralQueues(cfg, { env, wakeFn: fakeWake(['READY'], calls) })
|
|
180
|
+
expect(results.length).toBe(1)
|
|
181
|
+
expect(calls.map(c => c.personality)).toEqual(['free']) // held NEVER woken (H4)
|
|
182
|
+
expect(ephemeralQueueDepth(cfg, 'claude-free')).toBe(0)
|
|
183
|
+
expect(ephemeralQueueDepth(cfg, 'claude-held')).toBe(1) // untouched
|
|
184
|
+
})
|
|
185
|
+
})
|