@agfpd/iapeer 0.2.10 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,10 +24,16 @@ import {
24
24
  resolveSockDir,
25
25
  type Runtime,
26
26
  } from '../core/constants.ts'
27
- import { buildProcessAddress, buildSocketPath } from '../core/socket.ts'
27
+ import { buildProcessAddress, buildSocketPath, parseSessionName } from '../core/socket.ts'
28
28
  import { err, ok, type Result } from '../core/errors.ts'
29
29
  import { resolveGlobalRoot } from '../storage/index.ts'
30
30
  import { readPeerProfile, resolveIdentity } from '../identity/index.ts'
31
+ import {
32
+ ephemeralQueueDepth,
33
+ listQueuedIdentities,
34
+ peekEphemeralTask,
35
+ removeEphemeralTask,
36
+ } from './queue.ts'
31
37
  import { findPeer, publicPeerSummary, readPeersIndex, type PeerRecord, type PublicPeerSummary } from '../registry/index.ts'
32
38
  // Ф3: launch = HOW to bring up ONE session (runtime-agnostic primitive + adapter).
33
39
  // lifecycle decides WHEN/HOW-MANY and delegates the bring-up to launch.
@@ -65,6 +71,13 @@ export interface LifecycleConfig {
65
71
  crashLoopMax: number
66
72
  /** Crash-loop guard: the sliding window (seconds) the death count is measured over. */
67
73
  crashLoopWindowSecs: number
74
+ /** wake_policy:ephemeral M2 — the quiet window (seconds of activity-proxy silence)
75
+ * after which an ARMED ephemeral session (it has sent its outbound reply) is
76
+ * reaped. Long enough for the post-reply housekeeping writes (operative notes)
77
+ * to keep resetting it; far below idleSecs. ASSUMPTION (boris, by design): the
78
+ * transcript mtime is a LIVENESS proxy — "no longer writing" — not a semantic
79
+ * "done" signal. */
80
+ ephemeralQuietSecs: number
68
81
  }
69
82
 
70
83
  export function loadLifecycleConfig(env: NodeJS.ProcessEnv = process.env): LifecycleConfig {
@@ -87,6 +100,7 @@ export function loadLifecycleConfig(env: NodeJS.ProcessEnv = process.env): Lifec
87
100
  maxAgeSecs: num(env.IAPEER_MAX_AGE_SECS, 14400),
88
101
  crashLoopMax: num(env.IAPEER_CRASHLOOP_MAX, 3),
89
102
  crashLoopWindowSecs: num(env.IAPEER_CRASHLOOP_WINDOW_SECS, 300),
103
+ ephemeralQuietSecs: num(env.IAPEER_EPHEMERAL_QUIET_SECS, 20),
90
104
  }
91
105
  }
92
106
 
@@ -194,6 +208,14 @@ export function clearStopped(cfg: LifecycleConfig, identity: string): void {
194
208
  // .deaths : crash-loop guard — a small JSON ring of recent death epoch-ms.
195
209
  // .topic : the topic tag of the current/last session (executor fresh-vs-
196
210
  // resume discriminator).
211
+ // .ephemeral-armed : wake_policy:ephemeral M2 — set by the DAEMON when it routes
212
+ // an OUTBOUND send from an ephemeral worker (its single final reply, ADR-006:
213
+ // workers send no intermediate messages, so outbound ⇒ the task is answered).
214
+ // An ARMED live session is reaped by superviseTick after a quiet window
215
+ // (die-after-reply). The marker belongs to the session that sent the outbound:
216
+ // it is cleared on the quiet-reap, on that session's own death, and on the
217
+ // next successful launch — it NEVER survives into a successor session (else a
218
+ // stray marker would quiet-reap the next task before its answer).
197
219
  //
198
220
  // Boolean markers carry an ISO timestamp line (audit-friendly); .deaths is a JSON
199
221
  // array; .topic is the raw topic string.
@@ -247,6 +269,32 @@ export function clearNewEager(cfg: LifecycleConfig, identity: string): void {
247
269
  }
248
270
  }
249
271
 
272
+ function ephemeralArmedPath(cfg: LifecycleConfig, identity: string): string {
273
+ return join(cfg.stateDir, `${identity}.ephemeral-armed`)
274
+ }
275
+
276
+ /** True iff the identity's live ephemeral session has sent its outbound reply
277
+ * (→ quiet-reap candidate). */
278
+ export function hasEphemeralArmed(cfg: LifecycleConfig, identity: string): boolean {
279
+ return existsSync(ephemeralArmedPath(cfg, identity))
280
+ }
281
+
282
+ /** Arm the die-after-reply reap — ONLY the daemon's outbound seam does this
283
+ * (an ephemeral caller's send_to_peer was routed ok). */
284
+ export function setEphemeralArmed(cfg: LifecycleConfig, identity: string): void {
285
+ mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
286
+ writeFileSync(ephemeralArmedPath(cfg, identity), `${new Date().toISOString()}\n`, { mode: 0o600 })
287
+ }
288
+
289
+ /** Clear the armed mark (quiet-reap done / the armed session died / a new launch). */
290
+ export function clearEphemeralArmed(cfg: LifecycleConfig, identity: string): void {
291
+ try {
292
+ rmSync(ephemeralArmedPath(cfg, identity), { force: true })
293
+ } catch {
294
+ /* already gone */
295
+ }
296
+ }
297
+
250
298
  function deathsPath(cfg: LifecycleConfig, identity: string): string {
251
299
  return join(cfg.stateDir, `${identity}.deaths`)
252
300
  }
@@ -367,7 +415,7 @@ function isHumanConversational(cwd: string): boolean {
367
415
  * set, ephemeral WINS in resolveWakeMode (explicit policy beats the inferred human
368
416
  * type) — provision warns on that combination; it should not occur for real workers.
369
417
  */
370
- function isEphemeralPeer(cwd: string): boolean {
418
+ export function isEphemeralPeer(cwd: string): boolean {
371
419
  try {
372
420
  return readPeerProfile(cwd)?.wake_policy === 'ephemeral'
373
421
  } catch {
@@ -855,6 +903,11 @@ export async function wakeOrSpawn(args: WakeArgs, deps: WakeDeps = {}): Promise<
855
903
  `[iapeer] WARN session-state write failed for ${identity} — session is live + TTL-bounded but not idle-reap-supervised: ${e instanceof Error ? e.message : String(e)}\n`,
856
904
  )
857
905
  }
906
+ // A NEWLY launched session starts UNARMED by definition (no outbound yet) — clear
907
+ // any stray .ephemeral-armed so it can never quiet-reap this session before its
908
+ // reply. ONLY here on the actual launch path, NEVER on the live-session fast path
909
+ // above (a live session may be legitimately armed mid-quiet-window).
910
+ clearEphemeralArmed(cfg, identity)
858
911
  // Establish the session's topic (executor discriminator) and reset the crash-loop
859
912
  // ring — a successful wake means this is NOT a tight crash loop. Best-effort.
860
913
  writeTopic(cfg, identity, args.topic?.trim() ?? '')
@@ -894,10 +947,12 @@ export function killSession(sock: string, identity: string): void {
894
947
 
895
948
  export interface SuperviseOutcome {
896
949
  identity: string
897
- action: 'reaped-idle' | 'reaped-gone' | 'skipped-launchd' | 'alive' | 'needs-eager-fresh'
950
+ action: 'reaped-idle' | 'reaped-gone' | 'reaped-ephemeral' | 'skipped-launchd' | 'alive' | 'needs-eager-fresh'
898
951
  reason?: string
899
952
  /** For 'needs-eager-fresh': the peer to EAGERLY re-launch fresh (its session died
900
- * carrying a .new-eager mark). The daemon timer drives the async relaunch. */
953
+ * carrying a .new-eager mark). The daemon timer drives the async relaunch.
954
+ * Also set on 'reaped-ephemeral' — the M3 queue-drain hook needs the peer to
955
+ * wake fresh for the next queued task. */
901
956
  personality?: string
902
957
  runtime?: Runtime
903
958
  }
@@ -932,6 +987,10 @@ export function superviseTick(cfg: LifecycleConfig, deps: SuperviseDeps = {}): S
932
987
  // the idle-reap below) → it died on its own → do NOT write .idle-reaped here.
933
988
  recordDeath(cfg, s.identity, nowMs)
934
989
  removeSessionState(cfg, s.identity)
990
+ // The .ephemeral-armed mark belongs to THIS (now dead) session — it armed on its
991
+ // outbound reply. Clear it with the session, so a successor session can never be
992
+ // quiet-reaped on a stale mark before answering its own task. No-op otherwise.
993
+ clearEphemeralArmed(cfg, s.identity)
935
994
  // A session that died carrying a .new-eager mark is an owner /new: re-launch
936
995
  // EAGERLY as fresh (not lazily on the next message). The mark is LEFT for the
937
996
  // eager relaunch (processEagerRelaunches) to consume; the daemon timer drives it.
@@ -963,6 +1022,29 @@ export function superviseTick(cfg: LifecycleConfig, deps: SuperviseDeps = {}): S
963
1022
  /* no adapter for this runtime yet → wokeAt fallback */
964
1023
  }
965
1024
  const ageSecs = Math.floor((nowMs - mt) / 1000)
1025
+ // wake_policy:ephemeral M2 — die-after-reply: an ARMED ephemeral session (the
1026
+ // daemon routed its outbound reply) is reaped after a QUIET window, checked
1027
+ // BEFORE the idle branch (quiet ≪ idle). Quiet = the activity proxy stayed
1028
+ // silent for ephemeralQuietSecs — post-reply housekeeping (operative-note
1029
+ // writes) keeps resetting it, so the worker finishes its bookkeeping first.
1030
+ // NOT armed (still mid-task, e.g. a long silent tool run) → the ordinary
1031
+ // idle bound below is its only reaper. Deliberate, policy-driven death:
1032
+ // NO .idle-reaped (an ephemeral peer never resumes) and NO recordDeath
1033
+ // (the crash-loop ring counts faults, not policy reaps).
1034
+ if (isEphemeralPeer(s.cwd) && hasEphemeralArmed(cfg, s.identity) && ageSecs > cfg.ephemeralQuietSecs) {
1035
+ killSession(sock, s.identity)
1036
+ clearEphemeralArmed(cfg, s.identity)
1037
+ removeSessionState(cfg, s.identity)
1038
+ out.push({
1039
+ identity: s.identity,
1040
+ action: 'reaped-ephemeral',
1041
+ reason: `armed, quiet ${ageSecs}s`,
1042
+ personality: s.personality,
1043
+ runtime: s.runtime,
1044
+ })
1045
+ trace({ identity: s.identity, action: 'reaped-ephemeral', age: `${ageSecs}s`, outcome: 'ephemeral-done' })
1046
+ continue
1047
+ }
966
1048
  if (ageSecs > cfg.idleSecs) {
967
1049
  // THE ONLY place .idle-reaped is written: this is the one death the daemon
968
1050
  // INITIATES. Its presence on the next wake = the session was parked cleanly =
@@ -1015,6 +1097,94 @@ export async function processEagerRelaunches(
1015
1097
  return results
1016
1098
  }
1017
1099
 
1100
+ // ─────────────────────────────────────────────────────────────────────────────
1101
+ // wake_policy:"ephemeral" M3 — the serial-queue DRAIN. Deliveries to an ephemeral
1102
+ // target are always ENQUEUED (transport's injected ephemeral seam → queue.ts);
1103
+ // this is the consumer side: feed the worker ONE task per fresh session.
1104
+ // Re-export the queue API so consumers (daemon main, tests) reach it through the
1105
+ // module index.
1106
+ // ─────────────────────────────────────────────────────────────────────────────
1107
+
1108
+ export {
1109
+ enqueueEphemeralTask,
1110
+ ephemeralQueueDepth,
1111
+ ephemeralQueueDir,
1112
+ listQueuedIdentities,
1113
+ peekEphemeralTask,
1114
+ removeEphemeralTask,
1115
+ type EphemeralQueueItem,
1116
+ type PeekedQueueItem,
1117
+ } from './queue.ts'
1118
+
1119
+ export interface DrainDeps {
1120
+ env?: NodeJS.ProcessEnv
1121
+ /** Injectable wake (tests); default wakeOrSpawn. */
1122
+ wakeFn?: (args: WakeArgs, deps: WakeDeps) => Promise<WakeResult>
1123
+ }
1124
+
1125
+ /**
1126
+ * Feed ONE queued task to an ephemeral worker IFF it has no live session:
1127
+ * peek → wake FRESH (the task is the boot first-message; resolveWakeMode takes
1128
+ * the ephemeral-policy branch) → remove the item ONLY on READY. A FAILED wake
1129
+ * LEAVES the item at the head — the next supervise-tick drain retries it (the
1130
+ * crash-loop guard bounds a tight failure loop, its refusals land in
1131
+ * lifecycle.log). Returns null when there is nothing to do (empty queue, or a
1132
+ * session is still live — invariant: ≤1 live session = exactly one task).
1133
+ * Serialization: concurrent drains converge on wakeOrSpawn's per-identity
1134
+ * wake.lock — the loser takes the idempotent live-session fast path and the
1135
+ * item is removed once, delivered once.
1136
+ */
1137
+ export async function drainEphemeralQueue(
1138
+ cfg: LifecycleConfig,
1139
+ personality: string,
1140
+ runtime: Runtime,
1141
+ deps: DrainDeps = {},
1142
+ ): Promise<WakeResult | null> {
1143
+ const env = deps.env ?? process.env
1144
+ const identity = buildProcessAddress(runtime, personality)
1145
+ const sock = buildSocketPath(runtime, personality, cfg.sockDir)
1146
+ if (sessionAlive(sock, identity)) return null // one task per session — wait for its reap
1147
+ const item = peekEphemeralTask(cfg, identity)
1148
+ if (!item) return null
1149
+ // Durable drain trace (boris acceptance (a)): which item, how deep the queue.
1150
+ appendLifecycleEvent(
1151
+ cfg.eventLogDir,
1152
+ { ev: 'ephemeral-drain', identity, seq: item.seq, depth: ephemeralQueueDepth(cfg, identity) },
1153
+ { env },
1154
+ )
1155
+ const wake = deps.wakeFn ?? wakeOrSpawn
1156
+ const result = await wake({ personality, runtime, topic: item.topic, task: item.task }, { cfg, env })
1157
+ if (result.status === 'READY') removeEphemeralTask(cfg, identity, item.seq)
1158
+ return result
1159
+ }
1160
+
1161
+ /**
1162
+ * Drain every identity with a non-empty queue and no live session — the daemon's
1163
+ * supervise-tick hook. ONE mechanism is the whole M3 delivery loop: the inline
1164
+ * kick after an enqueue covers the cold/empty case, and this periodic scan covers
1165
+ * (a) the next task after a reaped-ephemeral (same tick that reaped it),
1166
+ * (b) drain-on-start (the queue is durable across daemon restarts), and
1167
+ * (c) the RETRY of a failed wake (the item was left at the head). H4-guarded:
1168
+ * a launchd-managed peer is never woken by a drain (it should never have a
1169
+ * queue, but the guard is structural, not situational).
1170
+ */
1171
+ export async function drainAllEphemeralQueues(cfg: LifecycleConfig, deps: DrainDeps = {}): Promise<WakeResult[]> {
1172
+ const env = deps.env ?? process.env
1173
+ const results: WakeResult[] = []
1174
+ for (const identity of listQueuedIdentities(cfg)) {
1175
+ const parsed = parseSessionName(identity)
1176
+ if (!parsed) continue
1177
+ if (isLaunchdManaged(parsed.personality, env)) continue
1178
+ try {
1179
+ const r = await drainEphemeralQueue(cfg, parsed.personality, parsed.runtime, deps)
1180
+ if (r) results.push(r)
1181
+ } catch (e) {
1182
+ results.push({ status: 'FAILED', woke: false, reason: e instanceof Error ? e.message : String(e) })
1183
+ }
1184
+ }
1185
+ return results
1186
+ }
1187
+
1018
1188
  // ─────────────────────────────────────────────────────────────────────────────
1019
1189
  // folderLaunch / attachPeer — operator verbs (contract ЖЦ [lifecycle] §Запуск из папки [launch from folder], §attach;
1020
1190
  // Примитивы [primitives] §Карта verbs [verb map]). Both reuse wakeOrSpawn (one bring-up path); the difference
@@ -4,13 +4,16 @@ import { tmpdir } from 'os'
4
4
  import { join } from 'path'
5
5
  import {
6
6
  attachPeer,
7
+ clearEphemeralArmed,
7
8
  clearNewEager,
8
9
  clearStopped,
9
10
  composeFirstMessage,
10
11
  countRecentDeaths,
11
12
  folderLaunch,
13
+ hasEphemeralArmed,
12
14
  hasIdleReaped,
13
15
  hasNewEager,
16
+ isEphemeralPeer,
14
17
  isLaunchdManaged,
15
18
  isStopped,
16
19
  lastActiveRuntime,
@@ -20,6 +23,7 @@ import {
20
23
  recordDeath,
21
24
  resolveWakeMode,
22
25
  resolveWakeRuntime,
26
+ setEphemeralArmed,
23
27
  setIdleReaped,
24
28
  setNewEager,
25
29
  setStopped,
@@ -29,6 +33,7 @@ import {
29
33
  writeTopic,
30
34
  type LifecycleConfig,
31
35
  } from './index.ts'
36
+ import { spawnSync } from 'child_process'
32
37
  import { upsertPeer, type PeerRecord } from '../registry/index.ts'
33
38
 
34
39
  function peer(over: Partial<PeerRecord>): PeerRecord {
@@ -557,6 +562,135 @@ describe('superviseTick death-cause accounting (TARGET)', () => {
557
562
  rmSync(laDir, { recursive: true, force: true })
558
563
  }
559
564
  })
565
+
566
+ test('a DEAD session clears a stale .ephemeral-armed (the mark dies with its session)', () => {
567
+ // The mark armed on the dead session's outbound; were it to survive, the NEXT
568
+ // session would be quiet-reap eligible BEFORE answering its own task.
569
+ const { env, cfg, root, laDir } = deadSessionEnv('q')
570
+ try {
571
+ setEphemeralArmed(cfg, 'claude-q')
572
+ const out = superviseTick(cfg, { env })
573
+ expect(out.find(x => x.identity === 'claude-q')?.action).toBe('reaped-gone')
574
+ expect(hasEphemeralArmed(cfg, 'claude-q')).toBe(false)
575
+ } finally {
576
+ rmSync(root, { recursive: true, force: true })
577
+ rmSync(laDir, { recursive: true, force: true })
578
+ }
579
+ })
580
+ })
581
+
582
+ // ─────────────────────────────────────────────────────────────────────────────
583
+ // wake_policy:ephemeral M2 — armed marker + quiet-reap (die-after-reply)
584
+ // ─────────────────────────────────────────────────────────────────────────────
585
+
586
+ describe('ephemeral-armed marker + config', () => {
587
+ test('set/has/clear round-trip; clear is idempotent', () => {
588
+ const stateDir = mkdtempSync(join(tmpdir(), 'iapeer-eph-mark-'))
589
+ const cfg = { stateDir } as LifecycleConfig
590
+ try {
591
+ expect(hasEphemeralArmed(cfg, 'claude-e')).toBe(false)
592
+ setEphemeralArmed(cfg, 'claude-e')
593
+ expect(hasEphemeralArmed(cfg, 'claude-e')).toBe(true)
594
+ clearEphemeralArmed(cfg, 'claude-e')
595
+ clearEphemeralArmed(cfg, 'claude-e') // idempotent
596
+ expect(hasEphemeralArmed(cfg, 'claude-e')).toBe(false)
597
+ } finally {
598
+ rmSync(stateDir, { recursive: true, force: true })
599
+ }
600
+ })
601
+
602
+ test('ephemeralQuietSecs: default 20, env-tunable', () => {
603
+ expect(loadLifecycleConfig({ HOME: '/tmp' } as NodeJS.ProcessEnv).ephemeralQuietSecs).toBe(20)
604
+ expect(
605
+ loadLifecycleConfig({ HOME: '/tmp', IAPEER_EPHEMERAL_QUIET_SECS: '45' } as NodeJS.ProcessEnv)
606
+ .ephemeralQuietSecs,
607
+ ).toBe(45)
608
+ })
609
+
610
+ test('isEphemeralPeer keys on the cwd profile; read hiccup → false (safe default)', () => {
611
+ const eph = profileCwd(false, true)
612
+ const plain = profileCwd(false, false)
613
+ try {
614
+ expect(isEphemeralPeer(eph)).toBe(true)
615
+ expect(isEphemeralPeer(plain)).toBe(false)
616
+ expect(isEphemeralPeer('/tmp/definitely-no-such-peer-cwd')).toBe(false)
617
+ } finally {
618
+ rmSync(eph, { recursive: true, force: true })
619
+ rmSync(plain, { recursive: true, force: true })
620
+ }
621
+ })
622
+ })
623
+
624
+ describe('superviseTick quiet-reap (M2 die-after-reply, real tmux)', () => {
625
+ const tmuxAvailable = spawnSync('tmux', ['-V'], { stdio: 'ignore' }).status === 0
626
+
627
+ test.if(tmuxAvailable)(
628
+ 'ARMED + quiet → reaped-ephemeral (killed, marks cleared, NO death/idle-reaped); unarmed/not-quiet → alive',
629
+ () => {
630
+ const root = mkdtempSync(join(tmpdir(), 'iapeer-eq-root-'))
631
+ const laDir = mkdtempSync(join(tmpdir(), 'iapeer-eq-la-')) // empty → not launchd-managed
632
+ const cwd = profileCwd(false, true) // ephemeral worker profile
633
+ const env = {
634
+ ...process.env,
635
+ IAPEER_ROOT: root,
636
+ IAPEER_LAUNCHAGENTS_DIR: laDir,
637
+ IAPEER_SOCK_DIR: join(root, 'socks'),
638
+ }
639
+ const cfg = loadLifecycleConfig(env) // ephemeralQuietSecs 20 ≪ idleSecs 3600
640
+ const identity = 'claude-eq'
641
+ const sock = join(root, 'socks', 'tmux-iap-claude-eq.sock')
642
+ const writeState = (wokeAt: number) => {
643
+ mkdirSync(cfg.stateDir, { recursive: true })
644
+ writeFileSync(
645
+ join(cfg.stateDir, `${identity}.session`),
646
+ JSON.stringify({ identity, runtime: 'claude', personality: 'eq', cwd, wokeAt }),
647
+ )
648
+ }
649
+ const alive = () => spawnSync('tmux', ['-S', sock, 'has-session', '-t', identity]).status === 0
650
+ try {
651
+ mkdirSync(join(root, 'socks'), { recursive: true })
652
+ spawnSync('tmux', ['-S', sock, 'new-session', '-d', '-s', identity, 'sleep', '300'])
653
+ expect(alive()).toBe(true)
654
+ // no transcript in the temp cwd → activity proxy = wokeAt fallback:
655
+ // quiet age is fully controlled by the .session wokeAt below.
656
+
657
+ // 1) NOT armed + quiet-aged → alive (a silent long tool-run is NOT reaped:
658
+ // sleep-180 protection; only the ordinary idle bound applies).
659
+ writeState(Date.now() - 60_000) // age ~60s > quiet 20s, ≪ idle 3600s
660
+ expect(superviseTick(cfg, { env }).find(x => x.identity === identity)?.action).toBe('alive')
661
+ expect(alive()).toBe(true)
662
+
663
+ // 2) ARMED but NOT quiet → alive (post-reply housekeeping keeps it alive).
664
+ setEphemeralArmed(cfg, identity)
665
+ writeState(Date.now()) // age ~0 < quiet
666
+ expect(superviseTick(cfg, { env }).find(x => x.identity === identity)?.action).toBe('alive')
667
+ expect(alive()).toBe(true)
668
+
669
+ // 3) ARMED + quiet → reaped-ephemeral, with the M3 drain fields.
670
+ writeState(Date.now() - 60_000)
671
+ const o = superviseTick(cfg, { env }).find(x => x.identity === identity)
672
+ expect(o?.action).toBe('reaped-ephemeral')
673
+ expect(o?.personality).toBe('eq')
674
+ expect(o?.runtime).toBe('claude')
675
+ expect(alive()).toBe(false) // session killed
676
+ expect(hasEphemeralArmed(cfg, identity)).toBe(false) // mark consumed
677
+ expect(existsSync(join(cfg.stateDir, `${identity}.session`))).toBe(false)
678
+ // deliberate policy death: never resume-eligible, never a crash-loop count
679
+ expect(hasIdleReaped(cfg, identity)).toBe(false)
680
+ expect(readDeaths(cfg, identity).length).toBe(0)
681
+ // durable decision trace
682
+ const logged = readFileSync(join(cfg.eventLogDir, 'lifecycle.log'), 'utf8')
683
+ expect(logged).toContain(`action=reaped-ephemeral`)
684
+ expect(logged).toContain('outcome=ephemeral-done')
685
+ } finally {
686
+ spawnSync('tmux', ['-S', sock, 'kill-server'], { stdio: 'ignore' })
687
+ rmSync(root, { recursive: true, force: true })
688
+ rmSync(laDir, { recursive: true, force: true })
689
+ rmSync(cwd, { recursive: true, force: true })
690
+ }
691
+ },
692
+ 30000,
693
+ )
560
694
  })
561
695
 
562
696
  // ─────────────────────────────────────────────────────────────────────────────
@@ -0,0 +1,185 @@
1
+ // Ephemeral serial queue (M3) — FIFO primitives + the drain consumer.
2
+ // Retry semantics (boris acceptance (b)) are pinned here: a FAILED wake leaves
3
+ // the item at the head and the NEXT drain call retries the SAME task; only a
4
+ // READY wake consumes it. Strict FIFO order is asserted across drains.
5
+
6
+ import { afterEach, describe, expect, test } from 'bun:test'
7
+ import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'fs'
8
+ import { tmpdir } from 'os'
9
+ import { join } from 'path'
10
+ import { spawnSync } from 'child_process'
11
+ import {
12
+ drainAllEphemeralQueues,
13
+ drainEphemeralQueue,
14
+ enqueueEphemeralTask,
15
+ ephemeralQueueDepth,
16
+ ephemeralQueueDir,
17
+ listQueuedIdentities,
18
+ peekEphemeralTask,
19
+ removeEphemeralTask,
20
+ type LifecycleConfig,
21
+ type WakeArgs,
22
+ type WakeResult,
23
+ } from './index.ts'
24
+
25
+ const dirs: string[] = []
26
+ function mkTmp(): string {
27
+ const d = mkdtempSync(join(tmpdir(), 'iapeer-equeue-'))
28
+ dirs.push(d)
29
+ return d
30
+ }
31
+ afterEach(() => {
32
+ while (dirs.length) rmSync(dirs.pop()!, { recursive: true, force: true })
33
+ })
34
+
35
+ function mkCfg(): LifecycleConfig {
36
+ const root = mkTmp()
37
+ return {
38
+ stateDir: join(root, 'state'),
39
+ sockDir: join(root, 'socks'),
40
+ eventLogDir: join(root, 'logs'),
41
+ } as LifecycleConfig
42
+ }
43
+
44
+ describe('ephemeral queue primitives (FIFO)', () => {
45
+ test('enqueue returns depth; peek is non-destructive; remove consumes; strict FIFO', () => {
46
+ const cfg = mkCfg()
47
+ expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(0)
48
+ expect(peekEphemeralTask(cfg, 'claude-w')).toBeNull()
49
+
50
+ expect(enqueueEphemeralTask(cfg, 'claude-w', { task: 'first', topic: 't1' })).toBe(1)
51
+ expect(enqueueEphemeralTask(cfg, 'claude-w', { task: 'second' })).toBe(2)
52
+ expect(enqueueEphemeralTask(cfg, 'claude-w', { task: 'third', topic: 't3' })).toBe(3)
53
+
54
+ const head = peekEphemeralTask(cfg, 'claude-w')
55
+ expect(head?.task).toBe('first')
56
+ expect(head?.topic).toBe('t1')
57
+ expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(3) // peek did not consume
58
+
59
+ removeEphemeralTask(cfg, 'claude-w', head!.seq)
60
+ removeEphemeralTask(cfg, 'claude-w', head!.seq) // idempotent
61
+ expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(2)
62
+ expect(peekEphemeralTask(cfg, 'claude-w')?.task).toBe('second')
63
+ expect(peekEphemeralTask(cfg, 'claude-w')?.topic).toBeUndefined()
64
+ })
65
+
66
+ test('enqueue skips taken seq names (exclusive-create) — no overwrite of a pending task', () => {
67
+ const cfg = mkCfg()
68
+ enqueueEphemeralTask(cfg, 'claude-w', { task: 'one' })
69
+ // simulate a competitor that already claimed the next seq
70
+ writeFileSync(join(ephemeralQueueDir(cfg, 'claude-w'), '000002'), JSON.stringify({ task: 'competitor' }))
71
+ expect(enqueueEphemeralTask(cfg, 'claude-w', { task: 'three' })).toBe(3)
72
+ // all three distinct tasks live side-by-side
73
+ const dir = ephemeralQueueDir(cfg, 'claude-w')
74
+ expect(JSON.parse(readFileSync(join(dir, '000002'), 'utf8')).task).toBe('competitor')
75
+ expect(JSON.parse(readFileSync(join(dir, '000003'), 'utf8')).task).toBe('three')
76
+ })
77
+
78
+ test('a poison head (corrupt JSON) is dropped, not wedging the queue', () => {
79
+ const cfg = mkCfg()
80
+ mkdirSync(ephemeralQueueDir(cfg, 'claude-w'), { recursive: true })
81
+ writeFileSync(join(ephemeralQueueDir(cfg, 'claude-w'), '000001'), 'NOT JSON {{{')
82
+ enqueueEphemeralTask(cfg, 'claude-w', { task: 'good' })
83
+ const head = peekEphemeralTask(cfg, 'claude-w')
84
+ expect(head?.task).toBe('good')
85
+ expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(1) // poison slot dropped
86
+ })
87
+
88
+ test('listQueuedIdentities: only non-empty queues, sorted', () => {
89
+ const cfg = mkCfg()
90
+ enqueueEphemeralTask(cfg, 'claude-b', { task: 'x' })
91
+ enqueueEphemeralTask(cfg, 'claude-a', { task: 'y' })
92
+ mkdirSync(ephemeralQueueDir(cfg, 'claude-empty'), { recursive: true }) // empty dir → excluded
93
+ expect(listQueuedIdentities(cfg)).toEqual(['claude-a', 'claude-b'])
94
+ })
95
+ })
96
+
97
+ describe('drainEphemeralQueue (peek → wake → rm-on-READY)', () => {
98
+ function fakeWake(
99
+ script: Array<'READY' | 'FAILED'>,
100
+ calls: WakeArgs[],
101
+ ): (args: WakeArgs) => Promise<WakeResult> {
102
+ return async args => {
103
+ calls.push(args)
104
+ const status = script[Math.min(calls.length - 1, script.length - 1)]!
105
+ return { status, woke: status === 'READY', runtime: 'claude' }
106
+ }
107
+ }
108
+
109
+ test('empty queue → null, wake NOT called', async () => {
110
+ const cfg = mkCfg()
111
+ const calls: WakeArgs[] = []
112
+ expect(await drainEphemeralQueue(cfg, 'w', 'claude', { wakeFn: fakeWake(['READY'], calls) })).toBeNull()
113
+ expect(calls).toEqual([])
114
+ })
115
+
116
+ test('READY consumes the head; successive drains feed tasks in STRICT FIFO order', async () => {
117
+ const cfg = mkCfg()
118
+ enqueueEphemeralTask(cfg, 'claude-w', { task: 'task-A', topic: 'ta' })
119
+ enqueueEphemeralTask(cfg, 'claude-w', { task: 'task-B' })
120
+ const calls: WakeArgs[] = []
121
+ const deps = { wakeFn: fakeWake(['READY'], calls) }
122
+
123
+ expect((await drainEphemeralQueue(cfg, 'w', 'claude', deps))?.status).toBe('READY')
124
+ expect(calls[0]).toMatchObject({ personality: 'w', runtime: 'claude', task: 'task-A', topic: 'ta' })
125
+ expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(1) // A consumed
126
+
127
+ expect((await drainEphemeralQueue(cfg, 'w', 'claude', deps))?.status).toBe('READY')
128
+ expect(calls[1]).toMatchObject({ task: 'task-B' }) // FIFO: B strictly after A
129
+ expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(0)
130
+ expect(await drainEphemeralQueue(cfg, 'w', 'claude', deps)).toBeNull() // drained dry
131
+
132
+ // durable drain trace (acceptance (a)): ev=ephemeral-drain with depth
133
+ const logged = readFileSync(join(cfg.eventLogDir, 'lifecycle.log'), 'utf8')
134
+ expect(logged).toContain('ev=ephemeral-drain')
135
+ expect(logged).toContain('identity=claude-w')
136
+ expect(logged).toContain('depth=2')
137
+ })
138
+
139
+ test('FAILED wake LEAVES the item at the head — the next drain RETRIES the same task (acceptance (b))', async () => {
140
+ const cfg = mkCfg()
141
+ enqueueEphemeralTask(cfg, 'claude-w', { task: 'flaky-task' })
142
+ const calls: WakeArgs[] = []
143
+ const deps = { wakeFn: fakeWake(['FAILED', 'READY'], calls) }
144
+
145
+ expect((await drainEphemeralQueue(cfg, 'w', 'claude', deps))?.status).toBe('FAILED')
146
+ expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(1) // NOT consumed on failure
147
+
148
+ expect((await drainEphemeralQueue(cfg, 'w', 'claude', deps))?.status).toBe('READY')
149
+ expect(calls.length).toBe(2)
150
+ expect(calls[1]?.task).toBe('flaky-task') // the SAME task, retried
151
+ expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(0)
152
+ })
153
+
154
+ const tmuxAvailable = spawnSync('tmux', ['-V'], { stdio: 'ignore' }).status === 0
155
+ test.if(tmuxAvailable)('a LIVE session blocks the drain (one task per session invariant)', async () => {
156
+ const cfg = mkCfg()
157
+ mkdirSync(cfg.sockDir, { recursive: true })
158
+ const sock = join(cfg.sockDir, 'tmux-iap-claude-w.sock')
159
+ enqueueEphemeralTask(cfg, 'claude-w', { task: 'queued-while-busy' })
160
+ const calls: WakeArgs[] = []
161
+ try {
162
+ spawnSync('tmux', ['-S', sock, 'new-session', '-d', '-s', 'claude-w', 'sleep', '60'])
163
+ expect(await drainEphemeralQueue(cfg, 'w', 'claude', { wakeFn: fakeWake(['READY'], calls) })).toBeNull()
164
+ expect(calls).toEqual([]) // no wake while the session lives
165
+ expect(ephemeralQueueDepth(cfg, 'claude-w')).toBe(1) // task waits for the reap
166
+ } finally {
167
+ spawnSync('tmux', ['-S', sock, 'kill-server'], { stdio: 'ignore' })
168
+ }
169
+ })
170
+
171
+ test('drainAllEphemeralQueues: scans every queued identity, H4-skips launchd-managed', async () => {
172
+ const cfg = mkCfg()
173
+ const laDir = mkTmp()
174
+ const env = { ...process.env, IAPEER_LAUNCHAGENTS_DIR: laDir } as NodeJS.ProcessEnv
175
+ enqueueEphemeralTask(cfg, 'claude-free', { task: 'x' })
176
+ enqueueEphemeralTask(cfg, 'claude-held', { task: 'y' })
177
+ writeFileSync(join(laDir, 'com.iapeer.held.plist'), '') // 'held' is launchd-managed
178
+ const calls: WakeArgs[] = []
179
+ const results = await drainAllEphemeralQueues(cfg, { env, wakeFn: fakeWake(['READY'], calls) })
180
+ expect(results.length).toBe(1)
181
+ expect(calls.map(c => c.personality)).toEqual(['free']) // held NEVER woken (H4)
182
+ expect(ephemeralQueueDepth(cfg, 'claude-free')).toBe(0)
183
+ expect(ephemeralQueueDepth(cfg, 'claude-held')).toBe(1) // untouched
184
+ })
185
+ })