@agfpd/iapeer 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agfpd/iapeer",
3
- "version": "0.1.2",
3
+ "version": "0.2.1",
4
4
  "description": "Foundation core for the IAPeer multi-agent ecosystem: identity, registry, storage, codec.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -21,7 +21,14 @@
21
21
  },
22
22
  "scripts": {
23
23
  "test": "IAPEER_TEST_SANDBOX=1 bun test",
24
- "typecheck": "tsc --noEmit"
24
+ "typecheck": "tsc --noEmit",
25
+ "release": "npm version patch && npm publish && git push --follow-tags",
26
+ "release:minor": "npm version minor && npm publish && git push --follow-tags",
27
+ "release:major": "npm version major && npm publish && git push --follow-tags",
28
+ "prepublishOnly": "test -z \"$(git status --porcelain)\" || (echo 'release: working tree is dirty — commit or stash before release' >&2 && exit 1)"
29
+ },
30
+ "publishConfig": {
31
+ "access": "public"
25
32
  },
26
33
  "dependencies": {
27
34
  "@modelcontextprotocol/sdk": "1.29.0",
@@ -7,8 +7,8 @@ import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
7
7
  import { mkdtempSync, rmSync, writeFileSync } from 'fs'
8
8
  import { tmpdir } from 'os'
9
9
  import { join } from 'path'
10
- import { formatListTable, listPeers, parseArgs, sendMessage, startPeer, stopPeer } from './index.ts'
11
- import { upsertPeer } from '../registry/index.ts'
10
+ import { formatListTable, listPeers, parseArgs, removePeerCli, sendMessage, startPeer, stopPeer } from './index.ts'
11
+ import { findPeer, readPeersIndex, upsertPeer } from '../registry/index.ts'
12
12
  import { isStopped, loadLifecycleConfig, setStopped } from '../lifecycle/index.ts'
13
13
  import { launchdPlistPath } from '../launch/launchd.ts'
14
14
 
@@ -90,6 +90,27 @@ describe('FLEET GUARD (H4) — foreign persistent-peer launchd plist is off-limi
90
90
  })
91
91
  })
92
92
 
93
describe('remove (registry record via the locked writer)', () => {
  // Registry lookup through the given sandbox env: the peer's record, or null.
  const lookup = (name: string, sandbox: ReturnType<typeof env>) =>
    findPeer(readPeersIndex({ env: sandbox }), name)

  test('removes a registered peer through registry.removePeer', async () => {
    await register('zombie')
    const sandbox = env()
    // Precondition: the record is really there before we reap it.
    expect(lookup('zombie', sandbox)).not.toBeNull()

    const outcome = await removePeerCli('zombie', { env: sandbox })

    expect(outcome.action).toBe('removed')
    expect(lookup('zombie', sandbox)).toBeNull()
  })

  test('removing an absent peer is an idempotent no-op (not an error)', async () => {
    const outcome = await removePeerCli('never-existed', { env: env() })
    expect(outcome.action).toBe('absent')
  })

  test('a second remove of the same peer is also a no-op', async () => {
    await register('twice')
    const sandbox = env()
    // First call deletes the record, the repeat finds nothing left to delete.
    const first = await removePeerCli('twice', { env: sandbox })
    expect(first.action).toBe('removed')
    const second = await removePeerCli('twice', { env: sandbox })
    expect(second.action).toBe('absent')
  })
})
113
+
93
114
  describe('send validation', () => {
94
115
  test('invalid --from identity → throws', async () => {
95
116
  await register('alpha')
package/src/cli/index.ts CHANGED
@@ -19,9 +19,9 @@ import {
19
19
  type Intelligence,
20
20
  type Runtime,
21
21
  } from '../core/constants.ts'
22
- import { buildProcessAddress, buildSocketPath } from '../core/socket.ts'
22
+ import { buildProcessAddress, buildSocketPath, parseSessionName } from '../core/socket.ts'
23
23
  import { ensureGlobalIapScaffold } from '../storage/index.ts'
24
- import { findPeer, readPeersIndex, type PeerRecord } from '../registry/index.ts'
24
+ import { findPeer, readPeersIndex, removePeer, type PeerRecord } from '../registry/index.ts'
25
25
  import { isPeerLive, routeControl, routeSend, type WakeFn } from '../transport/index.ts'
26
26
  import {
27
27
  attachPeer,
@@ -31,6 +31,7 @@ import {
31
31
  isStopped,
32
32
  killSession,
33
33
  loadLifecycleConfig,
34
+ setNewEager,
34
35
  setStopped,
35
36
  wakeOrSpawn,
36
37
  } from '../lifecycle/index.ts'
@@ -225,6 +226,51 @@ export function startPeer(personality: string, runtime: string | undefined, opts
225
226
  return out
226
227
  }
227
228
 
229
+ // ─────────────────────────────────────────────────────────────────────────────
230
+ // remove — delete a peer's record from the registry through the LOCKED writer
231
+ // (registry.removePeer). Direct edits of peers-profiles.json are refused at
232
+ // storage.ts:304 (locked-writer invariant); this is the operator path that used
233
+ // to require dropping into `bun -e removePeer(...)`. The use case is reaping the
234
+ // ephemeral zombie records a retired spawn leaves behind.
235
+ // ─────────────────────────────────────────────────────────────────────────────
236
+
237
/** Result of one `remove` request, as returned by removePeerCli. */
export interface RemoveOutcome {
  /** The personality the caller asked to remove (echoed back unchanged). */
  personality: string
  /**
   * What happened:
   *  - `removed`      — the record existed and was deleted;
   *  - `absent`       — no such record, nothing to do;
   *  - `refused-live` — the peer is live on some runtime and `force` was not set.
   */
  action: 'removed' | 'absent' | 'refused-live'
  /** Human-readable explanation; populated for `refused-live`. */
  reason?: string
}
242
+
243
/**
 * `remove <peer> [--force]` — delete a peer's registry record by delegating to
 * the registry's `removePeer`.
 *
 * Contract:
 *  - Idempotent: a personality with no record yields `absent` (a success, not
 *    an error) and nothing is written.
 *  - Safety guard: unless `opts.force` is set, every runtime listed on the
 *    record is probed with `isPeerLive`; the first live one aborts the removal
 *    with `refused-live` and a reason naming that runtime. The stated rationale
 *    is that deleting the record of a running session would orphan it from
 *    routing.
 *  - With `opts.force` the liveness probe is skipped entirely.
 *
 * @param personality registry name of the peer to remove
 * @param opts `env` selects the environment (defaults to `process.env`);
 *             `force` bypasses the liveness guard
 * @returns the outcome describing what was done
 */
export async function removePeerCli(
  personality: string,
  opts: CliEnvOptions & { force?: boolean } = {},
): Promise<RemoveOutcome> {
  const env = opts.env ?? process.env

  const record = findPeer(readPeersIndex({ env }), personality)
  if (!record) return { personality, action: 'absent' }

  if (!opts.force) {
    const cfg = loadLifecycleConfig(env)
    for (const runtime of record.runtimes) {
      if (!isPeerLive(runtime, personality, cfg.sockDir)) continue
      // First live runtime wins — no need to probe the rest.
      return {
        personality,
        action: 'refused-live',
        reason: `"${personality}" is LIVE on ${runtime} — removing its registry record would orphan the running session from routing; stop it first or pass --force`,
      }
    }
  }

  await removePeer(personality, { env })
  return { personality, action: 'removed' }
}
273
+
228
274
  // ─────────────────────────────────────────────────────────────────────────────
229
275
  // send — manual IAP send fallback (contract Примитивы §send). Goes through the
230
276
  // same router path as send_to_peer (resolve → deliver / wake), in-process so it
@@ -308,12 +354,14 @@ const USAGE = `usage: iapeer <verb> [args]
308
354
  list [--json] registered peers + per-runtime liveness
309
355
  stop <peer> [runtime] | --all durable-stop a warm peer / bootout an always-on one
310
356
  start <peer> [runtime] re-enable a stopped peer / bootstrap an always-on one
357
+ remove <peer> [--force] delete a peer's registry record (locked writer); refuses a LIVE peer unless --force
311
358
  send <target> (--message <text> | --message-file <f|->) [--from <id>] [--attachment <p>]… [--topic <t>] manual IAP send (fallback)
312
359
  <runtime> launch the cwd's peer (ALWAYS fresh)
313
360
  enable <plugin> [peer] [--no-setup] install + enable an agfpd capability for a peer
314
361
  attach <peer> [runtime] ensure-live + resume, then tmux attach
315
362
  interrupt <peer> [runtime] interrupt the current turn (Escape) — context intact
316
363
  compact <peer> [runtime] compact the peer's context (/compact)
364
+ self-fresh (agent self-call) mark /new eager-fresh + self-kill — the daemon relaunches fresh
317
365
  `
318
366
 
319
367
  export async function runCli(argv: string[], env: NodeJS.ProcessEnv = process.env): Promise<number> {
@@ -457,6 +505,17 @@ export async function runCli(argv: string[], env: NodeJS.ProcessEnv = process.en
457
505
  for (const o of outcomes) out(`${o.personality} (${o.runtime}): ${o.action}${o.reason ? ` — ${o.reason}` : ''}\n`)
458
506
  return outcomes.some(o => o.action === 'refused-foreign-launchd') ? 1 : 0
459
507
  }
508
+ case 'remove': {
509
+ // Reap a registry record through the locked writer (the operator path over
510
+ // registry.removePeer). Idempotent on an absent peer (exit 0). Refuses a LIVE
511
+ // peer unless --force (orphaning a running session from routing is the risk).
512
+ if (!positionals[0]) return usage(errOut)
513
+ const o = await removePeerCli(positionals[0], { force: flags.force === true, env })
514
+ if (o.action === 'removed') out(`removed "${o.personality}" from the registry\n`)
515
+ else if (o.action === 'absent') out(`"${o.personality}" not registered — no-op\n`)
516
+ else errOut(`remove: ${o.reason}\n`)
517
+ return o.action === 'refused-live' ? 1 : 0
518
+ }
460
519
  case 'send': {
461
520
  // Message body from EITHER --message <text> OR --message-file <f> (f='-' →
462
521
  // stdin). The runtime packages (telegram/notifier) + monitor deliver via
@@ -543,6 +602,35 @@ export async function runCli(argv: string[], env: NodeJS.ProcessEnv = process.en
543
602
  if (!positionals[0] || !positionals[1]) return usage(errOut)
544
603
  return await runAlwaysOn(positionals[0], positionals[1], process.cwd())
545
604
  }
605
+ case 'self-fresh': {
606
+ // /new AGENT-FACING TRIGGER (TARGET redesign). Run BY the agent itself as the
607
+ // FINAL step of a /new graceful wind-down (the owner triggers it via a per-peer
608
+ // telegram alias: "write a handoff to durable memory, then run iapeer self-fresh"
609
+ // — the alias text is telegram-owned, NOT global doctrine). It: resolves the
610
+ // caller identity from PEER_IDENTITY (<runtime>-<personality>), writes the
611
+ // .new-eager mark, then self-kills the caller's OWN tmux session. The daemon's
612
+ // superviseTick then sees the dead session carrying .new-eager → eager fresh
613
+ // relaunch (with initial_prompt) so the agent reports it is back up.
614
+ const identity = env.PEER_IDENTITY?.trim()
615
+ if (!identity) {
616
+ errOut('self-fresh: PEER_IDENTITY is not set — this verb is an agent self-call from inside a session\n')
617
+ return 1
618
+ }
619
+ const addr = parseSessionName(identity)
620
+ if (!addr) {
621
+ errOut(`self-fresh: invalid PEER_IDENTITY "${identity}" — expected <runtime>-<personality>\n`)
622
+ return 1
623
+ }
624
+ const cfg = loadLifecycleConfig(env)
625
+ // Mark FIRST, kill SECOND: if the kill races ahead of the mark the daemon would
626
+ // see a dead session with no .new-eager → a plain reaped-gone (lazy fresh on the
627
+ // next message), not the eager relaunch — degrade gracefully, never lose the mark.
628
+ setNewEager(cfg, identity)
629
+ out(`self-fresh: marked ${identity} for eager fresh re-launch; self-killing session\n`)
630
+ const sock = buildSocketPath(addr.runtime, addr.personality, cfg.sockDir)
631
+ killSession(sock, identity)
632
+ return 0
633
+ }
546
634
  case 'interrupt':
547
635
  case 'compact': {
548
636
  // In-session control (Ф-E, clean-slash namespace): interrupt a stuck/raving
@@ -13,7 +13,13 @@ export const SUPPORTED_LOCAL_RUNTIMES = ['claude', 'codex'] as const
13
13
  export type SupportedLocalRuntime = (typeof SUPPORTED_LOCAL_RUNTIMES)[number]
14
14
 
15
15
  export const PEERS_SCHEMA_VERSION = 2
16
- export const MAX_DESCRIPTION_LEN = 250
16
+ // 450 (was 250): self-documenting API-peer descriptions (notifier timer/watcher)
17
+ // must fit "who the peer is + registration format + a live example" — dense full
18
+ // texts run to ~408 chars; 250 cut them mid-word so the caller could not compose
19
+ // the call. Bumped with Arthur's sanction (2026-06-08). NB: this is COMPILE-TIME
20
+ // baked — the live daemon re-clamps descriptions on read (registry parsePeerRecord),
21
+ // so the running router keeps the OLD limit until restarted onto the new binary.
22
+ export const MAX_DESCRIPTION_LEN = 450
17
23
 
18
24
  // Contract vocabulary (docs/Идентичность, Артур 05.06): the nature of the
19
25
  // intelligence expressing itself through a runtime.
@@ -99,8 +99,13 @@ export async function startConfiguredDaemon(opts: ConfiguredDaemonOptions = {}):
99
99
  wake: makeWakeFn(cfg, env),
100
100
  supervise: {
101
101
  intervalMs: opts.superviseIntervalMs ?? DEFAULT_SUPERVISE_INTERVAL_MS,
102
- // idle-reap / zombie-sweep, THEN C4b eager fresh re-launch for any peer whose
103
- // session died carrying a /new graceful mark (async, best-effort).
102
+ // idle-reap / zombie-sweep, THEN the eager fresh re-launch for any peer whose
103
+ // session died carrying a .new-eager mark (owner /new; async, best-effort).
104
+ // The DURABLE decision trace (which peer, what outcome, when, why) is emitted
105
+ // INSIDE superviseTick (lifecycle/eventlog.ts → logs/iapeer/lifecycle.log), so
106
+ // every reap is recorded regardless of entry point (this timer AND the heal-at-
107
+ // wake superviseTick inside wakeOrSpawn). The outcomes array drives only the
108
+ // eager relaunch here; the trace does not depend on consuming it.
104
109
  tick: async () => {
105
110
  const outcomes = superviseTick(cfg, { env })
106
111
  await processEagerRelaunches(cfg, outcomes, { env })
@@ -0,0 +1,114 @@
1
+ // eventlog — the daemon's durable, rotated lifecycle decision log. Tests the pure
2
+ // logfmt formatter, the append path (into an explicit temp logDir — never the real
3
+ // ~/.iapeer), and the size-rotation chain. No daemon, no tmux — pure FS.
4
+
5
+ import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
6
+ import { existsSync, mkdtempSync, readFileSync, rmSync, statSync } from 'fs'
7
+ import { tmpdir } from 'os'
8
+ import { join } from 'path'
9
+ import {
10
+ appendLifecycleEvent,
11
+ fmtValue,
12
+ formatEventLine,
13
+ lifecycleLogPath,
14
+ } from './eventlog.ts'
15
+
16
+ const TS = 1_749_470_400_000 // fixed epoch-ms → a stable ISO for golden lines
17
+ const ISO = new Date(TS).toISOString()
18
+
19
describe('fmtValue (logfmt escaping)', () => {
  test('bare token stays bare', () => {
    // [input, expected rendering] — nothing here needs quoting.
    const bareCases: Array<[string | number, string]> = [
      ['reaped-idle', 'reaped-idle'],
      ['claude-boris', 'claude-boris'],
      [42, '42'],
    ]
    for (const [input, rendered] of bareCases) {
      expect(fmtValue(input)).toBe(rendered)
    }
  })

  test('empty string → ""', () => {
    expect(fmtValue('')).toBe('""')
  })

  test('whitespace / = / " force quoting and escape', () => {
    // [input, expected rendering] — each input contains a quoting trigger.
    const quotedCases: Array<[string, string]> = [
      ['session no longer live', '"session no longer live"'],
      ['a=b', '"a=b"'],
      ['say "hi"', '"say \\"hi\\""'],
      ['back\\slash here', '"back\\\\slash here"'],
    ]
    for (const [input, rendered] of quotedCases) {
      expect(fmtValue(input)).toBe(rendered)
    }
  })
})
35
+
36
describe('formatEventLine', () => {
  test('ts is first; fields keep insertion order; undefined skipped', () => {
    const fields = {
      ev: 'supervise',
      identity: 'claude-boris',
      action: 'reaped-gone',
      reason: 'session no longer live',
      ref: undefined, // must not appear in the rendered line
      outcome: 'fresh-next-msg',
    }
    // Golden line, one logfmt pair per entry, in insertion order.
    const golden = [
      `ts=${ISO}`,
      'ev=supervise',
      'identity=claude-boris',
      'action=reaped-gone',
      'reason="session no longer live"',
      'outcome=fresh-next-msg',
    ].join(' ')

    expect(formatEventLine(TS, fields)).toBe(golden)
  })

  test('age field renders as a bare token', () => {
    const rendered = formatEventLine(TS, {
      ev: 'supervise',
      identity: 'claude-x',
      action: 'reaped-idle',
      age: '4230s',
    })
    expect(rendered).toBe(`ts=${ISO} ev=supervise identity=claude-x action=reaped-idle age=4230s`)
  })
})
55
+
56
describe('appendLifecycleEvent', () => {
  // Fresh temp directory per test; every write below targets it explicitly.
  let dir: string

  beforeEach(() => {
    dir = mkdtempSync(join(tmpdir(), 'iapeer-eventlog-'))
  })
  afterEach(() => {
    rmSync(dir, { recursive: true, force: true })
  })

  test('falsy logDir → no-op (a partial cfg never writes / never resolves a real path)', () => {
    for (const falsyDir of [undefined, '']) {
      const call = () => appendLifecycleEvent(falsyDir, { ev: 'supervise', identity: 'x' }, { nowMs: TS })
      expect(call).not.toThrow()
    }
  })

  test('writes one logfmt line per call, appended in order', () => {
    appendLifecycleEvent(
      dir,
      { ev: 'wake', personality: 'boris', mode: 'fresh', cause: 'crash-or-self-close' },
      { nowMs: TS },
    )
    appendLifecycleEvent(
      dir,
      { ev: 'supervise', identity: 'claude-doc', action: 'reaped-gone' },
      { nowMs: TS + 1000 },
    )

    const [first, second, ...rest] = readFileSync(lifecycleLogPath(dir), 'utf8').trimEnd().split('\n')
    expect([first, second, ...rest]).toHaveLength(2)
    expect(first).toBe(`ts=${ISO} ev=wake personality=boris mode=fresh cause=crash-or-self-close`)
    expect(second).toContain('ev=supervise identity=claude-doc action=reaped-gone')
  })

  test('creates the log dir if absent', () => {
    const nested = join(dir, 'logs', 'iapeer')
    appendLifecycleEvent(nested, { ev: 'supervise', identity: 'x' }, { nowMs: TS })
    expect(existsSync(lifecycleLogPath(nested))).toBe(true)
  })

  test('size rotation: base → .1, oldest dropped past keep', () => {
    const env = { IAPEER_LIFECYCLE_LOG_MAX_BYTES: '120', IAPEER_LIFECYCLE_LOG_KEEP: '2' }
    const path = lifecycleLogPath(dir)

    let i = 0
    while (i < 6) {
      appendLifecycleEvent(
        dir,
        { ev: 'supervise', identity: `claude-peer${i}`, action: 'reaped-gone', n: i },
        { env, nowMs: TS + i },
      )
      i += 1
    }

    expect(existsSync(path)).toBe(true)
    // keep=2 → backups .1 and .2 exist, a .3 is never created.
    const backups: Array<[string, boolean]> = [
      ['.1', true],
      ['.2', true],
      ['.3', false],
    ]
    for (const [suffix, present] of backups) {
      expect(existsSync(`${path}${suffix}`)).toBe(present)
    }
    expect(statSync(path).size).toBeLessThanOrEqual(200)
    expect(readFileSync(path, 'utf8')).toContain('claude-peer5') // newest in the live base file
  })

  test('rotation preserves chronological order across files (.N oldest, base newest)', () => {
    const env = { IAPEER_LIFECYCLE_LOG_MAX_BYTES: '90', IAPEER_LIFECYCLE_LOG_KEEP: '3' }
    const path = lifecycleLogPath(dir)
    for (const i of [0, 1, 2, 3]) {
      appendLifecycleEvent(dir, { ev: 'supervise', identity: `claude-p${i}` }, { env, nowMs: TS + i })
    }

    // Stitch the chain back together oldest-first: .3, .2, .1, then the base.
    let ordered = ''
    for (const suffix of ['.3', '.2', '.1', '']) {
      const file = path + suffix
      if (existsSync(file)) ordered += readFileSync(file, 'utf8')
    }

    const seen = Array.from(ordered.matchAll(/identity=claude-p(\d)/g), m => Number(m[1]))
    const ascending = [...seen].sort((a, b) => a - b)
    expect(seen).toEqual(ascending)
    expect(seen[seen.length - 1]).toBe(3) // newest line is p3, in the base file
  })
})
@@ -0,0 +1,133 @@
1
+ // Lifecycle event log — the daemon's DURABLE, ROTATED trace of every lifecycle
2
+ // DECISION it makes. This is the observability gap the boris-fresh incident hit:
3
+ // a peer woke fresh and there was NO record of when/how its prior session ended,
4
+ // nor of the daemon's fresh-vs-resume reasoning, because superviseTick's outcomes
5
+ // were dropped and the daemon never wrote a decision line anywhere.
6
+ //
7
+ // Design:
8
+ // • One line per decision, logfmt (`key=value`, values quoted iff they contain
9
+ // whitespace/quotes/`=`). Human-greppable AND machine-parseable. The state
10
+ // markers (.idle-reaped / .deaths) are CONSUMED on the next wake — this log is
11
+ // the part that survives, so a postmortem can reconstruct the death even after
12
+ // the marker is gone.
13
+ // • Append-only, app-managed SIZE rotation (NOT launchd's stdout/stderr, which
14
+ // are unbounded and truncated on restart). lifecycle.log → .1 … .N.
15
+ // • The target directory is passed IN (cfg.eventLogDir), NOT re-resolved from
16
+ // env — so it is isolated by the SAME cfg the rest of lifecycle routes through
17
+ // (a test that sandboxes cfg.stateDir also sandboxes this log; no leak to the
18
+ // real ~/.iapeer). A falsy dir → no-op (a partial test cfg never writes).
19
+ // • Best-effort throughout: a write/rotate failure is swallowed. Observability
20
+ // must never take down the daemon or fail a wake/reap.
21
+ //
22
+ // Designed to be lifted out: the rotate-append primitive is path-parameterized, so the
23
+ // adjacent "log rotation" phase can promote it to storage/ and point other log
24
+ // producers at it without touching this module's call sites.
25
+
26
+ import { appendFileSync, mkdirSync, renameSync, rmSync, statSync } from 'fs'
27
+ import { join } from 'path'
28
+
29
+ /** Default cap per lifecycle.log file before it rotates to lifecycle.log.1. */
30
+ const DEFAULT_MAX_BYTES = 5 * 1024 * 1024 // 5 MiB
31
+ /** Default number of rotated backups kept (lifecycle.log.1 … .KEEP). */
32
+ const DEFAULT_KEEP = 5
33
+
34
/**
 * Location of the lifecycle decision log for a given log directory.
 *
 * @param logDir directory that holds the log (the caller's cfg.eventLogDir)
 * @returns `logDir` joined with the fixed file name `lifecycle.log`
 */
export function lifecycleLogPath(logDir: string): string {
  const fileName = 'lifecycle.log'
  return join(logDir, fileName)
}
38
+
39
/**
 * Parse a positive-integer tuning knob from a raw environment value.
 *
 * The whole (trimmed) string must be a decimal integer. A bare `parseInt`
 * accepts any numeric prefix, so `"5MB"` would become 5 and `"1e6"` would
 * become 1 — a typo silently turning into a tiny limit. Such values now fall
 * back to the default instead.
 *
 * @param raw  the environment value, possibly undefined
 * @param dflt value returned when `raw` is missing, malformed, zero or negative
 * @returns the parsed integer when it is a safe integer > 0, otherwise `dflt`
 */
function envPosInt(raw: string | undefined, dflt: number): number {
  const text = raw?.trim() ?? ''
  if (!/^\+?\d+$/.test(text)) return dflt
  const n = Number(text)
  return Number.isSafeInteger(n) && n > 0 ? n : dflt
}
43
+
44
/**
 * Reports whether verbose supervise logging is switched on.
 *
 * Reads `IAPEER_SUPERVISE_LOG_VERBOSE` from `env`; after trimming and
 * lower-casing, exactly `1`, `true` or `yes` enable it. Anything else —
 * including an unset variable — leaves it off. The original author's note:
 * verbose mode also logs the steady-state non-decisions (alive /
 * skipped-launchd), which is off by default to avoid heartbeat noise.
 *
 * @param env environment to read (defaults to `process.env`)
 */
export function superviseLogVerbose(env: NodeJS.ProcessEnv = process.env): boolean {
  const flag = (env.IAPEER_SUPERVISE_LOG_VERBOSE ?? '').trim().toLowerCase()
  return ['1', 'true', 'yes'].includes(flag)
}
51
+
52
/**
 * Render one logfmt value.
 *
 *  - Empty string → `""`.
 *  - A value with no whitespace, `=` or `"` is emitted bare, unchanged
 *    (a lone backslash in such a value is NOT escaped).
 *  - Anything else is double-quoted, with `\` and `"` backslash-escaped.
 *
 * Carriage returns and line feeds inside a quoted value are written as the
 * two-character escapes `\r` / `\n`. Emitting them raw would split a single
 * event across several physical lines and break the one-line-per-decision
 * format the log relies on.
 *
 * @param v the value to render; numbers are stringified first
 * @returns the bare or quoted logfmt token
 */
export function fmtValue(v: string | number): string {
  const s = String(v)
  if (s === '') return '""'
  if (!/[\s"=]/.test(s)) return s
  // Backslashes first, so the escapes added afterwards are not doubled again.
  const escaped = s
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\n/g, '\\n')
    .replace(/\r/g, '\\r')
  return `"${escaped}"`
}
60
+
61
/**
 * Build a single logfmt line for one event. Pure; no trailing newline.
 *
 * The line always opens with `ts=<ISO-8601 of nowMs>`, followed by one
 * `key=value` pair per entry of `fields` in insertion order. Entries whose
 * value is `undefined` are left out; values are rendered with fmtValue.
 * Pairs are separated by a single space.
 *
 * @param nowMs  epoch milliseconds used for the `ts` stamp
 * @param fields event fields to render
 * @returns the formatted line
 */
export function formatEventLine(nowMs: number, fields: Record<string, string | number | undefined>): string {
  const stamp = `ts=${new Date(nowMs).toISOString()}`
  const pairs = Object.entries(fields).flatMap(([key, value]) =>
    value === undefined ? [] : [`${key}=${fmtValue(value)}`],
  )
  return [stamp, ...pairs].join(' ')
}
71
+
72
/**
 * Size-rotate `path` and its numbered backups (`path.1` … `path.<keep>`) when
 * appending the next line would push the base file past `maxBytes`.
 *
 * Rotation drops `path.<keep>`, shifts each remaining backup up by one
 * (`.i` → `.i+1`, highest first so nothing is overwritten), then renames the
 * base to `path.1`. The caller is expected to append afterwards, which
 * recreates the base.
 *
 * No rotation happens when the base file does not exist, when the line still
 * fits, or when the base is empty: rotating an empty file would shift the
 * chain and discard the oldest backup without freeing any room.
 *
 * Best-effort: every filesystem step swallows its own error, so a failure
 * leaves the chain as it is and the caller simply appends.
 *
 * @param path     the live log file
 * @param lineLen  size of the line about to be appended, in bytes
 * @param maxBytes size cap for the base file
 * @param keep     number of backups to retain
 */
function rotateIfNeeded(path: string, lineLen: number, maxBytes: number, keep: number): void {
  let size: number
  try {
    size = statSync(path).size
  } catch {
    return // no file yet → nothing to rotate
  }
  if (size === 0 || size + lineLen <= maxBytes) return

  try {
    rmSync(`${path}.${keep}`, { force: true })
  } catch {
    /* best-effort */
  }
  for (let i = keep - 1; i >= 1; i--) {
    try {
      renameSync(`${path}.${i}`, `${path}.${i + 1}`)
    } catch {
      /* that backup may not exist yet */
    }
  }
  try {
    renameSync(path, `${path}.1`)
  } catch {
    /* best-effort */
  }
}
101
+
102
/** Optional knobs for appendLifecycleEvent. */
export interface AppendEventOptions {
  /**
   * Environment consulted for the rotation settings
   * `IAPEER_LIFECYCLE_LOG_MAX_BYTES` and `IAPEER_LIFECYCLE_LOG_KEEP`.
   * When omitted, `process.env` is used.
   */
  env?: NodeJS.ProcessEnv
  /**
   * Epoch milliseconds to stamp on the line, so a caller can log with its own
   * clock and keep the timestamp consistent with its accounting.
   * When omitted, `Date.now()` is used.
   */
  nowMs?: number
}
109
+
110
/**
 * Append one lifecycle decision line to `logDir`/lifecycle.log.
 *
 * Steps: resolve the log path, format the line (timestamp from `opts.nowMs`,
 * else `Date.now()`), read the rotation knobs from the environment
 * (`IAPEER_LIFECYCLE_LOG_MAX_BYTES`, `IAPEER_LIFECYCLE_LOG_KEEP`, falling back
 * to the module defaults), create the directory if needed (mode 0700), rotate
 * if the new line would exceed the cap, then append (file mode 0600).
 *
 * A falsy `logDir` is a no-op: nothing is resolved and nothing is written.
 *
 * Fully best-effort — never throws. Everything, including line formatting,
 * runs inside the `try`: formatting an invalid `nowMs` (e.g. NaN) raises a
 * RangeError from `Date.prototype.toISOString`, and that must not escape into
 * the caller any more than a filesystem failure may.
 *
 * @param logDir directory that holds the log; falsy disables logging
 * @param fields event fields, rendered in insertion order (undefined skipped)
 * @param opts   optional environment and timestamp overrides
 */
export function appendLifecycleEvent(
  logDir: string | undefined,
  fields: Record<string, string | number | undefined>,
  opts: AppendEventOptions = {},
): void {
  if (!logDir) return
  try {
    const env = opts.env ?? process.env
    const path = lifecycleLogPath(logDir)
    const line = `${formatEventLine(opts.nowMs ?? Date.now(), fields)}\n`
    const maxBytes = envPosInt(env.IAPEER_LIFECYCLE_LOG_MAX_BYTES, DEFAULT_MAX_BYTES)
    const keep = envPosInt(env.IAPEER_LIFECYCLE_LOG_KEEP, DEFAULT_KEEP)
    mkdirSync(logDir, { recursive: true, mode: 0o700 })
    // The size cap is compared against statSync's byte count, so measure the
    // line in UTF-8 bytes — `line.length` counts UTF-16 code units and
    // under-reports any non-ASCII content.
    rotateIfNeeded(path, Buffer.byteLength(line, 'utf8'), maxBytes, keep)
    appendFileSync(path, line, { mode: 0o600 })
  } catch {
    /* observability is best-effort — a log failure must never break a wake/reap */
  }
}