@agfpd/iapeer 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +9 -2
- package/src/cli/cli.test.ts +23 -2
- package/src/cli/index.ts +90 -2
- package/src/core/constants.ts +7 -1
- package/src/daemon/main.ts +7 -2
- package/src/lifecycle/eventlog.test.ts +114 -0
- package/src/lifecycle/eventlog.ts +133 -0
- package/src/lifecycle/index.ts +292 -56
- package/src/lifecycle/lifecycle.test.ts +208 -63
- package/src/registry/registry.test.ts +33 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agfpd/iapeer",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.2.1",
|
|
4
4
|
"description": "Foundation core for the IAPeer multi-agent ecosystem: identity, registry, storage, codec.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -21,7 +21,14 @@
|
|
|
21
21
|
},
|
|
22
22
|
"scripts": {
|
|
23
23
|
"test": "IAPEER_TEST_SANDBOX=1 bun test",
|
|
24
|
-
"typecheck": "tsc --noEmit"
|
|
24
|
+
"typecheck": "tsc --noEmit",
|
|
25
|
+
"release": "npm version patch && npm publish && git push --follow-tags",
|
|
26
|
+
"release:minor": "npm version minor && npm publish && git push --follow-tags",
|
|
27
|
+
"release:major": "npm version major && npm publish && git push --follow-tags",
|
|
28
|
+
"prepublishOnly": "test -z \"$(git status --porcelain)\" || (echo 'release: working tree is dirty — commit or stash before release' >&2 && exit 1)"
|
|
29
|
+
},
|
|
30
|
+
"publishConfig": {
|
|
31
|
+
"access": "public"
|
|
25
32
|
},
|
|
26
33
|
"dependencies": {
|
|
27
34
|
"@modelcontextprotocol/sdk": "1.29.0",
|
package/src/cli/cli.test.ts
CHANGED
|
@@ -7,8 +7,8 @@ import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
|
|
7
7
|
import { mkdtempSync, rmSync, writeFileSync } from 'fs'
|
|
8
8
|
import { tmpdir } from 'os'
|
|
9
9
|
import { join } from 'path'
|
|
10
|
-
import { formatListTable, listPeers, parseArgs, sendMessage, startPeer, stopPeer } from './index.ts'
|
|
11
|
-
import { upsertPeer } from '../registry/index.ts'
|
|
10
|
+
import { formatListTable, listPeers, parseArgs, removePeerCli, sendMessage, startPeer, stopPeer } from './index.ts'
|
|
11
|
+
import { findPeer, readPeersIndex, upsertPeer } from '../registry/index.ts'
|
|
12
12
|
import { isStopped, loadLifecycleConfig, setStopped } from '../lifecycle/index.ts'
|
|
13
13
|
import { launchdPlistPath } from '../launch/launchd.ts'
|
|
14
14
|
|
|
@@ -90,6 +90,27 @@ describe('FLEET GUARD (H4) — foreign persistent-peer launchd plist is off-limi
|
|
|
90
90
|
})
|
|
91
91
|
})
|
|
92
92
|
|
|
93
|
+
describe('remove (registry record via the locked writer)', () => {
|
|
94
|
+
test('removes a registered peer through registry.removePeer', async () => {
|
|
95
|
+
await register('zombie')
|
|
96
|
+
const e = env()
|
|
97
|
+
expect(findPeer(readPeersIndex({ env: e }), 'zombie')).not.toBeNull()
|
|
98
|
+
const o = await removePeerCli('zombie', { env: e })
|
|
99
|
+
expect(o.action).toBe('removed')
|
|
100
|
+
expect(findPeer(readPeersIndex({ env: e }), 'zombie')).toBeNull()
|
|
101
|
+
})
|
|
102
|
+
test('removing an absent peer is an idempotent no-op (not an error)', async () => {
|
|
103
|
+
const o = await removePeerCli('never-existed', { env: env() })
|
|
104
|
+
expect(o.action).toBe('absent')
|
|
105
|
+
})
|
|
106
|
+
test('a second remove of the same peer is also a no-op', async () => {
|
|
107
|
+
await register('twice')
|
|
108
|
+
const e = env()
|
|
109
|
+
expect((await removePeerCli('twice', { env: e })).action).toBe('removed')
|
|
110
|
+
expect((await removePeerCli('twice', { env: e })).action).toBe('absent')
|
|
111
|
+
})
|
|
112
|
+
})
|
|
113
|
+
|
|
93
114
|
describe('send validation', () => {
|
|
94
115
|
test('invalid --from identity → throws', async () => {
|
|
95
116
|
await register('alpha')
|
package/src/cli/index.ts
CHANGED
|
@@ -19,9 +19,9 @@ import {
|
|
|
19
19
|
type Intelligence,
|
|
20
20
|
type Runtime,
|
|
21
21
|
} from '../core/constants.ts'
|
|
22
|
-
import { buildProcessAddress, buildSocketPath } from '../core/socket.ts'
|
|
22
|
+
import { buildProcessAddress, buildSocketPath, parseSessionName } from '../core/socket.ts'
|
|
23
23
|
import { ensureGlobalIapScaffold } from '../storage/index.ts'
|
|
24
|
-
import { findPeer, readPeersIndex, type PeerRecord } from '../registry/index.ts'
|
|
24
|
+
import { findPeer, readPeersIndex, removePeer, type PeerRecord } from '../registry/index.ts'
|
|
25
25
|
import { isPeerLive, routeControl, routeSend, type WakeFn } from '../transport/index.ts'
|
|
26
26
|
import {
|
|
27
27
|
attachPeer,
|
|
@@ -31,6 +31,7 @@ import {
|
|
|
31
31
|
isStopped,
|
|
32
32
|
killSession,
|
|
33
33
|
loadLifecycleConfig,
|
|
34
|
+
setNewEager,
|
|
34
35
|
setStopped,
|
|
35
36
|
wakeOrSpawn,
|
|
36
37
|
} from '../lifecycle/index.ts'
|
|
@@ -225,6 +226,51 @@ export function startPeer(personality: string, runtime: string | undefined, opts
|
|
|
225
226
|
return out
|
|
226
227
|
}
|
|
227
228
|
|
|
229
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
230
|
+
// remove — delete a peer's record from the registry through the LOCKED writer
|
|
231
|
+
// (registry.removePeer). Direct edits of peers-profiles.json are refused at
|
|
232
|
+
// storage.ts:304 (locked-writer invariant); this is the operator path that used
|
|
233
|
+
// to require dropping into `bun -e removePeer(...)`. The use case is reaping the
|
|
234
|
+
// ephemeral zombie records a retired spawn leaves behind.
|
|
235
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
236
|
+
|
|
237
|
+
export interface RemoveOutcome {
|
|
238
|
+
personality: string
|
|
239
|
+
action: 'removed' | 'absent' | 'refused-live'
|
|
240
|
+
reason?: string
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* remove <peer> [--force]: drop the registry record via the locked writer.
|
|
245
|
+
* IDEMPOTENT — an absent peer is a no-op success (`absent`), never an error.
|
|
246
|
+
* SAFETY: refuses a peer that is currently LIVE on any runtime — deleting a
|
|
247
|
+
* running session's record would orphan it from routing (resolveCallerIdentity /
|
|
248
|
+
* findPeer would no longer resolve it while it still runs). --force overrides.
|
|
249
|
+
* A zombie record is dead by definition, so the guard never blocks the cleanup
|
|
250
|
+
* it exists for.
|
|
251
|
+
*/
|
|
252
|
+
export async function removePeerCli(
|
|
253
|
+
personality: string,
|
|
254
|
+
opts: CliEnvOptions & { force?: boolean } = {},
|
|
255
|
+
): Promise<RemoveOutcome> {
|
|
256
|
+
const env = opts.env ?? process.env
|
|
257
|
+
const peer = findPeer(readPeersIndex({ env }), personality)
|
|
258
|
+
if (!peer) return { personality, action: 'absent' }
|
|
259
|
+
if (!opts.force) {
|
|
260
|
+
const cfg = loadLifecycleConfig(env)
|
|
261
|
+
const liveRt = peer.runtimes.find(rt => isPeerLive(rt, personality, cfg.sockDir))
|
|
262
|
+
if (liveRt) {
|
|
263
|
+
return {
|
|
264
|
+
personality,
|
|
265
|
+
action: 'refused-live',
|
|
266
|
+
reason: `"${personality}" is LIVE on ${liveRt} — removing its registry record would orphan the running session from routing; stop it first or pass --force`,
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
await removePeer(personality, { env })
|
|
271
|
+
return { personality, action: 'removed' }
|
|
272
|
+
}
|
|
273
|
+
|
|
228
274
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
229
275
|
// send — manual IAP send fallback (contract Примитивы §send). Goes through the
|
|
230
276
|
// same router path as send_to_peer (resolve → deliver / wake), in-process so it
|
|
@@ -308,12 +354,14 @@ const USAGE = `usage: iapeer <verb> [args]
|
|
|
308
354
|
list [--json] registered peers + per-runtime liveness
|
|
309
355
|
stop <peer> [runtime] | --all durable-stop a warm peer / bootout an always-on one
|
|
310
356
|
start <peer> [runtime] re-enable a stopped peer / bootstrap an always-on one
|
|
357
|
+
remove <peer> [--force] delete a peer's registry record (locked writer); refuses a LIVE peer unless --force
|
|
311
358
|
send <target> (--message <text> | --message-file <f|->) [--from <id>] [--attachment <p>]… [--topic <t>] manual IAP send (fallback)
|
|
312
359
|
<runtime> launch the cwd's peer (ALWAYS fresh)
|
|
313
360
|
enable <plugin> [peer] [--no-setup] install + enable an agfpd capability for a peer
|
|
314
361
|
attach <peer> [runtime] ensure-live + resume, then tmux attach
|
|
315
362
|
interrupt <peer> [runtime] interrupt the current turn (Escape) — context intact
|
|
316
363
|
compact <peer> [runtime] compact the peer's context (/compact)
|
|
364
|
+
self-fresh (agent self-call) mark /new eager-fresh + self-kill — the daemon relaunches fresh
|
|
317
365
|
`
|
|
318
366
|
|
|
319
367
|
export async function runCli(argv: string[], env: NodeJS.ProcessEnv = process.env): Promise<number> {
|
|
@@ -457,6 +505,17 @@ export async function runCli(argv: string[], env: NodeJS.ProcessEnv = process.en
|
|
|
457
505
|
for (const o of outcomes) out(`${o.personality} (${o.runtime}): ${o.action}${o.reason ? ` — ${o.reason}` : ''}\n`)
|
|
458
506
|
return outcomes.some(o => o.action === 'refused-foreign-launchd') ? 1 : 0
|
|
459
507
|
}
|
|
508
|
+
case 'remove': {
|
|
509
|
+
// Reap a registry record through the locked writer (the operator path over
|
|
510
|
+
// registry.removePeer). Idempotent on an absent peer (exit 0). Refuses a LIVE
|
|
511
|
+
// peer unless --force (orphaning a running session from routing is the risk).
|
|
512
|
+
if (!positionals[0]) return usage(errOut)
|
|
513
|
+
const o = await removePeerCli(positionals[0], { force: flags.force === true, env })
|
|
514
|
+
if (o.action === 'removed') out(`removed "${o.personality}" from the registry\n`)
|
|
515
|
+
else if (o.action === 'absent') out(`"${o.personality}" not registered — no-op\n`)
|
|
516
|
+
else errOut(`remove: ${o.reason}\n`)
|
|
517
|
+
return o.action === 'refused-live' ? 1 : 0
|
|
518
|
+
}
|
|
460
519
|
case 'send': {
|
|
461
520
|
// Message body from EITHER --message <text> OR --message-file <f> (f='-' →
|
|
462
521
|
// stdin). The runtime packages (telegram/notifier) + monitor deliver via
|
|
@@ -543,6 +602,35 @@ export async function runCli(argv: string[], env: NodeJS.ProcessEnv = process.en
|
|
|
543
602
|
if (!positionals[0] || !positionals[1]) return usage(errOut)
|
|
544
603
|
return await runAlwaysOn(positionals[0], positionals[1], process.cwd())
|
|
545
604
|
}
|
|
605
|
+
case 'self-fresh': {
|
|
606
|
+
// /new AGENT-FACING TRIGGER (TARGET redesign). Run BY the agent itself as the
|
|
607
|
+
// FINAL step of a /new graceful wind-down (the owner triggers it via a per-peer
|
|
608
|
+
// telegram alias: "write a handoff to durable memory, then run iapeer self-fresh"
|
|
609
|
+
// — the alias text is telegram-owned, NOT global doctrine). It: resolves the
|
|
610
|
+
// caller identity from PEER_IDENTITY (<runtime>-<personality>), writes the
|
|
611
|
+
// .new-eager mark, then self-kills the caller's OWN tmux session. The daemon's
|
|
612
|
+
// superviseTick then sees the dead session carrying .new-eager → eager fresh
|
|
613
|
+
// relaunch (with initial_prompt) so the agent reports it is back up.
|
|
614
|
+
const identity = env.PEER_IDENTITY?.trim()
|
|
615
|
+
if (!identity) {
|
|
616
|
+
errOut('self-fresh: PEER_IDENTITY is not set — this verb is an agent self-call from inside a session\n')
|
|
617
|
+
return 1
|
|
618
|
+
}
|
|
619
|
+
const addr = parseSessionName(identity)
|
|
620
|
+
if (!addr) {
|
|
621
|
+
errOut(`self-fresh: invalid PEER_IDENTITY "${identity}" — expected <runtime>-<personality>\n`)
|
|
622
|
+
return 1
|
|
623
|
+
}
|
|
624
|
+
const cfg = loadLifecycleConfig(env)
|
|
625
|
+
// Mark FIRST, kill SECOND: if the kill races ahead of the mark the daemon would
|
|
626
|
+
// see a dead session with no .new-eager → a plain reaped-gone (lazy fresh on the
|
|
627
|
+
// next message), not the eager relaunch — degrade gracefully, never lose the mark.
|
|
628
|
+
setNewEager(cfg, identity)
|
|
629
|
+
out(`self-fresh: marked ${identity} for eager fresh re-launch; self-killing session\n`)
|
|
630
|
+
const sock = buildSocketPath(addr.runtime, addr.personality, cfg.sockDir)
|
|
631
|
+
killSession(sock, identity)
|
|
632
|
+
return 0
|
|
633
|
+
}
|
|
546
634
|
case 'interrupt':
|
|
547
635
|
case 'compact': {
|
|
548
636
|
// In-session control (Ф-E, clean-slash namespace): interrupt a stuck/raving
|
package/src/core/constants.ts
CHANGED
|
@@ -13,7 +13,13 @@ export const SUPPORTED_LOCAL_RUNTIMES = ['claude', 'codex'] as const
|
|
|
13
13
|
export type SupportedLocalRuntime = (typeof SUPPORTED_LOCAL_RUNTIMES)[number]
|
|
14
14
|
|
|
15
15
|
export const PEERS_SCHEMA_VERSION = 2
|
|
16
|
-
|
|
16
|
+
// 450 (was 250): self-documenting API-peer descriptions (notifier timer/watcher)
|
|
17
|
+
// must fit "who the peer is + registration format + a live example" — dense full
|
|
18
|
+
// texts run to ~408 chars; 250 cut them mid-word so the caller could not compose
|
|
19
|
+
// the call. Bumped with Arthur's sanction (2026-06-08). NB: this is COMPILE-TIME
|
|
20
|
+
// baked — the live daemon re-clamps descriptions on read (registry parsePeerRecord),
|
|
21
|
+
// so the running router keeps the OLD limit until restarted onto the new binary.
|
|
22
|
+
export const MAX_DESCRIPTION_LEN = 450
|
|
17
23
|
|
|
18
24
|
// Contract vocabulary (docs/Идентичность, Артур 05.06): the nature of the
|
|
19
25
|
// intelligence expressing itself through a runtime.
|
package/src/daemon/main.ts
CHANGED
|
@@ -99,8 +99,13 @@ export async function startConfiguredDaemon(opts: ConfiguredDaemonOptions = {}):
|
|
|
99
99
|
wake: makeWakeFn(cfg, env),
|
|
100
100
|
supervise: {
|
|
101
101
|
intervalMs: opts.superviseIntervalMs ?? DEFAULT_SUPERVISE_INTERVAL_MS,
|
|
102
|
-
// idle-reap / zombie-sweep, THEN
|
|
103
|
-
// session died carrying a
|
|
102
|
+
// idle-reap / zombie-sweep, THEN the eager fresh re-launch for any peer whose
|
|
103
|
+
// session died carrying a .new-eager mark (owner /new; async, best-effort).
|
|
104
|
+
// The DURABLE decision trace (which peer, what outcome, when, why) is emitted
|
|
105
|
+
// INSIDE superviseTick (lifecycle/eventlog.ts → logs/iapeer/lifecycle.log), so
|
|
106
|
+
// every reap is recorded regardless of entry point (this timer AND the heal-at-
|
|
107
|
+
// wake superviseTick inside wakeOrSpawn). The outcomes array drives only the
|
|
108
|
+
// eager relaunch here; the trace does not depend on consuming it.
|
|
104
109
|
tick: async () => {
|
|
105
110
|
const outcomes = superviseTick(cfg, { env })
|
|
106
111
|
await processEagerRelaunches(cfg, outcomes, { env })
|
package/src/lifecycle/eventlog.test.ts
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
// eventlog — the daemon's durable, rotated lifecycle decision log. Tests the pure
|
|
2
|
+
// logfmt formatter, the append path (into an explicit temp logDir — never the real
|
|
3
|
+
// ~/.iapeer), and the size-rotation chain. No daemon, no tmux — pure FS.
|
|
4
|
+
|
|
5
|
+
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
|
6
|
+
import { existsSync, mkdtempSync, readFileSync, rmSync, statSync } from 'fs'
|
|
7
|
+
import { tmpdir } from 'os'
|
|
8
|
+
import { join } from 'path'
|
|
9
|
+
import {
|
|
10
|
+
appendLifecycleEvent,
|
|
11
|
+
fmtValue,
|
|
12
|
+
formatEventLine,
|
|
13
|
+
lifecycleLogPath,
|
|
14
|
+
} from './eventlog.ts'
|
|
15
|
+
|
|
16
|
+
const TS = 1_749_470_400_000 // fixed epoch-ms → a stable ISO for golden lines
|
|
17
|
+
const ISO = new Date(TS).toISOString()
|
|
18
|
+
|
|
19
|
+
describe('fmtValue (logfmt escaping)', () => {
|
|
20
|
+
test('bare token stays bare', () => {
|
|
21
|
+
expect(fmtValue('reaped-idle')).toBe('reaped-idle')
|
|
22
|
+
expect(fmtValue('claude-boris')).toBe('claude-boris')
|
|
23
|
+
expect(fmtValue(42)).toBe('42')
|
|
24
|
+
})
|
|
25
|
+
test('empty string → ""', () => {
|
|
26
|
+
expect(fmtValue('')).toBe('""')
|
|
27
|
+
})
|
|
28
|
+
test('whitespace / = / " force quoting and escape', () => {
|
|
29
|
+
expect(fmtValue('session no longer live')).toBe('"session no longer live"')
|
|
30
|
+
expect(fmtValue('a=b')).toBe('"a=b"')
|
|
31
|
+
expect(fmtValue('say "hi"')).toBe('"say \\"hi\\""')
|
|
32
|
+
expect(fmtValue('back\\slash here')).toBe('"back\\\\slash here"')
|
|
33
|
+
})
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
describe('formatEventLine', () => {
|
|
37
|
+
test('ts is first; fields keep insertion order; undefined skipped', () => {
|
|
38
|
+
const line = formatEventLine(TS, {
|
|
39
|
+
ev: 'supervise',
|
|
40
|
+
identity: 'claude-boris',
|
|
41
|
+
action: 'reaped-gone',
|
|
42
|
+
reason: 'session no longer live',
|
|
43
|
+
ref: undefined, // dropped
|
|
44
|
+
outcome: 'fresh-next-msg',
|
|
45
|
+
})
|
|
46
|
+
expect(line).toBe(
|
|
47
|
+
`ts=${ISO} ev=supervise identity=claude-boris action=reaped-gone reason="session no longer live" outcome=fresh-next-msg`,
|
|
48
|
+
)
|
|
49
|
+
})
|
|
50
|
+
test('age field renders as a bare token', () => {
|
|
51
|
+
const line = formatEventLine(TS, { ev: 'supervise', identity: 'claude-x', action: 'reaped-idle', age: '4230s' })
|
|
52
|
+
expect(line).toBe(`ts=${ISO} ev=supervise identity=claude-x action=reaped-idle age=4230s`)
|
|
53
|
+
})
|
|
54
|
+
})
|
|
55
|
+
|
|
56
|
+
describe('appendLifecycleEvent', () => {
|
|
57
|
+
let dir: string
|
|
58
|
+
|
|
59
|
+
beforeEach(() => {
|
|
60
|
+
dir = mkdtempSync(join(tmpdir(), 'iapeer-eventlog-'))
|
|
61
|
+
})
|
|
62
|
+
afterEach(() => {
|
|
63
|
+
rmSync(dir, { recursive: true, force: true })
|
|
64
|
+
})
|
|
65
|
+
|
|
66
|
+
test('falsy logDir → no-op (a partial cfg never writes / never resolves a real path)', () => {
|
|
67
|
+
expect(() => appendLifecycleEvent(undefined, { ev: 'supervise', identity: 'x' }, { nowMs: TS })).not.toThrow()
|
|
68
|
+
expect(() => appendLifecycleEvent('', { ev: 'supervise', identity: 'x' }, { nowMs: TS })).not.toThrow()
|
|
69
|
+
})
|
|
70
|
+
|
|
71
|
+
test('writes one logfmt line per call, appended in order', () => {
|
|
72
|
+
appendLifecycleEvent(dir, { ev: 'wake', personality: 'boris', mode: 'fresh', cause: 'crash-or-self-close' }, { nowMs: TS })
|
|
73
|
+
appendLifecycleEvent(dir, { ev: 'supervise', identity: 'claude-doc', action: 'reaped-gone' }, { nowMs: TS + 1000 })
|
|
74
|
+
const body = readFileSync(lifecycleLogPath(dir), 'utf8')
|
|
75
|
+
const lines = body.trimEnd().split('\n')
|
|
76
|
+
expect(lines).toHaveLength(2)
|
|
77
|
+
expect(lines[0]).toBe(`ts=${ISO} ev=wake personality=boris mode=fresh cause=crash-or-self-close`)
|
|
78
|
+
expect(lines[1]).toContain('ev=supervise identity=claude-doc action=reaped-gone')
|
|
79
|
+
})
|
|
80
|
+
|
|
81
|
+
test('creates the log dir if absent', () => {
|
|
82
|
+
const nested = join(dir, 'logs', 'iapeer')
|
|
83
|
+
appendLifecycleEvent(nested, { ev: 'supervise', identity: 'x' }, { nowMs: TS })
|
|
84
|
+
expect(existsSync(lifecycleLogPath(nested))).toBe(true)
|
|
85
|
+
})
|
|
86
|
+
|
|
87
|
+
test('size rotation: base → .1, oldest dropped past keep', () => {
|
|
88
|
+
const env = { IAPEER_LIFECYCLE_LOG_MAX_BYTES: '120', IAPEER_LIFECYCLE_LOG_KEEP: '2' }
|
|
89
|
+
const path = lifecycleLogPath(dir)
|
|
90
|
+
for (let i = 0; i < 6; i++) {
|
|
91
|
+
appendLifecycleEvent(dir, { ev: 'supervise', identity: `claude-peer${i}`, action: 'reaped-gone', n: i }, { env, nowMs: TS + i })
|
|
92
|
+
}
|
|
93
|
+
expect(existsSync(path)).toBe(true)
|
|
94
|
+
expect(existsSync(`${path}.1`)).toBe(true)
|
|
95
|
+
expect(existsSync(`${path}.2`)).toBe(true)
|
|
96
|
+
expect(existsSync(`${path}.3`)).toBe(false) // keep=2 → never a .3
|
|
97
|
+
expect(statSync(path).size).toBeLessThanOrEqual(200)
|
|
98
|
+
expect(readFileSync(path, 'utf8')).toContain('claude-peer5') // newest in the live base file
|
|
99
|
+
})
|
|
100
|
+
|
|
101
|
+
test('rotation preserves chronological order across files (.N oldest, base newest)', () => {
|
|
102
|
+
const env = { IAPEER_LIFECYCLE_LOG_MAX_BYTES: '90', IAPEER_LIFECYCLE_LOG_KEEP: '3' }
|
|
103
|
+
const path = lifecycleLogPath(dir)
|
|
104
|
+
for (let i = 0; i < 4; i++) {
|
|
105
|
+
appendLifecycleEvent(dir, { ev: 'supervise', identity: `claude-p${i}` }, { env, nowMs: TS + i })
|
|
106
|
+
}
|
|
107
|
+
const ordered = ['.3', '.2', '.1', '']
|
|
108
|
+
.map(suf => (existsSync(path + suf) ? readFileSync(path + suf, 'utf8') : ''))
|
|
109
|
+
.join('')
|
|
110
|
+
const seen = [...ordered.matchAll(/identity=claude-p(\d)/g)].map(m => Number(m[1]))
|
|
111
|
+
expect(seen).toEqual([...seen].sort((a, b) => a - b))
|
|
112
|
+
expect(seen[seen.length - 1]).toBe(3) // newest line is p3, in the base file
|
|
113
|
+
})
|
|
114
|
+
})
|
package/src/lifecycle/eventlog.ts
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
// Lifecycle event log — the daemon's DURABLE, ROTATED trace of every lifecycle
|
|
2
|
+
// DECISION it makes. This is the observability gap the boris-fresh incident hit:
|
|
3
|
+
// a peer woke fresh and there was NO record of when/how its prior session ended,
|
|
4
|
+
// nor of the daemon's fresh-vs-resume reasoning, because superviseTick's outcomes
|
|
5
|
+
// were dropped and the daemon never wrote a decision line anywhere.
|
|
6
|
+
//
|
|
7
|
+
// Design:
|
|
8
|
+
// • One line per decision, logfmt (`key=value`, values quoted iff they contain
|
|
9
|
+
// whitespace/quotes/`=`). Human-greppable AND machine-parseable. The state
|
|
10
|
+
// markers (.idle-reaped / .deaths) are CONSUMED on the next wake — this log is
|
|
11
|
+
// the part that survives, so a postmortem can reconstruct the death even after
|
|
12
|
+
// the marker is gone.
|
|
13
|
+
// • Append-only, app-managed SIZE rotation (NOT launchd's stdout/stderr, which
|
|
14
|
+
// are unbounded and truncated on restart). lifecycle.log → .1 … .N.
|
|
15
|
+
// • The target directory is passed IN (cfg.eventLogDir), NOT re-resolved from
|
|
16
|
+
// env — so it is isolated by the SAME cfg the rest of lifecycle routes through
|
|
17
|
+
// (a test that sandboxes cfg.stateDir also sandboxes this log; no leak to the
|
|
18
|
+
// real ~/.iapeer). A falsy dir → no-op (a partial test cfg never writes).
|
|
19
|
+
// • Best-effort throughout: a write/rotate failure is swallowed. Observability
|
|
20
|
+
// must never take down the daemon or fail a wake/reap.
|
|
21
|
+
//
|
|
22
|
+
// Lifted-out-able: the rotate-append primitive is path-parameterized, so the
|
|
23
|
+
// adjacent "log rotation" phase can promote it to storage/ and point other log
|
|
24
|
+
// producers at it without touching this module's call sites.
|
|
25
|
+
|
|
26
|
+
import { appendFileSync, mkdirSync, renameSync, rmSync, statSync } from 'fs'
|
|
27
|
+
import { join } from 'path'
|
|
28
|
+
|
|
29
|
+
/** Default cap per lifecycle.log file before it rotates to lifecycle.log.1. */
|
|
30
|
+
const DEFAULT_MAX_BYTES = 5 * 1024 * 1024 // 5 MiB
|
|
31
|
+
/** Default number of rotated backups kept (lifecycle.log.1 … .KEEP). */
|
|
32
|
+
const DEFAULT_KEEP = 5
|
|
33
|
+
|
|
34
|
+
/** The durable lifecycle decision log inside `logDir` (cfg.eventLogDir). */
|
|
35
|
+
export function lifecycleLogPath(logDir: string): string {
|
|
36
|
+
return join(logDir, 'lifecycle.log')
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
function envPosInt(raw: string | undefined, dflt: number): number {
|
|
40
|
+
const n = parseInt(raw ?? '', 10)
|
|
41
|
+
return Number.isFinite(n) && n > 0 ? n : dflt
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/** Whether to also log the steady-state non-decisions (alive / skipped-launchd).
|
|
45
|
+
* Off by default — they fire every tick per live/launchd peer and would bury the
|
|
46
|
+
* actual decisions (reap / wake) under heartbeat noise. */
|
|
47
|
+
export function superviseLogVerbose(env: NodeJS.ProcessEnv = process.env): boolean {
|
|
48
|
+
const v = env.IAPEER_SUPERVISE_LOG_VERBOSE?.trim().toLowerCase()
|
|
49
|
+
return v === '1' || v === 'true' || v === 'yes'
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/** logfmt value: bare token, or double-quoted with `"`/`\` escaped, when it
|
|
53
|
+
* contains whitespace, `=` or `"`. Empty string → `""`. */
|
|
54
|
+
export function fmtValue(v: string | number): string {
|
|
55
|
+
const s = String(v)
|
|
56
|
+
if (s === '') return '""'
|
|
57
|
+
if (/[\s"=]/.test(s)) return `"${s.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`
|
|
58
|
+
return s
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/** Render one logfmt line (ts first, then fields in insertion order; undefined
|
|
62
|
+
* fields are skipped). No trailing newline. Pure — unit-testable. */
|
|
63
|
+
export function formatEventLine(nowMs: number, fields: Record<string, string | number | undefined>): string {
|
|
64
|
+
const parts = [`ts=${new Date(nowMs).toISOString()}`]
|
|
65
|
+
for (const [k, v] of Object.entries(fields)) {
|
|
66
|
+
if (v === undefined) continue
|
|
67
|
+
parts.push(`${k}=${fmtValue(v)}`)
|
|
68
|
+
}
|
|
69
|
+
return parts.join(' ')
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/** Size-rotate `path` (and its .1 … .keep backups) when the next line would push
|
|
73
|
+
* it over `maxBytes`. Drops the oldest, shifts each backup up by one, base→.1.
|
|
74
|
+
* Best-effort: any fs hiccup leaves the chain as-is (we then just append). */
|
|
75
|
+
function rotateIfNeeded(path: string, lineLen: number, maxBytes: number, keep: number): void {
|
|
76
|
+
let size: number
|
|
77
|
+
try {
|
|
78
|
+
size = statSync(path).size
|
|
79
|
+
} catch {
|
|
80
|
+
return // no file yet → nothing to rotate
|
|
81
|
+
}
|
|
82
|
+
if (size + lineLen <= maxBytes) return
|
|
83
|
+
try {
|
|
84
|
+
rmSync(`${path}.${keep}`, { force: true })
|
|
85
|
+
} catch {
|
|
86
|
+
/* best-effort */
|
|
87
|
+
}
|
|
88
|
+
for (let i = keep - 1; i >= 1; i--) {
|
|
89
|
+
try {
|
|
90
|
+
renameSync(`${path}.${i}`, `${path}.${i + 1}`)
|
|
91
|
+
} catch {
|
|
92
|
+
/* that backup may not exist yet */
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
try {
|
|
96
|
+
renameSync(path, `${path}.1`)
|
|
97
|
+
} catch {
|
|
98
|
+
/* best-effort */
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
export interface AppendEventOptions {
|
|
103
|
+
/** Reads the rotation knobs IAPEER_LIFECYCLE_LOG_MAX_BYTES / _KEEP. */
|
|
104
|
+
env?: NodeJS.ProcessEnv
|
|
105
|
+
/** Stamp the line with this epoch-ms (superviseTick passes its own tick clock so
|
|
106
|
+
* the log timestamp agrees with the death/idle accounting). Default Date.now(). */
|
|
107
|
+
nowMs?: number
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
/**
|
|
111
|
+
* Append one lifecycle decision line into `logDir`/lifecycle.log. A falsy `logDir`
|
|
112
|
+
* is a no-op (a partial test cfg without eventLogDir never writes — and never
|
|
113
|
+
* resolves a real path). Fully best-effort — never throws.
|
|
114
|
+
*/
|
|
115
|
+
export function appendLifecycleEvent(
|
|
116
|
+
logDir: string | undefined,
|
|
117
|
+
fields: Record<string, string | number | undefined>,
|
|
118
|
+
opts: AppendEventOptions = {},
|
|
119
|
+
): void {
|
|
120
|
+
if (!logDir) return
|
|
121
|
+
const env = opts.env ?? process.env
|
|
122
|
+
const path = lifecycleLogPath(logDir)
|
|
123
|
+
const line = `${formatEventLine(opts.nowMs ?? Date.now(), fields)}\n`
|
|
124
|
+
const maxBytes = envPosInt(env.IAPEER_LIFECYCLE_LOG_MAX_BYTES, DEFAULT_MAX_BYTES)
|
|
125
|
+
const keep = envPosInt(env.IAPEER_LIFECYCLE_LOG_KEEP, DEFAULT_KEEP)
|
|
126
|
+
try {
|
|
127
|
+
mkdirSync(logDir, { recursive: true, mode: 0o700 })
|
|
128
|
+
rotateIfNeeded(path, line.length, maxBytes, keep)
|
|
129
|
+
appendFileSync(path, line, { mode: 0o600 })
|
|
130
|
+
} catch {
|
|
131
|
+
/* observability is best-effort — a log failure must never break a wake/reap */
|
|
132
|
+
}
|
|
133
|
+
}
|