@agfpd/iapeer 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +9 -2
- package/src/cli/cli.test.ts +23 -2
- package/src/cli/index.ts +58 -1
- package/src/core/constants.ts +7 -1
- package/src/daemon/main.ts +5 -0
- package/src/lifecycle/eventlog.test.ts +114 -0
- package/src/lifecycle/eventlog.ts +133 -0
- package/src/lifecycle/index.ts +57 -7
- package/src/lifecycle/lifecycle.test.ts +16 -9
- package/src/registry/registry.test.ts +33 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agfpd/iapeer",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.2.2",
|
|
4
4
|
"description": "Foundation core for the IAPeer multi-agent ecosystem: identity, registry, storage, codec.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -21,7 +21,14 @@
|
|
|
21
21
|
},
|
|
22
22
|
"scripts": {
|
|
23
23
|
"test": "IAPEER_TEST_SANDBOX=1 bun test",
|
|
24
|
-
"typecheck": "tsc --noEmit"
|
|
24
|
+
"typecheck": "tsc --noEmit",
|
|
25
|
+
"release": "npm version patch && npm publish && git push --follow-tags",
|
|
26
|
+
"release:minor": "npm version minor && npm publish && git push --follow-tags",
|
|
27
|
+
"release:major": "npm version major && npm publish && git push --follow-tags",
|
|
28
|
+
"prepublishOnly": "test -z \"$(git status --porcelain)\" || (echo 'release: working tree is dirty — commit or stash before release' >&2 && exit 1)"
|
|
29
|
+
},
|
|
30
|
+
"publishConfig": {
|
|
31
|
+
"access": "public"
|
|
25
32
|
},
|
|
26
33
|
"dependencies": {
|
|
27
34
|
"@modelcontextprotocol/sdk": "1.29.0",
|
package/src/cli/cli.test.ts
CHANGED
|
@@ -7,8 +7,8 @@ import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
|
|
7
7
|
import { mkdtempSync, rmSync, writeFileSync } from 'fs'
|
|
8
8
|
import { tmpdir } from 'os'
|
|
9
9
|
import { join } from 'path'
|
|
10
|
-
import { formatListTable, listPeers, parseArgs, sendMessage, startPeer, stopPeer } from './index.ts'
|
|
11
|
-
import { upsertPeer } from '../registry/index.ts'
|
|
10
|
+
import { formatListTable, listPeers, parseArgs, removePeerCli, sendMessage, startPeer, stopPeer } from './index.ts'
|
|
11
|
+
import { findPeer, readPeersIndex, upsertPeer } from '../registry/index.ts'
|
|
12
12
|
import { isStopped, loadLifecycleConfig, setStopped } from '../lifecycle/index.ts'
|
|
13
13
|
import { launchdPlistPath } from '../launch/launchd.ts'
|
|
14
14
|
|
|
@@ -90,6 +90,27 @@ describe('FLEET GUARD (H4) — foreign persistent-peer launchd plist is off-limi
|
|
|
90
90
|
})
|
|
91
91
|
})
|
|
92
92
|
|
|
93
|
+
describe('remove (registry record via the locked writer)', () => {
|
|
94
|
+
test('removes a registered peer through registry.removePeer', async () => {
|
|
95
|
+
await register('zombie')
|
|
96
|
+
const e = env()
|
|
97
|
+
expect(findPeer(readPeersIndex({ env: e }), 'zombie')).not.toBeNull()
|
|
98
|
+
const o = await removePeerCli('zombie', { env: e })
|
|
99
|
+
expect(o.action).toBe('removed')
|
|
100
|
+
expect(findPeer(readPeersIndex({ env: e }), 'zombie')).toBeNull()
|
|
101
|
+
})
|
|
102
|
+
test('removing an absent peer is an idempotent no-op (not an error)', async () => {
|
|
103
|
+
const o = await removePeerCli('never-existed', { env: env() })
|
|
104
|
+
expect(o.action).toBe('absent')
|
|
105
|
+
})
|
|
106
|
+
test('a second remove of the same peer is also a no-op', async () => {
|
|
107
|
+
await register('twice')
|
|
108
|
+
const e = env()
|
|
109
|
+
expect((await removePeerCli('twice', { env: e })).action).toBe('removed')
|
|
110
|
+
expect((await removePeerCli('twice', { env: e })).action).toBe('absent')
|
|
111
|
+
})
|
|
112
|
+
})
|
|
113
|
+
|
|
93
114
|
describe('send validation', () => {
|
|
94
115
|
test('invalid --from identity → throws', async () => {
|
|
95
116
|
await register('alpha')
|
package/src/cli/index.ts
CHANGED
|
@@ -21,7 +21,7 @@ import {
|
|
|
21
21
|
} from '../core/constants.ts'
|
|
22
22
|
import { buildProcessAddress, buildSocketPath, parseSessionName } from '../core/socket.ts'
|
|
23
23
|
import { ensureGlobalIapScaffold } from '../storage/index.ts'
|
|
24
|
-
import { findPeer, readPeersIndex, type PeerRecord } from '../registry/index.ts'
|
|
24
|
+
import { findPeer, readPeersIndex, removePeer, type PeerRecord } from '../registry/index.ts'
|
|
25
25
|
import { isPeerLive, routeControl, routeSend, type WakeFn } from '../transport/index.ts'
|
|
26
26
|
import {
|
|
27
27
|
attachPeer,
|
|
@@ -226,6 +226,51 @@ export function startPeer(personality: string, runtime: string | undefined, opts
|
|
|
226
226
|
return out
|
|
227
227
|
}
|
|
228
228
|
|
|
229
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
230
|
+
// remove — delete a peer's record from the registry through the LOCKED writer
|
|
231
|
+
// (registry.removePeer). Direct edits of peers-profiles.json are refused at
|
|
232
|
+
// storage.ts:304 (locked-writer invariant); this is the operator path that used
|
|
233
|
+
// to require dropping into `bun -e removePeer(...)`. The use case is reaping the
|
|
234
|
+
// ephemeral zombie records a retired spawn leaves behind.
|
|
235
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
236
|
+
|
|
237
|
+
export interface RemoveOutcome {
|
|
238
|
+
personality: string
|
|
239
|
+
action: 'removed' | 'absent' | 'refused-live'
|
|
240
|
+
reason?: string
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* remove <peer> [--force]: drop the registry record via the locked writer.
|
|
245
|
+
* IDEMPOTENT — an absent peer is a no-op success (`absent`), never an error.
|
|
246
|
+
* SAFETY: refuses a peer that is currently LIVE on any runtime — deleting a
|
|
247
|
+
* running session's record would orphan it from routing (resolveCallerIdentity /
|
|
248
|
+
* findPeer would no longer resolve it while it still runs). --force overrides.
|
|
249
|
+
* A zombie record is dead by definition, so the guard never blocks the cleanup
|
|
250
|
+
* it exists for.
|
|
251
|
+
*/
|
|
252
|
+
export async function removePeerCli(
|
|
253
|
+
personality: string,
|
|
254
|
+
opts: CliEnvOptions & { force?: boolean } = {},
|
|
255
|
+
): Promise<RemoveOutcome> {
|
|
256
|
+
const env = opts.env ?? process.env
|
|
257
|
+
const peer = findPeer(readPeersIndex({ env }), personality)
|
|
258
|
+
if (!peer) return { personality, action: 'absent' }
|
|
259
|
+
if (!opts.force) {
|
|
260
|
+
const cfg = loadLifecycleConfig(env)
|
|
261
|
+
const liveRt = peer.runtimes.find(rt => isPeerLive(rt, personality, cfg.sockDir))
|
|
262
|
+
if (liveRt) {
|
|
263
|
+
return {
|
|
264
|
+
personality,
|
|
265
|
+
action: 'refused-live',
|
|
266
|
+
reason: `"${personality}" is LIVE on ${liveRt} — removing its registry record would orphan the running session from routing; stop it first or pass --force`,
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
await removePeer(personality, { env })
|
|
271
|
+
return { personality, action: 'removed' }
|
|
272
|
+
}
|
|
273
|
+
|
|
229
274
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
230
275
|
// send — manual IAP send fallback (contract Примитивы §send). Goes through the
|
|
231
276
|
// same router path as send_to_peer (resolve → deliver / wake), in-process so it
|
|
@@ -309,6 +354,7 @@ const USAGE = `usage: iapeer <verb> [args]
|
|
|
309
354
|
list [--json] registered peers + per-runtime liveness
|
|
310
355
|
stop <peer> [runtime] | --all durable-stop a warm peer / bootout an always-on one
|
|
311
356
|
start <peer> [runtime] re-enable a stopped peer / bootstrap an always-on one
|
|
357
|
+
remove <peer> [--force] delete a peer's registry record (locked writer); refuses a LIVE peer unless --force
|
|
312
358
|
send <target> (--message <text> | --message-file <f|->) [--from <id>] [--attachment <p>]… [--topic <t>] manual IAP send (fallback)
|
|
313
359
|
<runtime> launch the cwd's peer (ALWAYS fresh)
|
|
314
360
|
enable <plugin> [peer] [--no-setup] install + enable an agfpd capability for a peer
|
|
@@ -459,6 +505,17 @@ export async function runCli(argv: string[], env: NodeJS.ProcessEnv = process.en
|
|
|
459
505
|
for (const o of outcomes) out(`${o.personality} (${o.runtime}): ${o.action}${o.reason ? ` — ${o.reason}` : ''}\n`)
|
|
460
506
|
return outcomes.some(o => o.action === 'refused-foreign-launchd') ? 1 : 0
|
|
461
507
|
}
|
|
508
|
+
case 'remove': {
|
|
509
|
+
// Reap a registry record through the locked writer (the operator path over
|
|
510
|
+
// registry.removePeer). Idempotent on an absent peer (exit 0). Refuses a LIVE
|
|
511
|
+
// peer unless --force (orphaning a running session from routing is the risk).
|
|
512
|
+
if (!positionals[0]) return usage(errOut)
|
|
513
|
+
const o = await removePeerCli(positionals[0], { force: flags.force === true, env })
|
|
514
|
+
if (o.action === 'removed') out(`removed "${o.personality}" from the registry\n`)
|
|
515
|
+
else if (o.action === 'absent') out(`"${o.personality}" not registered — no-op\n`)
|
|
516
|
+
else errOut(`remove: ${o.reason}\n`)
|
|
517
|
+
return o.action === 'refused-live' ? 1 : 0
|
|
518
|
+
}
|
|
462
519
|
case 'send': {
|
|
463
520
|
// Message body from EITHER --message <text> OR --message-file <f> (f='-' →
|
|
464
521
|
// stdin). The runtime packages (telegram/notifier) + monitor deliver via
|
package/src/core/constants.ts
CHANGED
|
@@ -13,7 +13,13 @@ export const SUPPORTED_LOCAL_RUNTIMES = ['claude', 'codex'] as const
|
|
|
13
13
|
export type SupportedLocalRuntime = (typeof SUPPORTED_LOCAL_RUNTIMES)[number]
|
|
14
14
|
|
|
15
15
|
export const PEERS_SCHEMA_VERSION = 2
|
|
16
|
-
|
|
16
|
+
// 450 (was 250): self-documenting API-peer descriptions (notifier timer/watcher)
|
|
17
|
+
// must fit "who the peer is + registration format + a live example" — dense full
|
|
18
|
+
// texts run to ~408 chars; 250 cut them mid-word so the caller could not compose
|
|
19
|
+
// the call. Bumped with Arthur's sanction (2026-06-08). NB: this is COMPILE-TIME
|
|
20
|
+
// baked — the live daemon re-clamps descriptions on read (registry parsePeerRecord),
|
|
21
|
+
// so the running router keeps the OLD limit until restarted onto the new binary.
|
|
22
|
+
export const MAX_DESCRIPTION_LEN = 450
|
|
17
23
|
|
|
18
24
|
// Contract vocabulary (docs/Идентичность, Артур 05.06): the nature of the
|
|
19
25
|
// intelligence expressing itself through a runtime.
|
package/src/daemon/main.ts
CHANGED
|
@@ -101,6 +101,11 @@ export async function startConfiguredDaemon(opts: ConfiguredDaemonOptions = {}):
|
|
|
101
101
|
intervalMs: opts.superviseIntervalMs ?? DEFAULT_SUPERVISE_INTERVAL_MS,
|
|
102
102
|
// idle-reap / zombie-sweep, THEN the eager fresh re-launch for any peer whose
|
|
103
103
|
// session died carrying a .new-eager mark (owner /new; async, best-effort).
|
|
104
|
+
// The DURABLE decision trace (which peer, what outcome, when, why) is emitted
|
|
105
|
+
// INSIDE superviseTick (lifecycle/eventlog.ts → logs/iapeer/lifecycle.log), so
|
|
106
|
+
// every reap is recorded regardless of entry point (this timer AND the heal-at-
|
|
107
|
+
// wake superviseTick inside wakeOrSpawn). The outcomes array drives only the
|
|
108
|
+
// eager relaunch here; the trace does not depend on consuming it.
|
|
104
109
|
tick: async () => {
|
|
105
110
|
const outcomes = superviseTick(cfg, { env })
|
|
106
111
|
await processEagerRelaunches(cfg, outcomes, { env })
|
package/src/lifecycle/eventlog.test.ts
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
// eventlog — the daemon's durable, rotated lifecycle decision log. Tests the pure
|
|
2
|
+
// logfmt formatter, the append path (into an explicit temp logDir — never the real
|
|
3
|
+
// ~/.iapeer), and the size-rotation chain. No daemon, no tmux — pure FS.
|
|
4
|
+
|
|
5
|
+
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
|
6
|
+
import { existsSync, mkdtempSync, readFileSync, rmSync, statSync } from 'fs'
|
|
7
|
+
import { tmpdir } from 'os'
|
|
8
|
+
import { join } from 'path'
|
|
9
|
+
import {
|
|
10
|
+
appendLifecycleEvent,
|
|
11
|
+
fmtValue,
|
|
12
|
+
formatEventLine,
|
|
13
|
+
lifecycleLogPath,
|
|
14
|
+
} from './eventlog.ts'
|
|
15
|
+
|
|
16
|
+
const TS = 1_749_470_400_000 // fixed epoch-ms → a stable ISO for golden lines
|
|
17
|
+
const ISO = new Date(TS).toISOString()
|
|
18
|
+
|
|
19
|
+
describe('fmtValue (logfmt escaping)', () => {
|
|
20
|
+
test('bare token stays bare', () => {
|
|
21
|
+
expect(fmtValue('reaped-idle')).toBe('reaped-idle')
|
|
22
|
+
expect(fmtValue('claude-boris')).toBe('claude-boris')
|
|
23
|
+
expect(fmtValue(42)).toBe('42')
|
|
24
|
+
})
|
|
25
|
+
test('empty string → ""', () => {
|
|
26
|
+
expect(fmtValue('')).toBe('""')
|
|
27
|
+
})
|
|
28
|
+
test('whitespace / = / " force quoting and escape', () => {
|
|
29
|
+
expect(fmtValue('session no longer live')).toBe('"session no longer live"')
|
|
30
|
+
expect(fmtValue('a=b')).toBe('"a=b"')
|
|
31
|
+
expect(fmtValue('say "hi"')).toBe('"say \\"hi\\""')
|
|
32
|
+
expect(fmtValue('back\\slash here')).toBe('"back\\\\slash here"')
|
|
33
|
+
})
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
describe('formatEventLine', () => {
|
|
37
|
+
test('ts is first; fields keep insertion order; undefined skipped', () => {
|
|
38
|
+
const line = formatEventLine(TS, {
|
|
39
|
+
ev: 'supervise',
|
|
40
|
+
identity: 'claude-boris',
|
|
41
|
+
action: 'reaped-gone',
|
|
42
|
+
reason: 'session no longer live',
|
|
43
|
+
ref: undefined, // dropped
|
|
44
|
+
outcome: 'fresh-next-msg',
|
|
45
|
+
})
|
|
46
|
+
expect(line).toBe(
|
|
47
|
+
`ts=${ISO} ev=supervise identity=claude-boris action=reaped-gone reason="session no longer live" outcome=fresh-next-msg`,
|
|
48
|
+
)
|
|
49
|
+
})
|
|
50
|
+
test('age field renders as a bare token', () => {
|
|
51
|
+
const line = formatEventLine(TS, { ev: 'supervise', identity: 'claude-x', action: 'reaped-idle', age: '4230s' })
|
|
52
|
+
expect(line).toBe(`ts=${ISO} ev=supervise identity=claude-x action=reaped-idle age=4230s`)
|
|
53
|
+
})
|
|
54
|
+
})
|
|
55
|
+
|
|
56
|
+
describe('appendLifecycleEvent', () => {
|
|
57
|
+
let dir: string
|
|
58
|
+
|
|
59
|
+
beforeEach(() => {
|
|
60
|
+
dir = mkdtempSync(join(tmpdir(), 'iapeer-eventlog-'))
|
|
61
|
+
})
|
|
62
|
+
afterEach(() => {
|
|
63
|
+
rmSync(dir, { recursive: true, force: true })
|
|
64
|
+
})
|
|
65
|
+
|
|
66
|
+
test('falsy logDir → no-op (a partial cfg never writes / never resolves a real path)', () => {
|
|
67
|
+
expect(() => appendLifecycleEvent(undefined, { ev: 'supervise', identity: 'x' }, { nowMs: TS })).not.toThrow()
|
|
68
|
+
expect(() => appendLifecycleEvent('', { ev: 'supervise', identity: 'x' }, { nowMs: TS })).not.toThrow()
|
|
69
|
+
})
|
|
70
|
+
|
|
71
|
+
test('writes one logfmt line per call, appended in order', () => {
|
|
72
|
+
appendLifecycleEvent(dir, { ev: 'wake', personality: 'boris', mode: 'fresh', cause: 'crash-or-self-close' }, { nowMs: TS })
|
|
73
|
+
appendLifecycleEvent(dir, { ev: 'supervise', identity: 'claude-doc', action: 'reaped-gone' }, { nowMs: TS + 1000 })
|
|
74
|
+
const body = readFileSync(lifecycleLogPath(dir), 'utf8')
|
|
75
|
+
const lines = body.trimEnd().split('\n')
|
|
76
|
+
expect(lines).toHaveLength(2)
|
|
77
|
+
expect(lines[0]).toBe(`ts=${ISO} ev=wake personality=boris mode=fresh cause=crash-or-self-close`)
|
|
78
|
+
expect(lines[1]).toContain('ev=supervise identity=claude-doc action=reaped-gone')
|
|
79
|
+
})
|
|
80
|
+
|
|
81
|
+
test('creates the log dir if absent', () => {
|
|
82
|
+
const nested = join(dir, 'logs', 'iapeer')
|
|
83
|
+
appendLifecycleEvent(nested, { ev: 'supervise', identity: 'x' }, { nowMs: TS })
|
|
84
|
+
expect(existsSync(lifecycleLogPath(nested))).toBe(true)
|
|
85
|
+
})
|
|
86
|
+
|
|
87
|
+
test('size rotation: base → .1, oldest dropped past keep', () => {
|
|
88
|
+
const env = { IAPEER_LIFECYCLE_LOG_MAX_BYTES: '120', IAPEER_LIFECYCLE_LOG_KEEP: '2' }
|
|
89
|
+
const path = lifecycleLogPath(dir)
|
|
90
|
+
for (let i = 0; i < 6; i++) {
|
|
91
|
+
appendLifecycleEvent(dir, { ev: 'supervise', identity: `claude-peer${i}`, action: 'reaped-gone', n: i }, { env, nowMs: TS + i })
|
|
92
|
+
}
|
|
93
|
+
expect(existsSync(path)).toBe(true)
|
|
94
|
+
expect(existsSync(`${path}.1`)).toBe(true)
|
|
95
|
+
expect(existsSync(`${path}.2`)).toBe(true)
|
|
96
|
+
expect(existsSync(`${path}.3`)).toBe(false) // keep=2 → never a .3
|
|
97
|
+
expect(statSync(path).size).toBeLessThanOrEqual(200)
|
|
98
|
+
expect(readFileSync(path, 'utf8')).toContain('claude-peer5') // newest in the live base file
|
|
99
|
+
})
|
|
100
|
+
|
|
101
|
+
test('rotation preserves chronological order across files (.N oldest, base newest)', () => {
|
|
102
|
+
const env = { IAPEER_LIFECYCLE_LOG_MAX_BYTES: '90', IAPEER_LIFECYCLE_LOG_KEEP: '3' }
|
|
103
|
+
const path = lifecycleLogPath(dir)
|
|
104
|
+
for (let i = 0; i < 4; i++) {
|
|
105
|
+
appendLifecycleEvent(dir, { ev: 'supervise', identity: `claude-p${i}` }, { env, nowMs: TS + i })
|
|
106
|
+
}
|
|
107
|
+
const ordered = ['.3', '.2', '.1', '']
|
|
108
|
+
.map(suf => (existsSync(path + suf) ? readFileSync(path + suf, 'utf8') : ''))
|
|
109
|
+
.join('')
|
|
110
|
+
const seen = [...ordered.matchAll(/identity=claude-p(\d)/g)].map(m => Number(m[1]))
|
|
111
|
+
expect(seen).toEqual([...seen].sort((a, b) => a - b))
|
|
112
|
+
expect(seen[seen.length - 1]).toBe(3) // newest line is p3, in the base file
|
|
113
|
+
})
|
|
114
|
+
})
|
package/src/lifecycle/eventlog.ts
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
// Lifecycle event log — the daemon's DURABLE, ROTATED trace of every lifecycle
|
|
2
|
+
// DECISION it makes. This is the observability gap the boris-fresh incident hit:
|
|
3
|
+
// a peer woke fresh and there was NO record of when/how its prior session ended,
|
|
4
|
+
// nor of the daemon's fresh-vs-resume reasoning, because superviseTick's outcomes
|
|
5
|
+
// were dropped and the daemon never wrote a decision line anywhere.
|
|
6
|
+
//
|
|
7
|
+
// Design:
|
|
8
|
+
// • One line per decision, logfmt (`key=value`, values quoted iff they contain
|
|
9
|
+
// whitespace/quotes/`=`). Human-greppable AND machine-parseable. The state
|
|
10
|
+
// markers (.idle-reaped / .deaths) are CONSUMED on the next wake — this log is
|
|
11
|
+
// the part that survives, so a postmortem can reconstruct the death even after
|
|
12
|
+
// the marker is gone.
|
|
13
|
+
// • Append-only, app-managed SIZE rotation (NOT launchd's stdout/stderr, which
|
|
14
|
+
// are unbounded and truncated on restart). lifecycle.log → .1 … .N.
|
|
15
|
+
// • The target directory is passed IN (cfg.eventLogDir), NOT re-resolved from
|
|
16
|
+
// env — so it is isolated by the SAME cfg the rest of lifecycle routes through
|
|
17
|
+
// (a test that sandboxes cfg.stateDir also sandboxes this log; no leak to the
|
|
18
|
+
// real ~/.iapeer). A falsy dir → no-op (a partial test cfg never writes).
|
|
19
|
+
// • Best-effort throughout: a write/rotate failure is swallowed. Observability
|
|
20
|
+
// must never take down the daemon or fail a wake/reap.
|
|
21
|
+
//
|
|
22
|
+
// Lifted-out-able: the rotate-append primitive is path-parameterized, so the
|
|
23
|
+
// adjacent "log rotation" phase can promote it to storage/ and point other log
|
|
24
|
+
// producers at it without touching this module's call sites.
|
|
25
|
+
|
|
26
|
+
import { appendFileSync, mkdirSync, renameSync, rmSync, statSync } from 'fs'
|
|
27
|
+
import { join } from 'path'
|
|
28
|
+
|
|
29
|
+
/** Default cap per lifecycle.log file before it rotates to lifecycle.log.1. */
|
|
30
|
+
const DEFAULT_MAX_BYTES = 5 * 1024 * 1024 // 5 MiB
|
|
31
|
+
/** Default number of rotated backups kept (lifecycle.log.1 … .KEEP). */
|
|
32
|
+
const DEFAULT_KEEP = 5
|
|
33
|
+
|
|
34
|
+
/** The durable lifecycle decision log inside `logDir` (cfg.eventLogDir). */
|
|
35
|
+
export function lifecycleLogPath(logDir: string): string {
|
|
36
|
+
return join(logDir, 'lifecycle.log')
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
function envPosInt(raw: string | undefined, dflt: number): number {
|
|
40
|
+
const n = parseInt(raw ?? '', 10)
|
|
41
|
+
return Number.isFinite(n) && n > 0 ? n : dflt
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/** Whether to also log the steady-state non-decisions (alive / skipped-launchd).
|
|
45
|
+
* Off by default — they fire every tick per live/launchd peer and would bury the
|
|
46
|
+
* actual decisions (reap / wake) under heartbeat noise. */
|
|
47
|
+
export function superviseLogVerbose(env: NodeJS.ProcessEnv = process.env): boolean {
|
|
48
|
+
const v = env.IAPEER_SUPERVISE_LOG_VERBOSE?.trim().toLowerCase()
|
|
49
|
+
return v === '1' || v === 'true' || v === 'yes'
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/** logfmt value: bare token, or double-quoted with `"`/`\` escaped, when it
|
|
53
|
+
* contains whitespace, `=` or `"`. Empty string → `""`. */
|
|
54
|
+
export function fmtValue(v: string | number): string {
|
|
55
|
+
const s = String(v)
|
|
56
|
+
if (s === '') return '""'
|
|
57
|
+
if (/[\s"=]/.test(s)) return `"${s.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`
|
|
58
|
+
return s
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/** Render one logfmt line (ts first, then fields in insertion order; undefined
|
|
62
|
+
* fields are skipped). No trailing newline. Pure — unit-testable. */
|
|
63
|
+
export function formatEventLine(nowMs: number, fields: Record<string, string | number | undefined>): string {
|
|
64
|
+
const parts = [`ts=${new Date(nowMs).toISOString()}`]
|
|
65
|
+
for (const [k, v] of Object.entries(fields)) {
|
|
66
|
+
if (v === undefined) continue
|
|
67
|
+
parts.push(`${k}=${fmtValue(v)}`)
|
|
68
|
+
}
|
|
69
|
+
return parts.join(' ')
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/** Size-rotate `path` (and its .1 … .keep backups) when the next line would push
|
|
73
|
+
* it over `maxBytes`. Drops the oldest, shifts each backup up by one, base→.1.
|
|
74
|
+
* Best-effort: any fs hiccup leaves the chain as-is (we then just append). */
|
|
75
|
+
function rotateIfNeeded(path: string, lineLen: number, maxBytes: number, keep: number): void {
|
|
76
|
+
let size: number
|
|
77
|
+
try {
|
|
78
|
+
size = statSync(path).size
|
|
79
|
+
} catch {
|
|
80
|
+
return // no file yet → nothing to rotate
|
|
81
|
+
}
|
|
82
|
+
if (size + lineLen <= maxBytes) return
|
|
83
|
+
try {
|
|
84
|
+
rmSync(`${path}.${keep}`, { force: true })
|
|
85
|
+
} catch {
|
|
86
|
+
/* best-effort */
|
|
87
|
+
}
|
|
88
|
+
for (let i = keep - 1; i >= 1; i--) {
|
|
89
|
+
try {
|
|
90
|
+
renameSync(`${path}.${i}`, `${path}.${i + 1}`)
|
|
91
|
+
} catch {
|
|
92
|
+
/* that backup may not exist yet */
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
try {
|
|
96
|
+
renameSync(path, `${path}.1`)
|
|
97
|
+
} catch {
|
|
98
|
+
/* best-effort */
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
export interface AppendEventOptions {
|
|
103
|
+
/** Reads the rotation knobs IAPEER_LIFECYCLE_LOG_MAX_BYTES / _KEEP. */
|
|
104
|
+
env?: NodeJS.ProcessEnv
|
|
105
|
+
/** Stamp the line with this epoch-ms (superviseTick passes its own tick clock so
|
|
106
|
+
* the log timestamp agrees with the death/idle accounting). Default Date.now(). */
|
|
107
|
+
nowMs?: number
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
/**
|
|
111
|
+
* Append one lifecycle decision line into `logDir`/lifecycle.log. A falsy `logDir`
|
|
112
|
+
* is a no-op (a partial test cfg without eventLogDir never writes — and never
|
|
113
|
+
* resolves a real path). Fully best-effort — never throws.
|
|
114
|
+
*/
|
|
115
|
+
export function appendLifecycleEvent(
|
|
116
|
+
logDir: string | undefined,
|
|
117
|
+
fields: Record<string, string | number | undefined>,
|
|
118
|
+
opts: AppendEventOptions = {},
|
|
119
|
+
): void {
|
|
120
|
+
if (!logDir) return
|
|
121
|
+
const env = opts.env ?? process.env
|
|
122
|
+
const path = lifecycleLogPath(logDir)
|
|
123
|
+
const line = `${formatEventLine(opts.nowMs ?? Date.now(), fields)}\n`
|
|
124
|
+
const maxBytes = envPosInt(env.IAPEER_LIFECYCLE_LOG_MAX_BYTES, DEFAULT_MAX_BYTES)
|
|
125
|
+
const keep = envPosInt(env.IAPEER_LIFECYCLE_LOG_KEEP, DEFAULT_KEEP)
|
|
126
|
+
try {
|
|
127
|
+
mkdirSync(logDir, { recursive: true, mode: 0o700 })
|
|
128
|
+
rotateIfNeeded(path, line.length, maxBytes, keep)
|
|
129
|
+
appendFileSync(path, line, { mode: 0o600 })
|
|
130
|
+
} catch {
|
|
131
|
+
/* observability is best-effort — a log failure must never break a wake/reap */
|
|
132
|
+
}
|
|
133
|
+
}
|
package/src/lifecycle/index.ts
CHANGED
|
@@ -40,6 +40,7 @@ import {
|
|
|
40
40
|
type LaunchSpec,
|
|
41
41
|
} from '../launch/index.ts'
|
|
42
42
|
import { composeSystemPrompt, gatherPromptInput } from '../launch/composeSystemPrompt.ts'
|
|
43
|
+
import { appendLifecycleEvent, superviseLogVerbose } from './eventlog.ts'
|
|
43
44
|
|
|
44
45
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
45
46
|
// Config
|
|
@@ -51,6 +52,11 @@ export interface LifecycleConfig {
|
|
|
51
52
|
sockDir: string
|
|
52
53
|
stateDir: string // ~/.iapeer/state/lifecycle
|
|
53
54
|
logDir: string // ~/.iapeer/logs/lifecycle
|
|
55
|
+
/** Where the durable lifecycle DECISION log (lifecycle.log) is written
|
|
56
|
+
* (~/.iapeer/logs/iapeer — next to daemon-stdout/stderr.log, where the first
|
|
57
|
+
* investigator looks). Routed through cfg — NOT re-resolved from env — so it is
|
|
58
|
+
* isolated by the same sandbox as stateDir (eventlog.ts). */
|
|
59
|
+
eventLogDir: string
|
|
54
60
|
bootDeadlineSecs: number
|
|
55
61
|
readyGateSecs: number
|
|
56
62
|
idleSecs: number
|
|
@@ -74,6 +80,7 @@ export function loadLifecycleConfig(env: NodeJS.ProcessEnv = process.env): Lifec
|
|
|
74
80
|
sockDir: resolveSockDir(env),
|
|
75
81
|
stateDir: join(root, STATE_DIR, 'lifecycle'),
|
|
76
82
|
logDir: join(root, LOGS_DIR, 'lifecycle'),
|
|
83
|
+
eventLogDir: join(root, LOGS_DIR, 'iapeer'),
|
|
77
84
|
bootDeadlineSecs: num(env.IAPEER_BOOT_DEADLINE_SECS, 240),
|
|
78
85
|
readyGateSecs: num(env.IAPEER_READY_GATE_SECS, 120),
|
|
79
86
|
idleSecs: num(env.IAPEER_IDLE_SECS, 3600),
|
|
@@ -331,6 +338,10 @@ export interface WakeMode {
|
|
|
331
338
|
/** Set ONLY for an EXPLICIT resume request that found nothing to resume — the
|
|
332
339
|
* caller must fail loud (never a silent fresh fallback). */
|
|
333
340
|
failReason?: string
|
|
341
|
+
/** Which decision branch fired — the durable "why fresh / why resume" reason.
|
|
342
|
+
* Logged by wakeOrSpawn: the .idle-reaped marker is CONSUMED inside this
|
|
343
|
+
* function (branch 3b), so this cause is the only surviving record of it. */
|
|
344
|
+
cause?: string
|
|
334
345
|
}
|
|
335
346
|
|
|
336
347
|
/**
|
|
@@ -371,29 +382,33 @@ export function resolveWakeMode(
|
|
|
371
382
|
incomingTopic?: string,
|
|
372
383
|
): WakeMode {
|
|
373
384
|
// 1. folder-launch → always fresh.
|
|
374
|
-
if (argsResume === false) return { resume: false }
|
|
385
|
+
if (argsResume === false) return { resume: false, cause: 'folder-launch' }
|
|
375
386
|
// 2. attach → always resume, fail-loud if nothing to resume.
|
|
376
387
|
if (argsResume === true) {
|
|
377
388
|
const r = resolveResume(cwd)
|
|
378
|
-
if (!r.ok) return { resume: false, failReason: r.reason ?? 'resume requested but nothing to resume' }
|
|
379
|
-
return { resume: true, resumeRef: r.ref }
|
|
389
|
+
if (!r.ok) return { resume: false, failReason: r.reason ?? 'resume requested but nothing to resume', cause: 'attach-nothing-to-resume' }
|
|
390
|
+
return { resume: true, resumeRef: r.ref, cause: 'attach' }
|
|
380
391
|
}
|
|
381
392
|
// 3. default (a message woke a dead/asleep peer): decide by the death cause.
|
|
382
393
|
// 3a. NOT idle-reaped → it died on its own (crash / self-close) → clean FRESH.
|
|
383
|
-
if (!hasIdleReaped(cfg, identity)) return { resume: false }
|
|
394
|
+
if (!hasIdleReaped(cfg, identity)) return { resume: false, cause: 'crash-or-self-close' }
|
|
384
395
|
// 3b. idle-reaped → resume-eligible. Consume the marker now (it has done its job).
|
|
385
396
|
clearIdleReaped(cfg, identity)
|
|
386
397
|
// human-conversational dialogue never auto-freshes; only an explicit /new resets it.
|
|
387
398
|
if (isHumanConversational(cwd)) {
|
|
388
399
|
const r = resolveResume(cwd)
|
|
389
|
-
return r.ok
|
|
400
|
+
return r.ok
|
|
401
|
+
? { resume: true, resumeRef: r.ref, cause: 'idle-reaped-human' }
|
|
402
|
+
: { resume: false, cause: 'idle-reaped-human-no-resume' }
|
|
390
403
|
}
|
|
391
404
|
// executor: a NEW topic (non-empty and differing from the stored one) means new
|
|
392
405
|
// work → FRESH; same topic, or no incoming topic → continue the work → RESUME.
|
|
393
406
|
const topic = incomingTopic?.trim() ?? ''
|
|
394
|
-
if (topic && topic !== readTopic(cfg, identity)) return { resume: false }
|
|
407
|
+
if (topic && topic !== readTopic(cfg, identity)) return { resume: false, cause: 'idle-reaped-new-topic' }
|
|
395
408
|
const r = resolveResume(cwd)
|
|
396
|
-
return r.ok
|
|
409
|
+
return r.ok
|
|
410
|
+
? { resume: true, resumeRef: r.ref, cause: 'idle-reaped-resume' }
|
|
411
|
+
: { resume: false, cause: 'idle-reaped-no-resume' }
|
|
397
412
|
}
|
|
398
413
|
|
|
399
414
|
export function readSessionStates(cfg: LifecycleConfig): SessionState[] {
|
|
@@ -636,6 +651,13 @@ export async function wakeOrSpawn(args: WakeArgs, deps: WakeDeps = {}): Promise<
|
|
|
636
651
|
const env = deps.env ?? process.env
|
|
637
652
|
const cfg = deps.cfg ?? loadLifecycleConfig(env)
|
|
638
653
|
|
|
654
|
+
// Durable wake-decision trace (eventlog.ts): one line per bring-up decision —
|
|
655
|
+
// fresh / resume (with the resolveWakeMode cause) or a refusal (stopped / crash-
|
|
656
|
+
// loop / launchd). This is the direct answer to "why did peer X come up fresh",
|
|
657
|
+
// and the only surviving record of the .idle-reaped marker resolveWakeMode consumes.
|
|
658
|
+
const logWake = (fields: Record<string, string | number | undefined>): void =>
|
|
659
|
+
appendLifecycleEvent(cfg.eventLogDir, { ev: 'wake', personality: args.personality, ...fields }, { env })
|
|
660
|
+
|
|
639
661
|
// Heal strays before launching — the sweep-at-spawn-start. This is the SAME
|
|
640
662
|
// H4-guarded superviseTick the daemon timer runs, so both reap entry points
|
|
641
663
|
// (timer + wake) go through one guarded path. Best-effort: never block a wake.
|
|
@@ -651,6 +673,7 @@ export async function wakeOrSpawn(args: WakeArgs, deps: WakeDeps = {}): Promise<
|
|
|
651
673
|
|
|
652
674
|
// H4 — never wake a launchd-managed peer (launchd KeepAlive owns it).
|
|
653
675
|
if (isLaunchdManaged(args.personality, env)) {
|
|
676
|
+
logWake({ runtime: args.runtime, mode: 'refused', cause: 'launchd-managed' })
|
|
654
677
|
return {
|
|
655
678
|
status: 'FAILED',
|
|
656
679
|
woke: false,
|
|
@@ -681,6 +704,7 @@ export async function wakeOrSpawn(args: WakeArgs, deps: WakeDeps = {}): Promise<
|
|
|
681
704
|
// halt: refuse with stopped:true so the sender gets an explicit "stopped" error,
|
|
682
705
|
// not a generic "offline" — and no message is queued. `start` clears the flag.
|
|
683
706
|
if (isStopped(cfg, identity)) {
|
|
707
|
+
logWake({ identity, runtime, mode: 'refused', cause: 'stopped' })
|
|
684
708
|
return {
|
|
685
709
|
status: 'FAILED',
|
|
686
710
|
woke: false,
|
|
@@ -698,9 +722,11 @@ export async function wakeOrSpawn(args: WakeArgs, deps: WakeDeps = {}): Promise<
|
|
|
698
722
|
// refusal). A stop racing DURING the spawn is a narrower window the wake-lock does not
|
|
699
723
|
// cover (stop does not take this lock).
|
|
700
724
|
if (isStopped(cfg, identity)) {
|
|
725
|
+
logWake({ identity, runtime, mode: 'refused', cause: 'stopped-mid-wake' })
|
|
701
726
|
return { status: 'FAILED', woke: false, runtime, stopped: true, reason: `"${args.personality}" (${runtime}) is stopped and not accepting messages; start it to resume` }
|
|
702
727
|
}
|
|
703
728
|
if (isLaunchdManaged(args.personality, env)) {
|
|
729
|
+
logWake({ identity, runtime, mode: 'refused', cause: 'launchd-managed-mid-wake' })
|
|
704
730
|
return { status: 'FAILED', woke: false, runtime, reason: `"${args.personality}" became launchd-managed mid-wake; the daemon does not wake it` }
|
|
705
731
|
}
|
|
706
732
|
// Idempotent fast path inside the lock: a live session wins (a concurrent
|
|
@@ -719,6 +745,7 @@ export async function wakeOrSpawn(args: WakeArgs, deps: WakeDeps = {}): Promise<
|
|
|
719
745
|
// trims the ring, so the guard only fires on a genuine tight loop.
|
|
720
746
|
const recentDeaths = countRecentDeaths(cfg, identity, cfg.crashLoopWindowSecs, Date.now())
|
|
721
747
|
if (recentDeaths >= cfg.crashLoopMax) {
|
|
748
|
+
logWake({ identity, runtime, mode: 'refused', cause: 'crash-loop', reason: `${recentDeaths} deaths in ${cfg.crashLoopWindowSecs}s` })
|
|
722
749
|
return {
|
|
723
750
|
status: 'FAILED',
|
|
724
751
|
woke: false,
|
|
@@ -732,6 +759,17 @@ export async function wakeOrSpawn(args: WakeArgs, deps: WakeDeps = {}): Promise<
|
|
|
732
759
|
// that finds nothing to resume fails loud. incomingTopic (args.topic) is the
|
|
733
760
|
// executor discriminator.
|
|
734
761
|
const mode = resolveWakeMode(cfg, identity, cwd, args.resume, c => adapter.resolveResume(c), args.topic)
|
|
762
|
+
// The bring-up decision is the durable trace — log it BEFORE launch (the decision
|
|
763
|
+
// stands regardless of whether the subsequent launch succeeds). resolveWakeMode has
|
|
764
|
+
// already consumed any .idle-reaped marker, so `cause` is now its only record.
|
|
765
|
+
logWake({
|
|
766
|
+
identity,
|
|
767
|
+
runtime,
|
|
768
|
+
mode: mode.failReason ? 'fail' : mode.resume ? 'resume' : 'fresh',
|
|
769
|
+
cause: mode.cause,
|
|
770
|
+
ref: mode.resumeRef,
|
|
771
|
+
reason: mode.failReason,
|
|
772
|
+
})
|
|
735
773
|
if (mode.failReason) return { status: 'FAILED', woke: false, runtime, reason: mode.failReason }
|
|
736
774
|
const resume = mode.resume
|
|
737
775
|
const resumeRef = mode.resumeRef
|
|
@@ -844,11 +882,19 @@ export interface SuperviseDeps {
|
|
|
844
882
|
export function superviseTick(cfg: LifecycleConfig, deps: SuperviseDeps = {}): SuperviseOutcome[] {
|
|
845
883
|
const env = deps.env ?? process.env
|
|
846
884
|
const nowMs = deps.nowMs ?? Date.now()
|
|
885
|
+
const verbose = superviseLogVerbose(env)
|
|
886
|
+
// Durable decision trace (eventlog.ts): every reap/death/eager-fresh gets a line
|
|
887
|
+
// so a postmortem can answer "when & how did peer X's prior session end" even
|
|
888
|
+
// after the .idle-reaped / .deaths markers are consumed. alive / skipped-launchd
|
|
889
|
+
// are steady-state non-decisions → logged only under IAPEER_SUPERVISE_LOG_VERBOSE.
|
|
890
|
+
const trace = (fields: Record<string, string | number | undefined>): void =>
|
|
891
|
+
appendLifecycleEvent(cfg.eventLogDir, { ev: 'supervise', ...fields }, { env, nowMs })
|
|
847
892
|
const out: SuperviseOutcome[] = []
|
|
848
893
|
for (const s of readSessionStates(cfg)) {
|
|
849
894
|
// H4 — FIRST, before any reap. A launchd-managed peer is read-only.
|
|
850
895
|
if (isLaunchdManaged(s.personality, env)) {
|
|
851
896
|
out.push({ identity: s.identity, action: 'skipped-launchd' })
|
|
897
|
+
if (verbose) trace({ identity: s.identity, action: 'skipped-launchd', outcome: 'read-only-h4' })
|
|
852
898
|
continue
|
|
853
899
|
}
|
|
854
900
|
const sock = buildSocketPath(s.runtime, s.personality, cfg.sockDir)
|
|
@@ -869,11 +915,13 @@ export function superviseTick(cfg: LifecycleConfig, deps: SuperviseDeps = {}): S
|
|
|
869
915
|
personality: s.personality,
|
|
870
916
|
runtime: s.runtime,
|
|
871
917
|
})
|
|
918
|
+
trace({ identity: s.identity, action: 'needs-eager-fresh', reason: '/new eager mark', outcome: 'eager-fresh' })
|
|
872
919
|
continue
|
|
873
920
|
}
|
|
874
921
|
// Crash / self-close: NO marker written, NO eager relaunch — the peer stays
|
|
875
922
|
// asleep and wakes FRESH lazily on the next message (resolveWakeMode branch 3a).
|
|
876
923
|
out.push({ identity: s.identity, action: 'reaped-gone', reason: 'session no longer live' })
|
|
924
|
+
trace({ identity: s.identity, action: 'reaped-gone', reason: 'session no longer live', outcome: 'fresh-next-msg' })
|
|
877
925
|
continue
|
|
878
926
|
}
|
|
879
927
|
// Idle accounting via the runtime adapter's activity proxy (claude transcript
|
|
@@ -896,8 +944,10 @@ export function superviseTick(cfg: LifecycleConfig, deps: SuperviseDeps = {}): S
|
|
|
896
944
|
setIdleReaped(cfg, s.identity)
|
|
897
945
|
removeSessionState(cfg, s.identity)
|
|
898
946
|
out.push({ identity: s.identity, action: 'reaped-idle', reason: `idle ${ageSecs}s` })
|
|
947
|
+
trace({ identity: s.identity, action: 'reaped-idle', age: `${ageSecs}s`, outcome: 'resume-eligible' })
|
|
899
948
|
} else {
|
|
900
949
|
out.push({ identity: s.identity, action: 'alive' })
|
|
950
|
+
if (verbose) trace({ identity: s.identity, action: 'alive', age: `${ageSecs}s` })
|
|
901
951
|
}
|
|
902
952
|
}
|
|
903
953
|
return out
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
|
|
2
|
-
import { existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'fs'
|
|
2
|
+
import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'fs'
|
|
3
3
|
import { tmpdir } from 'os'
|
|
4
4
|
import { join } from 'path'
|
|
5
5
|
import {
|
|
@@ -176,6 +176,7 @@ describe('superviseTick H4 guard', () => {
|
|
|
176
176
|
sockDir: '/tmp',
|
|
177
177
|
stateDir,
|
|
178
178
|
logDir: stateDir,
|
|
179
|
+
eventLogDir: stateDir, // isolate the decision log into the temp dir (no real-root leak)
|
|
179
180
|
bootDeadlineSecs: 1,
|
|
180
181
|
readyGateSecs: 1,
|
|
181
182
|
idleSecs: 1,
|
|
@@ -204,11 +205,17 @@ describe('superviseTick H4 guard', () => {
|
|
|
204
205
|
})
|
|
205
206
|
|
|
206
207
|
test('a no-plist peer with a dead session → reaped-gone, state removed', () => {
|
|
208
|
+
const c = cfg()
|
|
207
209
|
const id = writeState('iapeer-supgone') // no plist, no live tmux session
|
|
208
|
-
const out = superviseTick(
|
|
210
|
+
const out = superviseTick(c, { env: env(), nowMs: Date.now() })
|
|
209
211
|
const o = out.find(x => x.identity === id)
|
|
210
212
|
expect(o?.action).toBe('reaped-gone')
|
|
211
213
|
expect(existsSync(join(stateDir, `${id}.session`))).toBe(false)
|
|
214
|
+
// the decision leaves a DURABLE trace line (the observability contract) — and it
|
|
215
|
+
// lands in the SANDBOXED eventLogDir, never the real ~/.iapeer.
|
|
216
|
+
const logged = readFileSync(join(c.eventLogDir, 'lifecycle.log'), 'utf8')
|
|
217
|
+
expect(logged).toContain(`ev=supervise identity=${id} action=reaped-gone`)
|
|
218
|
+
expect(logged).toContain('outcome=fresh-next-msg')
|
|
212
219
|
})
|
|
213
220
|
|
|
214
221
|
test('empty state dir → no outcomes', () => {
|
|
@@ -390,10 +397,10 @@ describe('resolveWakeMode (TARGET: death-cause + peer-type/topic)', () => {
|
|
|
390
397
|
|
|
391
398
|
// ── branch 1/2: explicit fresh / explicit resume (unchanged) ────────────────
|
|
392
399
|
test('argsResume=false (folder-launch) → FRESH', () => {
|
|
393
|
-
expect(resolveWakeMode(cfg(), 'claude-p', cwd(), false, hasTranscript)).toEqual({ resume: false })
|
|
400
|
+
expect(resolveWakeMode(cfg(), 'claude-p', cwd(), false, hasTranscript)).toEqual({ resume: false, cause: 'folder-launch' })
|
|
394
401
|
})
|
|
395
402
|
test('argsResume=true (attach) + transcript → RESUME', () => {
|
|
396
|
-
expect(resolveWakeMode(cfg(), 'claude-p', cwd(), true, hasTranscript)).toEqual({ resume: true, resumeRef: 'uuid-1' })
|
|
403
|
+
expect(resolveWakeMode(cfg(), 'claude-p', cwd(), true, hasTranscript)).toEqual({ resume: true, resumeRef: 'uuid-1', cause: 'attach' })
|
|
397
404
|
})
|
|
398
405
|
test('argsResume=true + nothing to resume → FAIL-LOUD (failReason, no silent fresh)', () => {
|
|
399
406
|
const m = resolveWakeMode(cfg(), 'claude-p', cwd(), true, noTranscript)
|
|
@@ -405,7 +412,7 @@ describe('resolveWakeMode (TARGET: death-cause + peer-type/topic)', () => {
|
|
|
405
412
|
test('DEFAULT + NOT idle-reaped (crash/self-close) → FRESH even when a transcript exists', () => {
|
|
406
413
|
// INVERSION of the old polarity: absence of the daemon's idle-reaped marker = died
|
|
407
414
|
// on its own = clean fresh, NOT a resume of a possibly-broken context.
|
|
408
|
-
expect(resolveWakeMode(cfg(), 'claude-p', cwd(), undefined, hasTranscript)).toEqual({ resume: false })
|
|
415
|
+
expect(resolveWakeMode(cfg(), 'claude-p', cwd(), undefined, hasTranscript)).toEqual({ resume: false, cause: 'crash-or-self-close' })
|
|
409
416
|
})
|
|
410
417
|
|
|
411
418
|
// ── branch 3b: default + idle-reaped → resume-eligible, CONSUME the marker ───
|
|
@@ -413,25 +420,25 @@ describe('resolveWakeMode (TARGET: death-cause + peer-type/topic)', () => {
|
|
|
413
420
|
const c = cfg()
|
|
414
421
|
setIdleReaped(c, 'claude-p')
|
|
415
422
|
const human = cwd(true)
|
|
416
|
-
expect(resolveWakeMode(c, 'claude-p', human, undefined, hasTranscript)).toEqual({ resume: true, resumeRef: 'uuid-1' })
|
|
423
|
+
expect(resolveWakeMode(c, 'claude-p', human, undefined, hasTranscript)).toEqual({ resume: true, resumeRef: 'uuid-1', cause: 'idle-reaped-human' })
|
|
417
424
|
expect(hasIdleReaped(c, 'claude-p')).toBe(false) // consumed
|
|
418
425
|
})
|
|
419
426
|
test('DEFAULT + idle-reaped + executor + NO incoming topic → RESUME (continue the work)', () => {
|
|
420
427
|
const c = cfg()
|
|
421
428
|
setIdleReaped(c, 'claude-p')
|
|
422
|
-
expect(resolveWakeMode(c, 'claude-p', cwd(false), undefined, hasTranscript)).toEqual({ resume: true, resumeRef: 'uuid-1' })
|
|
429
|
+
expect(resolveWakeMode(c, 'claude-p', cwd(false), undefined, hasTranscript)).toEqual({ resume: true, resumeRef: 'uuid-1', cause: 'idle-reaped-resume' })
|
|
423
430
|
})
|
|
424
431
|
test('DEFAULT + idle-reaped + executor + SAME topic → RESUME', () => {
|
|
425
432
|
const c = cfg()
|
|
426
433
|
setIdleReaped(c, 'claude-p')
|
|
427
434
|
writeTopic(c, 'claude-p', 'deploy')
|
|
428
|
-
expect(resolveWakeMode(c, 'claude-p', cwd(false), undefined, hasTranscript, 'deploy')).toEqual({ resume: true, resumeRef: 'uuid-1' })
|
|
435
|
+
expect(resolveWakeMode(c, 'claude-p', cwd(false), undefined, hasTranscript, 'deploy')).toEqual({ resume: true, resumeRef: 'uuid-1', cause: 'idle-reaped-resume' })
|
|
429
436
|
})
|
|
430
437
|
test('DEFAULT + idle-reaped + executor + DIFFERENT topic → FRESH (new work), marker consumed', () => {
|
|
431
438
|
const c = cfg()
|
|
432
439
|
setIdleReaped(c, 'claude-p')
|
|
433
440
|
writeTopic(c, 'claude-p', 'deploy')
|
|
434
|
-
expect(resolveWakeMode(c, 'claude-p', cwd(false), undefined, hasTranscript, 'unrelated-bug')).toEqual({ resume: false })
|
|
441
|
+
expect(resolveWakeMode(c, 'claude-p', cwd(false), undefined, hasTranscript, 'unrelated-bug')).toEqual({ resume: false, cause: 'idle-reaped-new-topic' })
|
|
435
442
|
expect(hasIdleReaped(c, 'claude-p')).toBe(false) // consumed even on the fresh executor branch
|
|
436
443
|
})
|
|
437
444
|
})
|
|
@@ -3,6 +3,7 @@ import { mkdtempSync, rmSync, readFileSync, writeFileSync } from 'fs'
|
|
|
3
3
|
import { tmpdir } from 'os'
|
|
4
4
|
import { join } from 'path'
|
|
5
5
|
import {
|
|
6
|
+
clampDescription,
|
|
6
7
|
findPeer,
|
|
7
8
|
readPeersIndex,
|
|
8
9
|
removePeer,
|
|
@@ -10,7 +11,7 @@ import {
|
|
|
10
11
|
withPeersLock,
|
|
11
12
|
type PeerRecord,
|
|
12
13
|
} from './index.ts'
|
|
13
|
-
import { defaultIntelligenceForRuntime, type Intelligence } from '../core/constants.ts'
|
|
14
|
+
import { MAX_DESCRIPTION_LEN, defaultIntelligenceForRuntime, type Intelligence } from '../core/constants.ts'
|
|
14
15
|
import { writeFileAtomic, resolvePeersPaths } from '../storage/index.ts'
|
|
15
16
|
|
|
16
17
|
let root: string
|
|
@@ -398,3 +399,34 @@ describe('Companion fix — withPeersLock fail-closed sandbox isolation', () =>
|
|
|
398
399
|
expect(out).toBe('ok')
|
|
399
400
|
})
|
|
400
401
|
})
|
|
402
|
+
|
|
403
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
404
|
+
// clampDescription boundary — the limit was raised 250 → 450 so self-documenting
|
|
405
|
+
// API-peer descriptions (notifier timer/watcher, ~408 chars) survive intact. The
|
|
406
|
+
// boundary is exact: length == MAX passes untouched, length == MAX+1 truncates.
|
|
407
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
408
|
+
|
|
409
|
+
describe('clampDescription — MAX_DESCRIPTION_LEN boundary (450)', () => {
|
|
410
|
+
test('the limit is 450', () => {
|
|
411
|
+
expect(MAX_DESCRIPTION_LEN).toBe(450)
|
|
412
|
+
})
|
|
413
|
+
test('a 450-char description passes through untouched', () => {
|
|
414
|
+
const at = 'x'.repeat(450)
|
|
415
|
+
const r = clampDescription(at)
|
|
416
|
+
expect(r.truncated).toBe(false)
|
|
417
|
+
expect(r.description).toBe(at)
|
|
418
|
+
expect(r.description.length).toBe(450)
|
|
419
|
+
})
|
|
420
|
+
test('a 451-char description is truncated to 450', () => {
|
|
421
|
+
const over = 'y'.repeat(451)
|
|
422
|
+
const r = clampDescription(over)
|
|
423
|
+
expect(r.truncated).toBe(true)
|
|
424
|
+
expect(r.description.length).toBe(450)
|
|
425
|
+
expect(r.description).toBe('y'.repeat(450))
|
|
426
|
+
})
|
|
427
|
+
test('upsertPeer persists a full 450-char description (no clamp at the boundary)', async () => {
|
|
428
|
+
const desc = 'z'.repeat(450)
|
|
429
|
+
await upsertPeer({ personality: 'verbose', runtime: 'claude', cwd: '/tmp/verbose', description: desc }, opts())
|
|
430
|
+
expect(findPeer(readPeersIndex(opts()), 'verbose')!.description).toBe(desc)
|
|
431
|
+
})
|
|
432
|
+
})
|