@agfpd/iapeer 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agfpd/iapeer",
3
- "version": "0.2.6",
3
+ "version": "0.2.7",
4
4
  "description": "Foundation core for the IAPeer multi-agent ecosystem: identity, registry, storage, codec.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -107,6 +107,72 @@ function ready(identity: string): LaunchResult {
107
107
  return { status: 'READY', identity, process_address: identity }
108
108
  }
109
109
 
110
+ // ─────────────────────────────────────────────────────────────────────────────
111
+ // Exit-cause observability — capture WHY a session's process died, AT THE MOMENT
112
+ // of death. The daemon's 60 s supervise tick only learns of a death post-factum
113
+ // (reaped-gone), by which time the exit code/signal — and often the whole tmux
114
+ // server — is gone (the boris-fresh-style blind spot one level deeper than the
115
+ // supervise log). A tmux `pane-died` hook closes it: it fires the instant the
116
+ // pane's leader process exits (with `remain-on-exit on` retaining the dead pane so
117
+ // `#{pane_dead_status}`/`#{pane_dead_signal}` are populated), logs one logfmt line,
118
+ // then kill-sessions the now-dead pane so the daemon's `has-session` death
119
+ // detection (and the always-on KeepAlive block-watch) stay intact.
120
+ //
121
+ // Scope — verified live on tmux 3.6a (3 death modes + the daemon-reap path):
122
+ // • graceful exit → `dead_status=<code> dead_signal=` (code, no signal)
123
+ // • SIGTERM/SIGKILL/crash to the PROCESS → `dead_status= dead_signal=<name>`
124
+ // • daemon-initiated `kill-session` (idle-reap / self-TTL / stop) does NOT fire
125
+ // pane-died → NO line here (those are already in lifecycle.log — no double-log).
126
+ // IRREDUCIBLE GAP: SIGKILL to the tmux SERVER process itself runs no hook (the
127
+ // event loop is gone); only the daemon's post-factum reaped-gone catches that.
128
+ // ─────────────────────────────────────────────────────────────────────────────
129
+
130
+ /** The exit-cause log file inside `exitLogDir` (sibling to lifecycle.log). */
131
+ export function exitLogPath(exitLogDir: string): string {
132
+ return `${exitLogDir}/exits.log`
133
+ }
134
+
135
+ /**
136
+ * Build the tmux `pane-died` hook command string (the value of `set-hook -t <id>
137
+ * pane-died <value>`). On the pane leader's death it appends ONE logfmt line —
138
+ * `ts=<ISO> ev=session-exit identity=<id> dead_status=#{…} dead_signal=#{…}`
139
+ * — to `exitLogFile`, then runs a tmux-NATIVE `kill-session` (no shell `tmux`, so
140
+ * it needs no PATH — launchd gives always-on servers a minimal one). Pure (no I/O)
141
+ * so the exact string is unit-testable. Quoting: the `run-shell` arg is wrapped in
142
+ * tmux SINGLE quotes (literal at the tmux layer, still `#{}`-format-expanded) with
143
+ * sh DOUBLE quotes inside — the two levels never collide; `\n`/`$(…)` pass through
144
+ * tmux untouched to sh. `identity`/`exitLogFile` are assumed free of single quotes
145
+ * (runtime-personality identities and the ~/.iapeer/logs path always are). */
146
+ export function exitCauseHook(identity: string, exitLogFile: string): string {
147
+ const line =
148
+ `ts=%s ev=session-exit identity=${identity} ` +
149
+ `dead_status=#{pane_dead_status} dead_signal=#{pane_dead_signal}\\n`
150
+ const log =
151
+ `printf "${line}" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "${exitLogFile}"`
152
+ return `run-shell '${log}' ; kill-session -t "${identity}"`
153
+ }
154
+
155
+ /** Install the exit-cause observability on a freshly-created session: ensure the
156
+ * exit-log dir exists, turn `remain-on-exit` on (so pane-died can read the dead
157
+ * pane's status/signal) and register the hook. Best-effort — a tmux/FS hiccup
158
+ * here must never fail the launch (observability is never load-bearing). No-op
159
+ * when `exitLogDir` is falsy (a partial/test cfg): `remain-on-exit` stays OFF so
160
+ * behavior is byte-identical to before (and no dead pane can linger un-reaped). */
161
+ function installExitHook(sock: string, identity: string, exitLogDir: string | undefined): void {
162
+ if (!exitLogDir) return
163
+ try {
164
+ mkdirSync(exitLogDir, { recursive: true, mode: 0o700 })
165
+ // remain-on-exit must be ON before the process can die, else pane-died won't
166
+ // retain the dead pane and the status/signal are lost. Set it (and the hook)
167
+ // immediately after new-session — the only un-coverable window is the few ms
168
+ // before this runs, irrelevant for a runtime that takes seconds to initialize.
169
+ tmux(sock, 'set-option', '-t', identity, 'remain-on-exit', 'on')
170
+ tmux(sock, 'set-hook', '-t', identity, 'pane-died', exitCauseHook(identity, exitLogPath(exitLogDir)))
171
+ } catch {
172
+ /* observability is best-effort — never block a wake on a hook-install hiccup */
173
+ }
174
+ }
175
+
110
176
  // ─────────────────────────────────────────────────────────────────────────────
111
177
  // launch — bring up ONE session (runtime-agnostic via the adapter)
112
178
  // ─────────────────────────────────────────────────────────────────────────────
@@ -178,6 +244,14 @@ export const launch: LaunchFn = async (
178
244
  return fail(identity, `tmux new-session failed: ${(start.stderr ?? '').trim() || 'exit ' + start.status}`)
179
245
  }
180
246
 
247
+ // (2.5) Exit-cause observability: a `pane-died` hook that records WHY this
248
+ // session's process dies (status/signal) at the moment of death into
249
+ // <exitLogDir>/exits.log, then kill-sessions the dead pane (so the
250
+ // daemon's has-session death detection + always-on KeepAlive stay intact).
251
+ // Installed ASAP — before pipe-pane — so even a runtime that dies during
252
+ // boot leaves a cause. No-op without cfg.exitLogDir (remain-on-exit off).
253
+ installExitHook(sock, identity, cfg.exitLogDir)
254
+
181
255
  // (3) pipe-pane the session output to the per-identity log.
182
256
  mkdirSync(cfg.logDir, { recursive: true, mode: 0o700 })
183
257
  const paneLog = `${cfg.logDir}/${identity}.log`
@@ -1,5 +1,5 @@
1
1
  import { describe, expect, test } from 'bun:test'
2
- import { getAdapter, launch } from './index.ts'
2
+ import { exitCauseHook, exitLogPath, getAdapter, launch } from './index.ts'
3
3
  import { claudeAdapter } from './adapters/claude.ts'
4
4
  import { codexAdapter } from './adapters/codex.ts'
5
5
  import { telegramAdapter } from './adapters/telegram.ts'
@@ -187,6 +187,39 @@ describe('claudeAdapter', () => {
187
187
  })
188
188
  })
189
189
 
190
+ // ─── exit-cause observability: the pane-died hook builder (pure string) ──────
191
+ describe('exitCauseHook (exit-cause observability)', () => {
192
+ const hook = exitCauseHook('claude-iapeer', '/r/logs/iapeer/exits.log')
193
+
194
+ test('exitLogPath → exits.log sibling to lifecycle.log', () => {
195
+ expect(exitLogPath('/r/logs/iapeer')).toBe('/r/logs/iapeer/exits.log')
196
+ })
197
+ test('reads pane_dead_status AND pane_dead_signal (both death classes)', () => {
198
+ // graceful exit populates #{pane_dead_status}; a signal populates #{pane_dead_signal}.
199
+ expect(hook).toContain('dead_status=#{pane_dead_status}')
200
+ expect(hook).toContain('dead_signal=#{pane_dead_signal}')
201
+ })
202
+ test('one logfmt line: ts + ev=session-exit + identity, appended to the exit log', () => {
203
+ expect(hook).toContain('ev=session-exit')
204
+ expect(hook).toContain('identity=claude-iapeer')
205
+ expect(hook).toContain('ts=%s')
206
+ expect(hook).toContain('>> "/r/logs/iapeer/exits.log"')
207
+ expect(hook).toContain('\\n') // literal backslash-n for sh printf, not a real newline
208
+ })
209
+ test('logs BEFORE it reaps: run-shell (sync, no -b) then tmux-native kill-session', () => {
210
+ // run-shell must NOT be backgrounded (-b) — the printf has to finish before the
211
+ // kill tears the server down, else the line is lost to the race (verified live).
212
+ expect(hook).not.toContain('run-shell -b')
213
+ expect(hook.indexOf('run-shell')).toBeLessThan(hook.indexOf('kill-session'))
214
+ // tmux-NATIVE kill-session (no shell `tmux`) → needs no PATH (launchd minimal env).
215
+ expect(hook).toContain('kill-session -t "claude-iapeer"')
216
+ })
217
+ test('quoting: single-quoted run-shell arg (tmux layer) wrapping double-quoted sh', () => {
218
+ expect(hook).toMatch(/run-shell '.*'/)
219
+ expect(hook).not.toContain("''") // no empty/again-collapsed single-quote pair
220
+ })
221
+ })
222
+
190
223
  // ─── Ф-A #2: deliveryMarkers OWNED by the adapter (07.06 refactor) ───────────
191
224
  describe('deliveryMarkers (adapter-owned, was transport PROMPT_GLYPHS)', () => {
192
225
  test('claude: ❯ glyph + paste patterns', () => {
@@ -20,7 +20,7 @@ import { spawnSync } from 'child_process'
20
20
  import { join } from 'path'
21
21
  import { INFRA_RUNTIME_BIN_ENV, isInfraRuntime, resolveSockDir } from '../core/constants.ts'
22
22
  import { buildProcessAddress, buildSocketPath } from '../core/socket.ts'
23
- import { peerLogsDir } from '../storage/index.ts'
23
+ import { peerLogsDir, pluginLogsDir } from '../storage/index.ts'
24
24
  import { readPeerProfile } from '../identity/index.ts'
25
25
  import { getAdapter, launch } from './index.ts'
26
26
  import type { LaunchConfig, LaunchSpec } from './types.ts'
@@ -102,6 +102,12 @@ export async function runAlwaysOn(personality: string, runtime: string, cwd: str
102
102
  // GLOBAL infra logs (Фаза §8): ~/.iapeer/logs/<personality>/ — match the plist's
103
103
  // stdout/stderr dir (installAlwaysOnPlist), not per-peer <cwd>/.iapeer/logs/.
104
104
  logDir: peerLogsDir(personality, { env }),
105
+ // Exit-cause log → the shared ~/.iapeer/logs/iapeer (== lifecycle eventLogDir),
106
+ // so an infra peer's self-death is recorded next to lifecycle.log too. The hook
107
+ // also reaps the dead pane: without it remain-on-exit would linger a dead pane,
108
+ // keeping sessionAlive() true so runAlwaysOn block-watches forever and KeepAlive
109
+ // never respawns — the hook prevents that regression as well as logging the cause.
110
+ exitLogDir: pluginLogsDir('iapeer', { env }),
105
111
  env,
106
112
  alwaysOn: true,
107
113
  }
@@ -266,6 +266,17 @@ export interface LaunchConfig extends LaunchAdapterConfig {
266
266
  maxAgeSecs: number
267
267
  /** Log dir for pipe-pane output. */
268
268
  logDir: string
269
+ /**
270
+ * Durable EXIT-CAUSE log dir (~/.iapeer/logs/iapeer — next to lifecycle.log,
271
+ * where the investigator looks). When set, launch installs a tmux `pane-died`
272
+ * hook that records WHY the session's process died (exit status / signal) AT THE
273
+ * MOMENT of death, into `<exitLogDir>/exits.log` — the blind spot the daemon's
274
+ * 60 s supervise tick (reaped-gone) can only see post-factum, after the exit code
275
+ * is already lost. Routed through cfg (NOT re-resolved from env) so it is isolated
276
+ * by the same sandbox as the rest of launch; a FALSY dir → no hook installed and
277
+ * `remain-on-exit` stays off (original behavior — a partial/test cfg never writes
278
+ * and never lingers a dead pane). See exitCauseHook in index.ts. */
279
+ exitLogDir?: string
269
280
  env?: NodeJS.ProcessEnv
270
281
  /**
271
282
  * Always-on bring-up (infra runtimes held by launchd KeepAlive): SKIP the
@@ -805,6 +805,10 @@ export async function wakeOrSpawn(args: WakeArgs, deps: WakeDeps = {}): Promise<
805
805
  readyGateSecs: cfg.readyGateSecs,
806
806
  maxAgeSecs: cfg.maxAgeSecs,
807
807
  logDir: cfg.logDir,
808
+ // Exit-cause log → next to lifecycle.log (~/.iapeer/logs/iapeer), where the
809
+ // investigator already looks: a self-death now leaves `exits.log` with the
810
+ // status/signal the daemon's post-factum reaped-gone could never recover.
811
+ exitLogDir: cfg.eventLogDir,
808
812
  env,
809
813
  }
810
814
  // C2 — initial_prompt (launch-seed): on a FRESH wake, seed the first turn with