@agfpd/iapeer 0.2.9 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/daemon/daemon.test.ts +151 -1
- package/src/daemon/deliverylog.ts +64 -0
- package/src/daemon/index.ts +86 -6
- package/src/daemon/main.test.ts +112 -0
- package/src/daemon/main.ts +91 -1
- package/src/identity/identity.test.ts +17 -0
- package/src/identity/index.ts +13 -0
- package/src/launch/bootdeliver.test.ts +106 -0
- package/src/launch/index.ts +25 -5
- package/src/launch/types.ts +2 -1
- package/src/lifecycle/eventlog.ts +18 -69
- package/src/lifecycle/index.ts +197 -3
- package/src/lifecycle/lifecycle.test.ts +160 -4
- package/src/lifecycle/queue.test.ts +185 -0
- package/src/lifecycle/queue.ts +159 -0
- package/src/provision/index.ts +21 -0
- package/src/provision/provision.test.ts +57 -1
- package/src/storage/rotatelog.test.ts +44 -0
- package/src/storage/rotatelog.ts +109 -0
- package/src/transport/index.ts +39 -0
package/src/identity/index.ts
CHANGED
|
@@ -48,6 +48,14 @@ import {
|
|
|
48
48
|
// Types
|
|
49
49
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
50
50
|
|
|
51
|
+
/**
 * Per-peer wake policy. `ephemeral` = stateless worker: every delivery is handled in
 * a FRESH session, the peer dies after its turn, and a delivery to a still-live session
 * is QUEUED (serial) rather than injected — so each task gets a clean context window.
 * Lifecycle-owned (resolveWakeMode forces fresh; superviseTick reaps post-turn; the
 * daemon drains the queue on death). Absent = normal warm-on-demand (resume-eligible).
 * Modelled as a string-literal type (not a boolean flag, and not a TypeScript `enum`)
 * so that further policies can be added later as additional literals.
 */
export type WakePolicy = 'ephemeral'
|
|
58
|
+
|
|
51
59
|
export interface PeerProfile {
|
|
52
60
|
personality: string
|
|
53
61
|
runtime: Runtime
|
|
@@ -59,6 +67,8 @@ export interface PeerProfile {
|
|
|
59
67
|
* warm). Carries an opening directive and/or a "I'm up" report. */
|
|
60
68
|
initial_prompt?: string
|
|
61
69
|
interfaces?: PeerInterfaces
|
|
70
|
+
/** Per-peer wake policy (lifecycle-owned). Absent = normal warm-on-demand. */
|
|
71
|
+
wake_policy?: WakePolicy
|
|
62
72
|
}
|
|
63
73
|
|
|
64
74
|
// Write shape: intelligence/description optional so a caller can write a profile
|
|
@@ -211,6 +221,9 @@ export function readPeerProfile(cwd: string = process.cwd()): PeerProfile | null
|
|
|
211
221
|
? { initial_prompt: obj.initial_prompt }
|
|
212
222
|
: {}),
|
|
213
223
|
...(interfaces ? { interfaces } : {}),
|
|
224
|
+
// Wake policy — only the known enum value is honored; anything else → omitted
|
|
225
|
+
// (treated as normal warm-on-demand, never throws on an unknown future value).
|
|
226
|
+
...(obj.wake_policy === 'ephemeral' ? { wake_policy: 'ephemeral' as const } : {}),
|
|
214
227
|
}
|
|
215
228
|
}
|
|
216
229
|
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
// Ф-#8b: cold-wake boot first-message delivery via load-buffer + bracketed
|
|
2
|
+
// paste-buffer — the SAME byte-path as warm delivery (transport.deliverViaTmux),
|
|
3
|
+
// replacing the old `send-keys -l` retype. Proven hermetically against a REAL
|
|
4
|
+
// tmux (gated like sockdir.test.ts) with a fake tui adapter whose "runtime" is
|
|
5
|
+
// `cat >> <file>`: whatever the boot path injects into the pane lands verbatim in
|
|
6
|
+
// the file, and the ready-gate keys on that file's mtime (the activity proxy).
|
|
7
|
+
//
|
|
8
|
+
// pty note: `cat` reads the pane pty in CANONICAL mode (line-buffered, ~1 KiB
|
|
9
|
+
// line cap on macOS) — real TUIs run raw and have no such cap. The fixture
|
|
10
|
+
// message therefore uses many sub-1-KiB LINES to total multi-KiB; what this test
|
|
11
|
+
// pins is the INJECTION path (one bracketed paste, no option-parsing traps, no
|
|
12
|
+
// key-by-key retype), not the tty discipline.
|
|
13
|
+
|
|
14
|
+
import { afterEach, describe, expect, test } from 'bun:test'
|
|
15
|
+
import { mkdtempSync, readFileSync, rmSync, statSync } from 'fs'
|
|
16
|
+
import { tmpdir } from 'os'
|
|
17
|
+
import { join } from 'path'
|
|
18
|
+
import { spawnSync } from 'child_process'
|
|
19
|
+
import { launch } from './index.ts'
|
|
20
|
+
import type { LaunchConfig, LaunchSpec, RuntimeAdapter } from './types.ts'
|
|
21
|
+
|
|
22
|
+
// True when a `tmux` binary can be executed (`tmux -V` exits with status 0).
// The real-tmux test in this file is registered through `test.if(tmuxAvailable)`,
// so it only runs where tmux is present.
const tmuxAvailable = spawnSync('tmux', ['-V'], { stdio: 'ignore' }).status === 0
|
|
23
|
+
|
|
24
|
+
/** Scratch directories handed out by mkTmp during the current test. */
const dirs: string[] = []

/**
 * Create a uniquely named scratch directory under the OS temp dir and remember
 * it in `dirs` so it can be removed after the test.
 *
 * @returns the absolute path of the new directory
 */
function mkTmp(): string {
  const prefix = join(tmpdir(), 'iapeer-bootdeliver-')
  const created = mkdtempSync(prefix)
  dirs.push(created)
  return created
}
|
|
30
|
+
// After every test: pop each registered scratch directory off `dirs` and delete
// it recursively (force: a directory that is already gone is not an error).
afterEach(() => {
  for (let dir = dirs.pop(); dir !== undefined; dir = dirs.pop()) {
    rmSync(dir, { recursive: true, force: true })
  }
})
|
|
33
|
+
|
|
34
|
+
/**
 * Fake tui adapter for the boot-delivery test. Its "runtime" is a shell running
 * `cat >> <recvPath>`, so whatever is injected into the pane is appended to that
 * file. The activity proxy is the file's mtime; while the file does not exist the
 * proxy reports `null`.
 *
 * `recvPath` is embedded in an `sh -c` command line, so it is POSIX single-quoted
 * (embedded `'` becomes `'\''`). Without the quoting, a temp directory whose path
 * contains a space or a shell metacharacter would split or reinterpret the
 * redirect target.
 *
 * @param recvPath file that receives everything typed/pasted into the pane
 * @returns an adapter object with every dialog hook stubbed to "nothing to do"
 */
function catAdapter(recvPath: string): RuntimeAdapter {
  const quotedRecvPath = `'${recvPath.replace(/'/g, `'\\''`)}'`
  return {
    runtime: 'claude', // any tui Runtime — the adapter object itself is what launch consumes
    kind: 'tui',
    usesDoctrine: false,
    deliveryMarkers: { promptGlyphs: [] },
    buildArgv: () => ['/bin/sh', '-c', `cat >> ${quotedRecvPath}`],
    bootDialogKeys: () => null,
    isInputReady: () => true,
    newestActivityMtime: () => {
      try {
        return statSync(recvPath).mtimeMs
      } catch {
        // stat failed (typically: nothing has been received yet) → no activity
        return null
      }
    },
    permissionDialogActive: () => false,
    permissionDialogKeys: () => [],
    resolveResume: () => ({ ok: true }),
    executeControl: () => null,
  }
}
|
|
59
|
+
|
|
60
|
+
describe('boot first-message delivery (load-buffer + bracketed paste)', () => {
  test.if(tmuxAvailable)(
    'a multi-line, dash-leading, multi-KiB first message lands INTACT and launch goes READY',
    async () => {
      const root = mkTmp()
      const recv = join(root, 'received.txt')
      const sock = join(root, 'tmux-iap-claude-bootd.sock')

      // One fixture that combines the historical boot-inject traps:
      //   - a leading '-' (the send-keys option-parsing trap, audit #6)
      //   - quotes, `$`, backticks and `;` (corrupted if anything re-quotes the body)
      //   - multi-KiB size, as 8 lines of ~700 bytes each, so every line stays
      //     under the canonical-mode line cap the fake `cat` runtime is subject to
      const dashLine = '- dash-leading first line (the send-keys option-parsing trap)'
      const metaLine = `quotes "double" 'single' and $dollar \`backtick\` ; semicolon`
      const bulkLines = Array.from({ length: 8 }, (_, i) => `${i}:${'x'.repeat(700)}`)
      const expectedLines = [dashLine, metaLine, ...bulkLines]
      const firstMessage = expectedLines.join('\n')

      const spec: LaunchSpec = {
        personality: 'bootd',
        runtime: 'claude',
        cwd: root,
        identity: 'claude-bootd',
        socketPath: sock,
        intelligence: 'artificial',
      }
      const cfg: LaunchConfig = {
        claudeBin: 'unused',
        codexBin: 'unused',
        sockDir: root,
        bootDeadlineSecs: 10,
        readyGateSecs: 8,
        maxAgeSecs: 120,
        logDir: join(root, 'logs'),
      }

      try {
        const result = await launch(spec, catAdapter(recv), firstMessage, cfg)
        expect(result.status).toBe('READY')
        // Every fixture line must appear verbatim in what the pane received.
        const got = readFileSync(recv, 'utf8')
        for (const line of expectedLines) expect(got).toContain(line)
      } finally {
        // Always tear down the private tmux server started on the test socket.
        spawnSync('tmux', ['-S', sock, 'kill-server'], { stdio: 'ignore' })
      }
    },
    60000,
  )
})
|
package/src/launch/index.ts
CHANGED
|
@@ -273,7 +273,8 @@ export const launch: LaunchFn = async (
|
|
|
273
273
|
}
|
|
274
274
|
|
|
275
275
|
// (6) BOOT phase (tui) — baseline the activity proxy, answer startup dialogs,
|
|
276
|
-
// wait for the input surface, then deliver firstMessage (
|
|
276
|
+
// wait for the input surface, then deliver firstMessage (load-buffer +
|
|
277
|
+
// bracketed paste-buffer + Enter — the SAME byte-path as warm delivery).
|
|
277
278
|
// An EMPTY firstMessage is a BARE bring-up (folder-launch with no seed, or an
|
|
278
279
|
// attach-resume that carries no message): reach the input surface and return
|
|
279
280
|
// READY — there is no message to deliver and nothing to ready-gate on (the
|
|
@@ -297,12 +298,31 @@ export const launch: LaunchFn = async (
|
|
|
297
298
|
}
|
|
298
299
|
if (adapter.isInputReady(pane)) {
|
|
299
300
|
if (!hasMessage) return ready(identity) // bare bring-up — session up, no message
|
|
300
|
-
//
|
|
301
|
-
//
|
|
302
|
-
// send-keys
|
|
303
|
-
|
|
301
|
+
// Boot delivery = load-buffer → paste-buffer -p → Enter (Ф-#8b hardening):
|
|
302
|
+
// the SAME mechanism warm delivery uses (transport.deliverViaTmux), replacing
|
|
303
|
+
// the old `send-keys -l`. send-keys retypes the message as literal keystrokes —
|
|
304
|
+
// a multi-KB envelope replays key-by-key through the pty input buffer, and the
|
|
305
|
+
// two paths could diverge (a message that survives warm delivery could be
|
|
306
|
+
// mangled at cold-wake). load-buffer hands the TUI the whole envelope as ONE
|
|
307
|
+
// bracketed paste, byte-identical to the warm path. A load/paste hiccup →
|
|
308
|
+
// retry on the next boot iteration (previously the send-keys result was
|
|
309
|
+
// ignored and a failed inject was declared delivered, failing the wake later
|
|
310
|
+
// with the wrong reason at the ready-gate).
|
|
311
|
+
const bufferName = `iapeer-boot-${process.pid}-${Date.now()}`
|
|
312
|
+
const load = spawnSync(
|
|
313
|
+
'tmux',
|
|
314
|
+
['-S', sock, 'load-buffer', '-b', bufferName, '-'],
|
|
315
|
+
{ input: firstMessage, encoding: 'utf8' },
|
|
316
|
+
)
|
|
317
|
+
if (load.status !== 0) continue
|
|
318
|
+
const paste = tmux(sock, 'paste-buffer', '-p', '-b', bufferName, '-t', identity)
|
|
319
|
+
if (!paste.ok) {
|
|
320
|
+
tmux(sock, 'delete-buffer', '-b', bufferName)
|
|
321
|
+
continue
|
|
322
|
+
}
|
|
304
323
|
await sleep(300)
|
|
305
324
|
tmux(sock, 'send-keys', '-t', identity, 'Enter')
|
|
325
|
+
tmux(sock, 'delete-buffer', '-b', bufferName)
|
|
306
326
|
delivered = true
|
|
307
327
|
}
|
|
308
328
|
}
|
package/src/launch/types.ts
CHANGED
|
@@ -312,7 +312,8 @@ export interface LaunchResult {
|
|
|
312
312
|
* Bring up ONE session: pre-clean stale tmux server → tmux new-session -d with
|
|
313
313
|
* adapter.buildArgv → pipe-pane → session self-TTL → boot (answer dialogs via
|
|
314
314
|
* adapter, wait for adapter.isInputReady, deliver the first message via
|
|
315
|
-
*
|
|
315
|
+
* load-buffer + bracketed paste — the same byte-path as warm delivery) →
|
|
316
|
+
* ready-gate (adapter.newestActivityMtime strictly advances).
|
|
316
317
|
* Runtime-agnostic; all specifics come from the adapter. Returns READY/FAILED.
|
|
317
318
|
* `firstMessage` (the task / routed envelope) is delivered as the boot message;
|
|
318
319
|
* a router runtime skips the TUI boot/ready phases.
|
|
@@ -19,17 +19,22 @@
|
|
|
19
19
|
// • Best-effort throughout: a write/rotate failure is swallowed. Observability
|
|
20
20
|
// must never take down the daemon or fail a wake/reap.
|
|
21
21
|
//
|
|
22
|
-
//
|
|
23
|
-
//
|
|
24
|
-
//
|
|
22
|
+
// The rotate-append primitive was PROMOTED to storage/rotatelog.ts (as this header
|
|
23
|
+
// anticipated) when the daemon's per-delivery log became the second producer
|
|
24
|
+
// (Ф-#8a). This module keeps its public API (appendLifecycleEvent + the logfmt
|
|
25
|
+
// helpers, re-exported) so its call sites and tests are untouched; only the
|
|
26
|
+
// implementation now lives in storage.
|
|
25
27
|
|
|
26
|
-
import { appendFileSync, mkdirSync, renameSync, rmSync, statSync } from 'fs'
|
|
27
28
|
import { join } from 'path'
|
|
29
|
+
import {
|
|
30
|
+
DEFAULT_LOG_KEEP,
|
|
31
|
+
DEFAULT_LOG_MAX_BYTES,
|
|
32
|
+
appendRotatedEvent,
|
|
33
|
+
} from '../storage/rotatelog.ts'
|
|
28
34
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
const DEFAULT_KEEP = 5
|
|
35
|
+
// Re-export the logfmt helpers — historical home of these (consumers import them
|
|
36
|
+
// from eventlog; the implementation moved to storage/rotatelog.ts).
|
|
37
|
+
export { fmtValue, formatEventLine } from '../storage/rotatelog.ts'
|
|
33
38
|
|
|
34
39
|
/** The durable lifecycle decision log inside `logDir` (cfg.eventLogDir). */
|
|
35
40
|
export function lifecycleLogPath(logDir: string): string {
|
|
@@ -49,56 +54,6 @@ export function superviseLogVerbose(env: NodeJS.ProcessEnv = process.env): boole
|
|
|
49
54
|
return v === '1' || v === 'true' || v === 'yes'
|
|
50
55
|
}
|
|
51
56
|
|
|
52
|
-
/** logfmt value: bare token, or double-quoted with `"`/`\` escaped, when it
|
|
53
|
-
* contains whitespace, `=` or `"`. Empty string → `""`. */
|
|
54
|
-
export function fmtValue(v: string | number): string {
|
|
55
|
-
const s = String(v)
|
|
56
|
-
if (s === '') return '""'
|
|
57
|
-
if (/[\s"=]/.test(s)) return `"${s.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`
|
|
58
|
-
return s
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
/** Render one logfmt line (ts first, then fields in insertion order; undefined
|
|
62
|
-
* fields are skipped). No trailing newline. Pure — unit-testable. */
|
|
63
|
-
export function formatEventLine(nowMs: number, fields: Record<string, string | number | undefined>): string {
|
|
64
|
-
const parts = [`ts=${new Date(nowMs).toISOString()}`]
|
|
65
|
-
for (const [k, v] of Object.entries(fields)) {
|
|
66
|
-
if (v === undefined) continue
|
|
67
|
-
parts.push(`${k}=${fmtValue(v)}`)
|
|
68
|
-
}
|
|
69
|
-
return parts.join(' ')
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
/** Size-rotate `path` (and its .1 … .keep backups) when the next line would push
|
|
73
|
-
* it over `maxBytes`. Drops the oldest, shifts each backup up by one, base→.1.
|
|
74
|
-
* Best-effort: any fs hiccup leaves the chain as-is (we then just append). */
|
|
75
|
-
function rotateIfNeeded(path: string, lineLen: number, maxBytes: number, keep: number): void {
|
|
76
|
-
let size: number
|
|
77
|
-
try {
|
|
78
|
-
size = statSync(path).size
|
|
79
|
-
} catch {
|
|
80
|
-
return // no file yet → nothing to rotate
|
|
81
|
-
}
|
|
82
|
-
if (size + lineLen <= maxBytes) return
|
|
83
|
-
try {
|
|
84
|
-
rmSync(`${path}.${keep}`, { force: true })
|
|
85
|
-
} catch {
|
|
86
|
-
/* best-effort */
|
|
87
|
-
}
|
|
88
|
-
for (let i = keep - 1; i >= 1; i--) {
|
|
89
|
-
try {
|
|
90
|
-
renameSync(`${path}.${i}`, `${path}.${i + 1}`)
|
|
91
|
-
} catch {
|
|
92
|
-
/* that backup may not exist yet */
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
try {
|
|
96
|
-
renameSync(path, `${path}.1`)
|
|
97
|
-
} catch {
|
|
98
|
-
/* best-effort */
|
|
99
|
-
}
|
|
100
|
-
}
|
|
101
|
-
|
|
102
57
|
export interface AppendEventOptions {
|
|
103
58
|
/** Reads the rotation knobs IAPEER_LIFECYCLE_LOG_MAX_BYTES / _KEEP. */
|
|
104
59
|
env?: NodeJS.ProcessEnv
|
|
@@ -119,15 +74,9 @@ export function appendLifecycleEvent(
|
|
|
119
74
|
): void {
|
|
120
75
|
if (!logDir) return
|
|
121
76
|
const env = opts.env ?? process.env
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
mkdirSync(logDir, { recursive: true, mode: 0o700 })
|
|
128
|
-
rotateIfNeeded(path, line.length, maxBytes, keep)
|
|
129
|
-
appendFileSync(path, line, { mode: 0o600 })
|
|
130
|
-
} catch {
|
|
131
|
-
/* observability is best-effort — a log failure must never break a wake/reap */
|
|
132
|
-
}
|
|
77
|
+
appendRotatedEvent(lifecycleLogPath(logDir), fields, {
|
|
78
|
+
nowMs: opts.nowMs,
|
|
79
|
+
maxBytes: envPosInt(env.IAPEER_LIFECYCLE_LOG_MAX_BYTES, DEFAULT_LOG_MAX_BYTES),
|
|
80
|
+
keep: envPosInt(env.IAPEER_LIFECYCLE_LOG_KEEP, DEFAULT_LOG_KEEP),
|
|
81
|
+
})
|
|
133
82
|
}
|
package/src/lifecycle/index.ts
CHANGED
|
@@ -24,10 +24,16 @@ import {
|
|
|
24
24
|
resolveSockDir,
|
|
25
25
|
type Runtime,
|
|
26
26
|
} from '../core/constants.ts'
|
|
27
|
-
import { buildProcessAddress, buildSocketPath } from '../core/socket.ts'
|
|
27
|
+
import { buildProcessAddress, buildSocketPath, parseSessionName } from '../core/socket.ts'
|
|
28
28
|
import { err, ok, type Result } from '../core/errors.ts'
|
|
29
29
|
import { resolveGlobalRoot } from '../storage/index.ts'
|
|
30
30
|
import { readPeerProfile, resolveIdentity } from '../identity/index.ts'
|
|
31
|
+
import {
|
|
32
|
+
ephemeralQueueDepth,
|
|
33
|
+
listQueuedIdentities,
|
|
34
|
+
peekEphemeralTask,
|
|
35
|
+
removeEphemeralTask,
|
|
36
|
+
} from './queue.ts'
|
|
31
37
|
import { findPeer, publicPeerSummary, readPeersIndex, type PeerRecord, type PublicPeerSummary } from '../registry/index.ts'
|
|
32
38
|
// Ф3: launch = HOW to bring up ONE session (runtime-agnostic primitive + adapter).
|
|
33
39
|
// lifecycle decides WHEN/HOW-MANY and delegates the bring-up to launch.
|
|
@@ -65,6 +71,13 @@ export interface LifecycleConfig {
|
|
|
65
71
|
crashLoopMax: number
|
|
66
72
|
/** Crash-loop guard: the sliding window (seconds) the death count is measured over. */
|
|
67
73
|
crashLoopWindowSecs: number
|
|
74
|
+
/** wake_policy:ephemeral M2 — the quiet window (seconds of activity-proxy silence)
|
|
75
|
+
* after which an ARMED ephemeral session (it has sent its outbound reply) is
|
|
76
|
+
* reaped. Long enough for the post-reply housekeeping writes (operative notes)
|
|
77
|
+
* to keep resetting it; far below idleSecs. ASSUMPTION (boris, by design): the
|
|
78
|
+
* transcript mtime is a LIVENESS proxy — "no longer writing" — not a semantic
|
|
79
|
+
* "done" signal. */
|
|
80
|
+
ephemeralQuietSecs: number
|
|
68
81
|
}
|
|
69
82
|
|
|
70
83
|
export function loadLifecycleConfig(env: NodeJS.ProcessEnv = process.env): LifecycleConfig {
|
|
@@ -87,6 +100,7 @@ export function loadLifecycleConfig(env: NodeJS.ProcessEnv = process.env): Lifec
|
|
|
87
100
|
maxAgeSecs: num(env.IAPEER_MAX_AGE_SECS, 14400),
|
|
88
101
|
crashLoopMax: num(env.IAPEER_CRASHLOOP_MAX, 3),
|
|
89
102
|
crashLoopWindowSecs: num(env.IAPEER_CRASHLOOP_WINDOW_SECS, 300),
|
|
103
|
+
ephemeralQuietSecs: num(env.IAPEER_EPHEMERAL_QUIET_SECS, 20),
|
|
90
104
|
}
|
|
91
105
|
}
|
|
92
106
|
|
|
@@ -194,6 +208,14 @@ export function clearStopped(cfg: LifecycleConfig, identity: string): void {
|
|
|
194
208
|
// .deaths : crash-loop guard — a small JSON ring of recent death epoch-ms.
|
|
195
209
|
// .topic : the topic tag of the current/last session (executor fresh-vs-
|
|
196
210
|
// resume discriminator).
|
|
211
|
+
// .ephemeral-armed : wake_policy:ephemeral M2 — set by the DAEMON when it routes
|
|
212
|
+
// an OUTBOUND send from an ephemeral worker (its single final reply, ADR-006:
|
|
213
|
+
// workers send no intermediate messages, so outbound ⇒ the task is answered).
|
|
214
|
+
// An ARMED live session is reaped by superviseTick after a quiet window
|
|
215
|
+
// (die-after-reply). The marker belongs to the session that sent the outbound:
|
|
216
|
+
// it is cleared on the quiet-reap, on that session's own death, and on the
|
|
217
|
+
// next successful launch — it NEVER survives into a successor session (else a
|
|
218
|
+
// stray marker would quiet-reap the next task before its answer).
|
|
197
219
|
//
|
|
198
220
|
// Boolean markers carry an ISO timestamp line (audit-friendly); .deaths is a JSON
|
|
199
221
|
// array; .topic is the raw topic string.
|
|
@@ -247,6 +269,32 @@ export function clearNewEager(cfg: LifecycleConfig, identity: string): void {
|
|
|
247
269
|
}
|
|
248
270
|
}
|
|
249
271
|
|
|
272
|
+
/** Location of the `<identity>.ephemeral-armed` marker file inside `cfg.stateDir`. */
function ephemeralArmedPath(cfg: LifecycleConfig, identity: string): string {
  const markerName = `${identity}.ephemeral-armed`
  return join(cfg.stateDir, markerName)
}
|
|
275
|
+
|
|
276
|
+
/**
 * Report whether the `.ephemeral-armed` marker file exists for `identity`.
 * The marker is written by setEphemeralArmed; superviseTick treats an armed
 * ephemeral session as a quiet-reap candidate.
 */
export function hasEphemeralArmed(cfg: LifecycleConfig, identity: string): boolean {
  const marker = ephemeralArmedPath(cfg, identity)
  return existsSync(marker)
}
|
|
281
|
+
|
|
282
|
+
/**
 * Arm the die-after-reply reap for `identity`: ensure `cfg.stateDir` exists
 * (mode 0700) and write the marker file (mode 0600) holding one ISO-8601
 * timestamp line. Per the original note, only the daemon's outbound seam is
 * meant to call this (an ephemeral caller's send_to_peer was routed ok).
 * Filesystem errors are NOT swallowed here — they propagate to the caller.
 */
export function setEphemeralArmed(cfg: LifecycleConfig, identity: string): void {
  mkdirSync(cfg.stateDir, { recursive: true, mode: 0o700 })
  const stampLine = `${new Date().toISOString()}\n`
  writeFileSync(ephemeralArmedPath(cfg, identity), stampLine, { mode: 0o600 })
}
|
|
288
|
+
|
|
289
|
+
/**
 * Remove the `.ephemeral-armed` marker for `identity`. Called after a quiet-reap,
 * when the armed session is found dead, and on a new launch. Best-effort: a
 * missing marker is already tolerated by `force: true`, and any other failure
 * is swallowed so that clearing the mark never breaks the caller.
 */
export function clearEphemeralArmed(cfg: LifecycleConfig, identity: string): void {
  try {
    const marker = ephemeralArmedPath(cfg, identity)
    rmSync(marker, { force: true })
  } catch {
    /* best-effort — nothing useful to do if the marker cannot be removed */
  }
}
|
|
297
|
+
|
|
250
298
|
function deathsPath(cfg: LifecycleConfig, identity: string): string {
|
|
251
299
|
return join(cfg.stateDir, `${identity}.deaths`)
|
|
252
300
|
}
|
|
@@ -359,6 +407,22 @@ function isHumanConversational(cwd: string): boolean {
|
|
|
359
407
|
}
|
|
360
408
|
}
|
|
361
409
|
|
|
410
|
+
/**
 * True iff the peer profile read from `cwd` declares `wake_policy: "ephemeral"`.
 *
 * Callers in this module use the answer to force a fresh (never resumed) wake in
 * resolveWakeMode and to allow the armed quiet-reap in superviseTick. If reading
 * the profile throws, the peer is treated as NOT ephemeral — the safe default,
 * i.e. normal warm-on-demand.
 *
 * Original author's note (the code it refers to is not visible in this hunk):
 * when a peer has both the ephemeral policy and a telegram interface, ephemeral
 * wins in resolveWakeMode, and provision warns about that combination.
 */
export function isEphemeralPeer(cwd: string): boolean {
  let policy: string | undefined
  try {
    policy = readPeerProfile(cwd)?.wake_policy
  } catch {
    return false
  }
  return policy === 'ephemeral'
}
|
|
425
|
+
|
|
362
426
|
/**
|
|
363
427
|
* Decide resume vs fresh on a wake (TARGET redesign). Branch order:
|
|
364
428
|
* 1. argsResume === false (folder-launch `iapeer <runtime>`) → FRESH.
|
|
@@ -390,6 +454,14 @@ export function resolveWakeMode(
|
|
|
390
454
|
return { resume: true, resumeRef: r.ref, cause: 'attach' }
|
|
391
455
|
}
|
|
392
456
|
// 3. default (a message woke a dead/asleep peer): decide by the death cause.
|
|
457
|
+
// 3-ephemeral (M1): a stateless worker ALWAYS wakes fresh on delivery — never resume,
|
|
458
|
+
// regardless of death cause or topic. Its clean-window-per-task is the whole point.
|
|
459
|
+
// Consume a stray .idle-reaped marker so it does not accumulate (it has no effect for
|
|
460
|
+
// an ephemeral peer, which never resumes, but keep state tidy).
|
|
461
|
+
if (isEphemeralPeer(cwd)) {
|
|
462
|
+
clearIdleReaped(cfg, identity)
|
|
463
|
+
return { resume: false, cause: 'ephemeral-policy' }
|
|
464
|
+
}
|
|
393
465
|
// 3a. NOT idle-reaped → it died on its own (crash / self-close) → clean FRESH.
|
|
394
466
|
if (!hasIdleReaped(cfg, identity)) return { resume: false, cause: 'crash-or-self-close' }
|
|
395
467
|
// 3b. idle-reaped → resume-eligible. Consume the marker now (it has done its job).
|
|
@@ -831,6 +903,11 @@ export async function wakeOrSpawn(args: WakeArgs, deps: WakeDeps = {}): Promise<
|
|
|
831
903
|
`[iapeer] WARN session-state write failed for ${identity} — session is live + TTL-bounded but not idle-reap-supervised: ${e instanceof Error ? e.message : String(e)}\n`,
|
|
832
904
|
)
|
|
833
905
|
}
|
|
906
|
+
// A NEWLY launched session starts UNARMED by definition (no outbound yet) — clear
|
|
907
|
+
// any stray .ephemeral-armed so it can never quiet-reap this session before its
|
|
908
|
+
// reply. ONLY here on the actual launch path, NEVER on the live-session fast path
|
|
909
|
+
// above (a live session may be legitimately armed mid-quiet-window).
|
|
910
|
+
clearEphemeralArmed(cfg, identity)
|
|
834
911
|
// Establish the session's topic (executor discriminator) and reset the crash-loop
|
|
835
912
|
// ring — a successful wake means this is NOT a tight crash loop. Best-effort.
|
|
836
913
|
writeTopic(cfg, identity, args.topic?.trim() ?? '')
|
|
@@ -870,10 +947,12 @@ export function killSession(sock: string, identity: string): void {
|
|
|
870
947
|
|
|
871
948
|
export interface SuperviseOutcome {
|
|
872
949
|
identity: string
|
|
873
|
-
action: 'reaped-idle' | 'reaped-gone' | 'skipped-launchd' | 'alive' | 'needs-eager-fresh'
|
|
950
|
+
action: 'reaped-idle' | 'reaped-gone' | 'reaped-ephemeral' | 'skipped-launchd' | 'alive' | 'needs-eager-fresh'
|
|
874
951
|
reason?: string
|
|
875
952
|
/** For 'needs-eager-fresh': the peer to EAGERLY re-launch fresh (its session died
|
|
876
|
-
* carrying a .new-eager mark). The daemon timer drives the async relaunch.
|
|
953
|
+
* carrying a .new-eager mark). The daemon timer drives the async relaunch.
|
|
954
|
+
* Also set on 'reaped-ephemeral' — the M3 queue-drain hook needs the peer to
|
|
955
|
+
* wake fresh for the next queued task. */
|
|
877
956
|
personality?: string
|
|
878
957
|
runtime?: Runtime
|
|
879
958
|
}
|
|
@@ -908,6 +987,10 @@ export function superviseTick(cfg: LifecycleConfig, deps: SuperviseDeps = {}): S
|
|
|
908
987
|
// the idle-reap below) → it died on its own → do NOT write .idle-reaped here.
|
|
909
988
|
recordDeath(cfg, s.identity, nowMs)
|
|
910
989
|
removeSessionState(cfg, s.identity)
|
|
990
|
+
// The .ephemeral-armed mark belongs to THIS (now dead) session — it armed on its
|
|
991
|
+
// outbound reply. Clear it with the session, so a successor session can never be
|
|
992
|
+
// quiet-reaped on a stale mark before answering its own task. No-op otherwise.
|
|
993
|
+
clearEphemeralArmed(cfg, s.identity)
|
|
911
994
|
// A session that died carrying a .new-eager mark is an owner /new: re-launch
|
|
912
995
|
// EAGERLY as fresh (not lazily on the next message). The mark is LEFT for the
|
|
913
996
|
// eager relaunch (processEagerRelaunches) to consume; the daemon timer drives it.
|
|
@@ -939,6 +1022,29 @@ export function superviseTick(cfg: LifecycleConfig, deps: SuperviseDeps = {}): S
|
|
|
939
1022
|
/* no adapter for this runtime yet → wokeAt fallback */
|
|
940
1023
|
}
|
|
941
1024
|
const ageSecs = Math.floor((nowMs - mt) / 1000)
|
|
1025
|
+
// wake_policy:ephemeral M2 — die-after-reply: an ARMED ephemeral session (the
|
|
1026
|
+
// daemon routed its outbound reply) is reaped after a QUIET window, checked
|
|
1027
|
+
// BEFORE the idle branch (quiet ≪ idle). Quiet = the activity proxy stayed
|
|
1028
|
+
// silent for ephemeralQuietSecs — post-reply housekeeping (operative-note
|
|
1029
|
+
// writes) keeps resetting it, so the worker finishes its bookkeeping first.
|
|
1030
|
+
// NOT armed (still mid-task, e.g. a long silent tool run) → the ordinary
|
|
1031
|
+
// idle bound below is its only reaper. Deliberate, policy-driven death:
|
|
1032
|
+
// NO .idle-reaped (an ephemeral peer never resumes) and NO recordDeath
|
|
1033
|
+
// (the crash-loop ring counts faults, not policy reaps).
|
|
1034
|
+
if (isEphemeralPeer(s.cwd) && hasEphemeralArmed(cfg, s.identity) && ageSecs > cfg.ephemeralQuietSecs) {
|
|
1035
|
+
killSession(sock, s.identity)
|
|
1036
|
+
clearEphemeralArmed(cfg, s.identity)
|
|
1037
|
+
removeSessionState(cfg, s.identity)
|
|
1038
|
+
out.push({
|
|
1039
|
+
identity: s.identity,
|
|
1040
|
+
action: 'reaped-ephemeral',
|
|
1041
|
+
reason: `armed, quiet ${ageSecs}s`,
|
|
1042
|
+
personality: s.personality,
|
|
1043
|
+
runtime: s.runtime,
|
|
1044
|
+
})
|
|
1045
|
+
trace({ identity: s.identity, action: 'reaped-ephemeral', age: `${ageSecs}s`, outcome: 'ephemeral-done' })
|
|
1046
|
+
continue
|
|
1047
|
+
}
|
|
942
1048
|
if (ageSecs > cfg.idleSecs) {
|
|
943
1049
|
// THE ONLY place .idle-reaped is written: this is the one death the daemon
|
|
944
1050
|
// INITIATES. Its presence on the next wake = the session was parked cleanly =
|
|
@@ -991,6 +1097,94 @@ export async function processEagerRelaunches(
|
|
|
991
1097
|
return results
|
|
992
1098
|
}
|
|
993
1099
|
|
|
1100
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
1101
|
+
// wake_policy:"ephemeral" M3 — the serial-queue DRAIN. Deliveries to an ephemeral
|
|
1102
|
+
// target are always ENQUEUED (transport's injected ephemeral seam → queue.ts);
|
|
1103
|
+
// this is the consumer side: feed the worker ONE task per fresh session.
|
|
1104
|
+
// Re-export the queue API so consumers (daemon main, tests) reach it through the
|
|
1105
|
+
// module index.
|
|
1106
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
1107
|
+
|
|
1108
|
+
export {
|
|
1109
|
+
enqueueEphemeralTask,
|
|
1110
|
+
ephemeralQueueDepth,
|
|
1111
|
+
ephemeralQueueDir,
|
|
1112
|
+
listQueuedIdentities,
|
|
1113
|
+
peekEphemeralTask,
|
|
1114
|
+
removeEphemeralTask,
|
|
1115
|
+
type EphemeralQueueItem,
|
|
1116
|
+
type PeekedQueueItem,
|
|
1117
|
+
} from './queue.ts'
|
|
1118
|
+
|
|
1119
|
+
/** Optional injection points accepted by drainEphemeralQueue and drainAllEphemeralQueues. */
export interface DrainDeps {
  /** Environment forwarded to the lifecycle event log, the wake call and the
   *  launchd-managed check; the drain functions fall back to `process.env`
   *  when it is omitted. */
  env?: NodeJS.ProcessEnv
  /** Injectable wake (tests); default wakeOrSpawn. */
  wakeFn?: (args: WakeArgs, deps: WakeDeps) => Promise<WakeResult>
}
|
|
1124
|
+
|
|
1125
|
+
/**
 * Hand the head of an ephemeral worker's queue to a fresh session — at most one
 * task per call, and only when the worker has no live session.
 *
 * Steps, in order:
 *   1. a live session → return null (one task per session; wait for its reap);
 *   2. peek the head item; empty queue → return null;
 *   3. append an `ephemeral-drain` event (identity, item seq, current depth) to
 *      the lifecycle log;
 *   4. wake the peer with the item's topic/task (`deps.wakeFn`, default wakeOrSpawn);
 *   5. remove the item ONLY when the wake reports READY — any other status leaves
 *      it at the head so that a later drain can retry it.
 *
 * Per the original note, concurrent drains are expected to serialize on
 * wakeOrSpawn's per-identity wake lock (wakeOrSpawn is not shown in this hunk).
 *
 * @returns the wake result, or null when there was nothing to do
 */
export async function drainEphemeralQueue(
  cfg: LifecycleConfig,
  personality: string,
  runtime: Runtime,
  deps: DrainDeps = {},
): Promise<WakeResult | null> {
  const env = deps.env ?? process.env
  const identity = buildProcessAddress(runtime, personality)
  const sock = buildSocketPath(runtime, personality, cfg.sockDir)

  if (sessionAlive(sock, identity)) return null

  const head = peekEphemeralTask(cfg, identity)
  if (!head) return null

  // Durable drain trace: which item is being fed, and how deep the queue is.
  const depth = ephemeralQueueDepth(cfg, identity)
  appendLifecycleEvent(cfg.eventLogDir, { ev: 'ephemeral-drain', identity, seq: head.seq, depth }, { env })

  const wake = deps.wakeFn ?? wakeOrSpawn
  const outcome = await wake({ personality, runtime, topic: head.topic, task: head.task }, { cfg, env })
  if (outcome.status === 'READY') {
    removeEphemeralTask(cfg, identity, head.seq)
  }
  return outcome
}
|
|
1160
|
+
|
|
1161
|
+
/**
 * Run drainEphemeralQueue once for every identity that currently has queued
 * tasks — the daemon's supervise-tick hook. Identities are processed one after
 * another (each drain is awaited before the next starts).
 *
 * Skipped without a result entry:
 *   - identities whose name parseSessionName cannot parse;
 *   - launchd-managed peers (a drain never wakes them);
 *   - identities for which the drain had nothing to do (it returned null).
 *
 * A drain that throws is recorded as a FAILED result carrying the error
 * message, so one bad identity does not stop the scan.
 *
 * @returns one WakeResult per identity that was actually attempted
 */
export async function drainAllEphemeralQueues(cfg: LifecycleConfig, deps: DrainDeps = {}): Promise<WakeResult[]> {
  const env = deps.env ?? process.env
  const results: WakeResult[] = []
  for (const identity of listQueuedIdentities(cfg)) {
    const peer = parseSessionName(identity)
    if (!peer || isLaunchdManaged(peer.personality, env)) continue
    try {
      const outcome = await drainEphemeralQueue(cfg, peer.personality, peer.runtime, deps)
      if (outcome) results.push(outcome)
    } catch (e) {
      const reason = e instanceof Error ? e.message : String(e)
      results.push({ status: 'FAILED', woke: false, reason })
    }
  }
  return results
}
|
|
1187
|
+
|
|
994
1188
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
995
1189
|
// folderLaunch / attachPeer — operator verbs (contract ЖЦ §Запуск из папки, §attach;
|
|
996
1190
|
// Примитивы §Карта verbs). Both reuse wakeOrSpawn (one bring-up path); the difference
|