@agfpd/iapeer 0.2.18 → 0.2.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agfpd/iapeer",
3
- "version": "0.2.18",
3
+ "version": "0.2.20",
4
4
  "description": "Foundation core for the iapeer multi-agent ecosystem: identity, registry, storage, codec.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/index.ts CHANGED
@@ -40,7 +40,7 @@ import {
40
40
  wakeOrSpawn,
41
41
  } from '../lifecycle/index.ts'
42
42
  import { getAdapter } from '../launch/index.ts'
43
- import { isFoundationOwnedPlist, kickstartDaemon, launchdLabel, launchdPlistPath } from '../launch/launchd.ts'
43
+ import { isFoundationOwnedPlist, kickstartDaemon, launchctlBootstrap, launchdLabel, launchdPlistPath } from '../launch/launchd.ts'
44
44
  import { resolveCallerIdentity, resolveIdentity } from '../identity/index.ts'
45
45
  import { runAlwaysOn } from '../launch/launchdRun.ts'
46
46
  import { installDaemonPlist, startConfiguredDaemon } from '../daemon/main.ts'
@@ -231,10 +231,22 @@ export function startPeer(personality: string, runtime: string | undefined, opts
231
231
  const identity = buildProcessAddress(rt, personality)
232
232
  if (isInfraRuntime(rt)) {
233
233
  const plist = launchdPlistPath(personality, env)
234
- // Audit #13: a failed bootstrap means the peer did NOT start — surface it instead
235
- // of reporting success silently.
236
- const r = spawnSync('launchctl', ['bootstrap', `gui/${uid()}`, plist], { encoding: 'utf8' })
237
- out.push({ personality, runtime: rt, action: 'bootstrap', reason: r.status === 0 ? undefined : `launchctl bootstrap FAILED (exit ${r.status})${(r.stderr ?? '').trim() ? `: ${(r.stderr ?? '').trim()}` : ''} — peer not started` })
234
+ // UNDEAD-JOB-SAFE start (boris's connect-acceptance find 10.06): a bootstrap
235
+ // right after a bootout used to hit the still-dismantling job (exit 5 I/O
236
+ // error) and leave the router DOWN. launchctlBootstrap now waits for the
237
+ // job to vanish and retries with backoff (~22 s budget); a failure after
238
+ // every attempt is LOUD with the manual rescue recipe. (Also gains the
239
+ // sentinel fleet-guard + sandbox guard the raw spawn never had.)
240
+ const r = launchctlBootstrap(personality, plist, env)
241
+ const ok = r.state === 'loaded' || r.state === 'already-loaded' || r.state === 'skipped-sandbox'
242
+ out.push({
243
+ personality,
244
+ runtime: rt,
245
+ action: 'bootstrap',
246
+ reason: ok
247
+ ? undefined
248
+ : `launchctl bootstrap FAILED${r.detail ? `: ${r.detail}` : ''} — peer not started; manual rescue: launchctl bootstrap gui/$(id -u) ${plist}`,
249
+ })
238
250
  } else {
239
251
  clearStopped(cfg, identity)
240
252
  out.push({ personality, runtime: rt, action: 'started' })
@@ -795,10 +807,17 @@ export async function runCli(argv: string[], env: NodeJS.ProcessEnv = process.en
795
807
  ensureGlobalIapScaffold({ env })
796
808
  const r = installIapeer(fileURLToPath(import.meta.url), env)
797
809
  const plist = installDaemonPlist({ env })
810
+ const signingLine =
811
+ r.signing == null
812
+ ? ''
813
+ : r.signing.state === 'failed-soft'
814
+ ? ` WARNING signing: ${r.signing.detail}\n`
815
+ : ` signing: ${r.signing.state}${r.signing.state === 'signed-new-identity' ? ' (local identity created — the one install-time event)' : ''}\n`
798
816
  out(
799
817
  `installed iapeer → ${r.binPath}` +
800
818
  `${r.prevPath ? ` (previous kept: ${r.prevPath})` : ''}` +
801
819
  `${r.size ? ` (${Math.round(r.size / 1e6)}M)` : ''}\n` +
820
+ signingLine +
802
821
  ` scaffold: ~/.iapeer/ ensured (peers/, state, logs, cache, runtimes)\n` +
803
822
  ` daemon plist written: ${plist}\n` +
804
823
  ` (NOT loaded — a live daemon migration is a separate step: launchctl bootstrap gui/$(id -u) ${plist})\n`,
@@ -39,11 +39,12 @@ async function fixture(): Promise<{ env: NodeJS.ProcessEnv; calls: string[][]; r
39
39
  const runTg: TgRunner = (args, e) => {
40
40
  calls.push(args)
41
41
  if (args[0] === 'bot' && args[1] === 'add') {
42
- // the package's behavior: token → bots/<alias>/.env; prints the validated @username
42
+ // the package's behavior: token → bots/<alias>/.env (incl. the username
43
+ // field — the RELIABLE source, live-host fact); stdout also prints one @username line (the fallback source)
43
44
  const p = botEnvPath(args[2]!, e)
44
45
  mkdirSync(dirname(p), { recursive: true })
45
- writeFileSync(p, `TELEGRAM_BOT_TOKEN=${args[4]}\n`)
46
- return { status: 0, stdout: 'bot added: @leo_test_bot\n', stderr: '' }
46
+ writeFileSync(p, `TELEGRAM_BOT_TOKEN=${args[4]}\nTELEGRAM_BOT_USERNAME=leo_env_bot\n`)
47
+ return { status: 0, stdout: 'bot added: @leo_stdout_bot\n', stderr: '' }
47
48
  }
48
49
  return { status: 0, stdout: '', stderr: '' }
49
50
  }
@@ -63,7 +64,7 @@ describe('connectTelegram (one flow: bot add → interface → restart → activ
63
64
  const { env, calls, runTg, restarts } = await fixture()
64
65
  const r = await connectTelegram({ peer: 'leo', token: 'T1:abc', env, runTg, restart: okRestart(restarts) })
65
66
  expect(r.state).toBe('connected')
66
- expect(r.username).toBe('@leo_test_bot')
67
+ expect(r.username).toBe('@leo_env_bot') // .env field WINS over the stdout match
67
68
  expect(r.restart?.state).toBe('restarted')
68
69
  expect(restarts).toEqual(['arthur']) // the router = the natural telegram peer, not leo
69
70
  expect(calls[0]).toEqual(['bot', 'add', 'leo', '--token', 'T1:abc'])
@@ -116,6 +117,24 @@ describe('connectTelegram (one flow: bot add → interface → restart → activ
116
117
  expect(r2.state).toBe('refused-no-token')
117
118
  })
118
119
 
120
+ test('username falls back to the bot-add stdout when .env carries no username field', async () => {
121
+ const env = envFor(mkTmp())
122
+ writeRuntimeManifest({ runtime: 'telegram', selfConfig: '/stub/telegram-runtime self-config' }, { env })
123
+ await upsertPeer({ personality: 'leo', runtime: 'claude', cwd: '/tmp/leo', intelligence: 'artificial' }, { env })
124
+ await upsertPeer({ personality: 'arthur', runtime: 'telegram', cwd: '/tmp/arthur', intelligence: 'natural' }, { env })
125
+ const runTg: TgRunner = (args, e) => {
126
+ if (args[0] === 'bot') {
127
+ const p = botEnvPath('leo', e)
128
+ mkdirSync(dirname(p), { recursive: true })
129
+ writeFileSync(p, 'TELEGRAM_BOT_TOKEN=T\n') // no username field (older package)
130
+ return { status: 0, stdout: 'added @stdout_only_bot\n', stderr: '' }
131
+ }
132
+ return { status: 0, stdout: '', stderr: '' }
133
+ }
134
+ const r = await connectTelegram({ peer: 'leo', token: 'T', env, runTg, restart: okRestart([]) })
135
+ expect(r.username).toBe('@stdout_only_bot')
136
+ })
137
+
119
138
  test('bot add failure (getMe refusal on a bad token) → bot-add-failed with the package detail', async () => {
120
139
  const { env } = await fixture()
121
140
  const failTg: TgRunner = args =>
@@ -183,7 +183,17 @@ export async function connectTelegram(opts: ConnectTelegramOptions): Promise<Con
183
183
  if (add.status !== 0) {
184
184
  return { state: 'bot-add-failed', peer, detail: (add.stderr || add.stdout || `exit ${add.status}`).trim() }
185
185
  }
186
- const username = add.stdout.match(/@[A-Za-z0-9_]{3,}/)?.[0]
186
+ // @username: the bots/<alias>/.env TELEGRAM_BOT_USERNAME field is the RELIABLE
187
+ // source (present on the live host; survives a quiet bot-add stdout — boris's
188
+ // acceptance saw the activation line degrade to the BotFather hint). stdout
189
+ // match stays as the fallback.
190
+ const envAfterAdd = readBotEnv(alias, env)
191
+ const envUser = envAfterAdd?.match(/^TELEGRAM_BOT_USERNAME=(.+)$/m)?.[1]?.trim()
192
+ const username = envUser
193
+ ? envUser.startsWith('@')
194
+ ? envUser
195
+ : `@${envUser}`
196
+ : add.stdout.match(/@[A-Za-z0-9_]{3,}/)?.[0]
187
197
 
188
198
  // (2) interface bot — merge the channel binding into the peer's profile.
189
199
  const iface = runTg(['interface', 'bot', alias, '--peer', peer], env)
@@ -18,4 +18,15 @@ describe('resolveSockDir', () => {
18
18
  expect(resolveSockDir({ IAPEER_SOCK_DIR: ' ' })).toBe(DEFAULT_SOCK_DIR)
19
19
  expect(resolveSockDir({ IAPEER_SOCK_DIR: '' })).toBe(DEFAULT_SOCK_DIR)
20
20
  })
21
+ test('IAPEER_ROOT implies socket isolation: <root>/socks (boris e2e find 10.06)', () => {
22
+ // An alt-root used to inherit GLOBAL /tmp — a sandboxed list saw PROD sessions
23
+ // live by name collision, and sandboxed stop/start would have hit prod.
24
+ expect(resolveSockDir({ IAPEER_ROOT: '/tmp/sbx/iapeer' })).toBe('/tmp/sbx/iapeer/socks')
25
+ })
26
+ test('explicit IAPEER_SOCK_DIR wins over the root-derived dir', () => {
27
+ expect(resolveSockDir({ IAPEER_ROOT: '/tmp/sbx/iapeer', IAPEER_SOCK_DIR: '/tmp/elsewhere' })).toBe('/tmp/elsewhere')
28
+ })
29
+ test('prod shape (no IAPEER_ROOT, no IAPEER_SOCK_DIR) stays on /tmp — untouched', () => {
30
+ expect(resolveSockDir({ HOME: '/Users/x' })).toBe('/tmp')
31
+ })
21
32
  })
@@ -2,6 +2,8 @@
2
2
  // Consolidated from inter-agent-protocol/src/lib/constants.ts (wins as-is) and
3
3
  // extended with storage-layer path names (blueprint §1 core/constants).
4
4
 
5
+ import { join } from 'path'
6
+
5
7
  export const NAME_RE = /^[a-z][a-z0-9-]{0,31}$/
6
8
  export const NAME_RE_SOURCE = '^[a-z][a-z0-9-]{0,31}$'
7
9
  export const RUNTIME_RE = /^[a-z][a-z0-9]{0,31}$/
@@ -124,8 +126,18 @@ export const DEFAULT_SOCK_DIR = '/tmp'
124
126
  // scan/resolve, lifecycle, launchdRun) MUST resolve through this ONE helper so they
125
127
  // agree — a site that hardcodes DEFAULT_SOCK_DIR would look in /tmp while a sandbox
126
128
  // (IAPEER_SOCK_DIR set) created the session elsewhere → a false "offline".
129
+ //
130
+ // IAPEER_ROOT IMPLIES SOCKET ISOLATION (boris's e2e find 10.06): an alt-root used
131
+ // to inherit the GLOBAL /tmp, so a sandboxed `list` saw PROD sessions live by name
132
+ // collision, and a sandboxed stop/start would have HIT a prod session. A set root
133
+ // now derives `<root>/socks` unless IAPEER_SOCK_DIR explicitly says otherwise; the
134
+ // prod daemon (no IAPEER_ROOT) keeps the canonical /tmp untouched.
127
135
  export function resolveSockDir(env: NodeJS.ProcessEnv = process.env): string {
128
- return env.IAPEER_SOCK_DIR?.trim() || DEFAULT_SOCK_DIR
136
+ const explicit = env.IAPEER_SOCK_DIR?.trim()
137
+ if (explicit) return explicit
138
+ const root = env.IAPEER_ROOT?.trim()
139
+ if (root) return join(root, 'socks')
140
+ return DEFAULT_SOCK_DIR
129
141
  }
130
142
 
131
143
  // === per-peer cwd scope ===
@@ -5,13 +5,19 @@
5
5
  // plists run the INSTALLED binary, and any edit/git-op in the tree no longer hits
6
6
  // prod. Update = atomic overwrite in place (build to .tmp → rename over), with ONE
7
7
  // .prev for rollback. NO versions/ catalog + resolver-symlink (that pattern is for
8
- // multi-version toolchains; the foundation is one-latest — and a stable path keeps
9
- // macOS TCC rights through updates, which a versioned path would re-prompt).
8
+ // multi-version toolchains; the foundation is one-latest).
9
+ //
10
+ // macOS TCC: a stable PATH is NOT enough to keep grants through updates — TCC keys
11
+ // on the code requirement, and an ad-hoc bun-compiled binary's requirement is its
12
+ // CDHash (changes every build → re-prompts; live-proven 10.06, Артур's DX
13
+ // requirement). signInstalledBinary (signing.ts) re-signs each install with the
14
+ // stable local identity so the designated requirement — and the grants — survive.
10
15
 
11
16
  import { copyFileSync, existsSync, mkdirSync, renameSync, statSync } from 'fs'
12
17
  import { homedir } from 'os'
13
18
  import { join } from 'path'
14
19
  import { spawnSync } from 'child_process'
20
+ import { signInstalledBinary, type SigningOutcome } from './signing.ts'
15
21
 
16
22
  /** The stable host-wide install path of the `iapeer` binary. Standard user-bin (no
17
23
  * admin, not tied to a node/bun version), ON $PATH. The launchd plists reference
@@ -30,6 +36,9 @@ export interface InstallResult {
30
36
  prevPath?: string
31
37
  /** Bytes of the installed binary. */
32
38
  size?: number
39
+ /** Stable-identity re-sign outcome (TCC grants survive updates). Soft: a signing
40
+ * hiccup never fails the install — the binary works ad-hoc-signed. */
41
+ signing?: SigningOutcome
33
42
  }
34
43
 
35
44
  /**
@@ -74,13 +83,16 @@ export function installIapeer(cliEntrypoint: string, env: NodeJS.ProcessEnv = pr
74
83
  copyFileSync(binPath, prevPath)
75
84
  }
76
85
  renameSync(tmp, binPath) // atomic replace in place (POSIX rename over an existing file)
86
+ // Stable-identity re-sign (TCC grants survive updates). AFTER the rename: the
87
+ // signature belongs to the final inode at the final path. Soft-fail by design.
88
+ const signing = signInstalledBinary(binPath, env)
77
89
  let size: number | undefined
78
90
  try {
79
91
  size = statSync(binPath).size
80
92
  } catch {
81
93
  /* best-effort */
82
94
  }
83
- return { binPath, prevPath, size }
95
+ return { binPath, prevPath, size, signing }
84
96
  }
85
97
 
86
98
  /** The previous-binary path kept by the last install for one-step rollback. */
@@ -113,6 +125,9 @@ export function rollbackIapeer(env: NodeJS.ProcessEnv = process.env): RollbackRe
113
125
  try {
114
126
  copyFileSync(prev, tmp)
115
127
  renameSync(tmp, binPath)
128
+ // Keep the stable requirement on the restored bytes too (a .prev taken before
129
+ // the signing era is ad-hoc — re-signing it heals that). Soft by design.
130
+ signInstalledBinary(binPath, env)
116
131
  } catch (e) {
117
132
  try {
118
133
  if (existsSync(tmp)) renameSync(tmp, `${tmp}.discard`) // never leave a half-written tmp on the path
@@ -0,0 +1,84 @@
1
+ // signInstalledBinary — stable-identity re-sign so TCC grants survive updates
2
+ // (Артур's DX requirement 10.06). DI-runner units; the real keychain flow was
3
+ // proven live (/tmp experiment: two binaries, different CDHash, IDENTICAL
4
+ // designated requirement `identifier "com.agfpd.iapeer" and certificate leaf`).
5
+ // The sandbox guard double-checks process.env, so these tests inject a runner
6
+ // AND call with the flag stripped via a direct env — guard tested separately.
7
+
8
+ import { describe, expect, test } from 'bun:test'
9
+ import { SIGNING_IDENTIFIER, SIGNING_IDENTITY_CN, signInstalledBinary, type SigningRunner } from './signing.ts'
10
+
11
+ function harness(opts: { identityExists: boolean; failAt?: 'req' | 'pkcs12' | 'import' | 'codesign' }) {
12
+ const calls: { cmd: string; args: string[] }[] = []
13
+ const run: SigningRunner = (cmd, args) => {
14
+ calls.push({ cmd, args })
15
+ if (cmd === 'security' && args[0] === 'find-identity') {
16
+ return { status: 0, stdout: opts.identityExists ? `1) ABC "${SIGNING_IDENTITY_CN}" (CSSMERR_TP_NOT_TRUSTED)\n` : 'no identities\n', stderr: '' }
17
+ }
18
+ if (cmd.endsWith('openssl') && args[0] === 'req') return { status: opts.failAt === 'req' ? 1 : 0, stdout: '', stderr: 'req boom' }
19
+ if (cmd.endsWith('openssl') && args[0] === 'pkcs12') return { status: opts.failAt === 'pkcs12' ? 1 : 0, stdout: '', stderr: 'p12 boom' }
20
+ if (cmd === 'security' && args[0] === 'import') return { status: opts.failAt === 'import' ? 1 : 0, stdout: '', stderr: 'import boom' }
21
+ if (cmd === 'codesign') return { status: opts.failAt === 'codesign' ? 1 : 0, stdout: '', stderr: 'sign boom' }
22
+ return { status: 0, stdout: '', stderr: '' }
23
+ }
24
+ return { calls, run }
25
+ }
26
+
27
+ // NOTE: process.env.IAPEER_TEST_SANDBOX === '1' under `bun run test`, so the
28
+ // guard SHORT-CIRCUITS every real call — these units therefore stub process.env
29
+ // off for the duration of each call.
30
+ function withSandboxOff<T>(fn: () => T): T {
31
+ const prev = process.env.IAPEER_TEST_SANDBOX
32
+ delete process.env.IAPEER_TEST_SANDBOX
33
+ try {
34
+ return fn()
35
+ } finally {
36
+ if (prev !== undefined) process.env.IAPEER_TEST_SANDBOX = prev
37
+ }
38
+ }
39
+
40
+ describe('signInstalledBinary (stable identity → TCC grants survive updates)', () => {
41
+ test('sandbox guard: never touches the keychain under IAPEER_TEST_SANDBOX', () => {
42
+ const h = harness({ identityExists: true })
43
+ const r = signInstalledBinary('/x/iapeer', { IAPEER_TEST_SANDBOX: '1' } as NodeJS.ProcessEnv, h.run)
44
+ expect(r.state).toBe('skipped-sandbox')
45
+ expect(h.calls.length).toBe(0)
46
+ })
47
+
48
+ test('existing identity → single codesign with the stable identifier', () => {
49
+ const h = harness({ identityExists: true })
50
+ const r = withSandboxOff(() => signInstalledBinary('/x/iapeer', {} as NodeJS.ProcessEnv, h.run))
51
+ expect(r.state).toBe('signed')
52
+ const sign = h.calls.find(c => c.cmd === 'codesign')!
53
+ expect(sign.args).toEqual(['-f', '-s', SIGNING_IDENTITY_CN, '--identifier', SIGNING_IDENTIFIER, '/x/iapeer'])
54
+ // identity lookup is NOT -v (an untrusted self-signed identity must be found)
55
+ const find = h.calls.find(c => c.args[0] === 'find-identity')!
56
+ expect(find.args).not.toContain('-v')
57
+ })
58
+
59
+ test('no identity → created once (openssl req → pkcs12 → import -T codesign), then signed', () => {
60
+ const h = harness({ identityExists: false })
61
+ const r = withSandboxOff(() => signInstalledBinary('/x/iapeer', {} as NodeJS.ProcessEnv, h.run))
62
+ expect(r.state).toBe('signed-new-identity')
63
+ const seq = h.calls.map(c => `${c.cmd.split('/').pop()}:${c.args[0]}`)
64
+ expect(seq).toEqual(['security:find-identity', 'openssl:req', 'openssl:pkcs12', 'security:import', 'codesign:-f'])
65
+ const imp = h.calls.find(c => c.args[0] === 'import')!
66
+ expect(imp.args).toContain('-T') // codesign pre-authorized in the key ACL
67
+ expect(imp.args).toContain('/usr/bin/codesign')
68
+ })
69
+
70
+ test('identity-creation failure → failed-soft with the loud TCC consequence, codesign never attempted', () => {
71
+ const h = harness({ identityExists: false, failAt: 'import' })
72
+ const r = withSandboxOff(() => signInstalledBinary('/x/iapeer', {} as NodeJS.ProcessEnv, h.run))
73
+ expect(r.state).toBe('failed-soft')
74
+ expect(r.detail).toContain('TCC prompts will re-appear')
75
+ expect(h.calls.some(c => c.cmd === 'codesign')).toBe(false)
76
+ })
77
+
78
+ test('codesign failure → failed-soft (install never breaks on a signing hiccup)', () => {
79
+ const h = harness({ identityExists: true, failAt: 'codesign' })
80
+ const r = withSandboxOff(() => signInstalledBinary('/x/iapeer', {} as NodeJS.ProcessEnv, h.run))
81
+ expect(r.state).toBe('failed-soft')
82
+ expect(r.detail).toContain('sign boom')
83
+ })
84
+ })
@@ -0,0 +1,135 @@
1
+ // Stable code-signing for the installed binary — TCC grants must SURVIVE updates
2
+ // (Артур's DX requirement 10.06: «1 раз при установке, потом обновления не должны
3
+ // опять триггерить»).
4
+ //
5
+ // ROOT CAUSE (proven on the host): `bun build --compile` output is ad-hoc
6
+ // linker-signed (Identifier=a.out, no cert chain) — its only stable identity is
7
+ // the CDHash, which CHANGES with every build. macOS TCC keys grants on the code
8
+ // requirement; for an ad-hoc binary that collapses to the cdhash → every update
9
+ // is a NEW TCC subject → re-prompts.
10
+ //
11
+ // FIX (proven live, /tmp experiment 10.06): a LOCAL self-signed code-signing
12
+ // identity ("iapeer Local Codesign", created once at install) re-signs the binary
13
+ // after every build. Two different binaries signed by it carry the IDENTICAL
14
+ // designated requirement:
15
+ // identifier "com.agfpd.iapeer" and certificate leaf = H"<leaf-hash>"
16
+ // — stable across updates, so the TCC grant follows the requirement, not the
17
+ // bytes. Trust of the cert chain is NOT needed: codesign signs with an untrusted
18
+ // (CSSMERR_TP_NOT_TRUSTED) identity fine, and TCC matches the requirement.
19
+ //
20
+ // Failure policy: SOFT. The binary works ad-hoc-signed exactly as before; a
21
+ // signing hiccup must never break install/update. It is reported loud (the
22
+ // operator learns TCC prompts will re-appear) but the install succeeds.
23
+
24
+ import { mkdtempSync, rmSync } from 'fs'
25
+ import { tmpdir } from 'os'
26
+ import { join } from 'path'
27
+ import { spawnSync } from 'child_process'
28
+
29
/** Common Name of the local self-signed code-signing identity (keychain lookup key). */
export const SIGNING_IDENTITY_CN = 'iapeer Local Codesign'
/** Code-signing identifier stamped on every build, so the designated requirement stays constant. */
export const SIGNING_IDENTIFIER = 'com.agfpd.iapeer'

/** System LibreSSL — ALWAYS present on macOS and its pkcs12 output imports into
 *  the keychain directly. (Homebrew OpenSSL 3.x defaults to PBES2/AES p12, which
 *  `security import` rejects with "MAC verification failed" unless -legacy —
 *  live-caught during the experiment; pinning the system binary removes the
 *  PATH-dependent branch entirely.) */
const SYSTEM_OPENSSL = '/usr/bin/openssl'

/** Tail appended to every failed-soft detail: tells the operator what the failure costs. */
const AD_HOC_CONSEQUENCE = ' — binary stays ad-hoc-signed (works, but TCC prompts will re-appear after updates)'

/**
 * Injectable process runner (the DI seam used by the unit tests).
 * `status` is null when the process produced no exit status (spawn error, timeout).
 */
export interface SigningRunner {
  (cmd: string, args: string[], input?: string): { status: number | null; stdout: string; stderr: string }
}

/**
 * Production runner: a synchronous spawn with a hard ceiling.
 *
 * A spawn-level failure (binary missing, timeout) is reported as `status: null`;
 * when the child wrote nothing to stderr the spawn error message is surfaced
 * there instead, so the failed-soft detail always names a reason.
 */
const defaultRunner: SigningRunner = (cmd, args, input) => {
  // 90 s ceiling: a keychain GUI prompt left unanswered must not wedge an
  // unattended update forever — it degrades to failed-soft instead.
  const r = spawnSync(cmd, args, { encoding: 'utf8', timeout: 90_000, input })
  const stdout = r.stdout ?? ''
  const stderr = r.stderr ?? ''
  if (r.error) {
    return { status: null, stdout, stderr: stderr.trim() ? stderr : r.error.message }
  }
  return { status: r.status, stdout, stderr }
}

export interface SigningOutcome {
  state:
    | 'signed' // re-signed with the existing identity
    | 'signed-new-identity' // identity created this run (the ONE install-time event), then signed
    | 'skipped-sandbox' // tests never touch the real keychain
    | 'failed-soft' // signing failed — binary stays ad-hoc (works; TCC prompts return)
  detail?: string
}

/**
 * Human-readable reason for a failed step: the first line of stderr, or — when
 * stderr is empty — the exit status. (`''.split('\n')[0]` is `''`, never
 * `undefined`, so the fallback must test for emptiness rather than use `??`.)
 */
function failureReason(r: { status: number | null; stderr: string }): string {
  const [first = ''] = r.stderr.trim().split('\n')
  if (first.trim()) return first
  return r.status === null ? 'no exit status (spawn error or timeout)' : `exit ${r.status}`
}

/** True iff the local signing identity already exists in the keychain. Deliberately
 *  NOT `-v` (valid-only): the self-signed cert reads CSSMERR_TP_NOT_TRUSTED, which
 *  is fine for signing — `-v` would hide it and re-create endlessly. */
function identityPresent(run: SigningRunner): boolean {
  const r = run('security', ['find-identity', '-p', 'codesigning'])
  return r.status === 0 && r.stdout.includes(`"${SIGNING_IDENTITY_CN}"`)
}

/**
 * Create the local self-signed code-signing identity (key + cert with EKU
 * codeSigning → p12 → keychain import with codesign pre-authorized via -T).
 * The one-time install event.
 *
 * Returns `{ ok: false, detail }` on the first failing step. May throw if the
 * temp directory cannot be created or the runner throws — signInstalledBinary
 * converts that into a failed-soft outcome.
 */
function createIdentity(run: SigningRunner): { ok: boolean; detail?: string } {
  const dir = mkdtempSync(join(tmpdir(), 'iapeer-signing-'))
  const key = join(dir, 'key.pem')
  const cert = join(dir, 'cert.pem')
  const p12 = join(dir, 'id.p12')
  // Throwaway p12 transport password — the file lives only until the finally
  // block below removes the mkdtemp-created directory.
  const pass = `iapeer-${process.pid}-${Math.floor(Math.random() * 1e9)}`
  try {
    const req = run(SYSTEM_OPENSSL, [
      'req', '-x509', '-newkey', 'rsa:2048', '-keyout', key, '-out', cert,
      '-days', '3650', '-nodes', '-subj', `/CN=${SIGNING_IDENTITY_CN}`,
      '-addext', 'keyUsage=digitalSignature', '-addext', 'extendedKeyUsage=codeSigning',
    ])
    if (req.status !== 0) return { ok: false, detail: `openssl req failed: ${failureReason(req)}` }
    const exp = run(SYSTEM_OPENSSL, [
      'pkcs12', '-export', '-inkey', key, '-in', cert, '-out', p12,
      '-passout', `pass:${pass}`, '-name', SIGNING_IDENTITY_CN,
    ])
    if (exp.status !== 0) return { ok: false, detail: `openssl pkcs12 failed: ${failureReason(exp)}` }
    // -T /usr/bin/codesign pre-authorizes codesign in the key's ACL — at most ONE
    // keychain confirmation at the very first signing (the install-time event).
    const imp = run('security', ['import', p12, '-P', pass, '-T', '/usr/bin/codesign'])
    if (imp.status !== 0) return { ok: false, detail: `security import failed: ${failureReason(imp)}` }
    return { ok: true }
  } finally {
    try {
      rmSync(dir, { recursive: true, force: true })
    } catch {
      /* best-effort cleanup of the throwaway key material */
    }
  }
}

/**
 * Re-sign the installed binary with the stable local identity (creating the
 * identity on first use), so the designated requirement stays constant while
 * the bytes change.
 *
 * @param binPath Path of the binary to sign in place.
 * @param env     Environment consulted for the sandbox guard (default: process.env).
 * @param run     Process runner; injectable for tests (default: real spawn, 90 s ceiling).
 * @returns The outcome. NEVER throws: the failure policy is SOFT, so any error —
 *          including an unexpected exception from the filesystem or the runner —
 *          is reported as `failed-soft` with a detail naming the TCC consequence.
 */
export function signInstalledBinary(
  binPath: string,
  env: NodeJS.ProcessEnv = process.env,
  run: SigningRunner = defaultRunner,
): SigningOutcome {
  // Keychain + codesign are HOST-GLOBAL — fail-closed double-check: consult both
  // the passed env and the process env.
  if (env.IAPEER_TEST_SANDBOX === '1' || process.env.IAPEER_TEST_SANDBOX === '1') {
    return { state: 'skipped-sandbox', detail: 'IAPEER_TEST_SANDBOX=1 — not touching the real keychain' }
  }
  try {
    let created = false
    if (!identityPresent(run)) {
      const c = createIdentity(run)
      if (!c.ok) {
        return { state: 'failed-soft', detail: `${c.detail}${AD_HOC_CONSEQUENCE}` }
      }
      created = true
    }
    const sign = run('codesign', ['-f', '-s', SIGNING_IDENTITY_CN, '--identifier', SIGNING_IDENTIFIER, binPath])
    if (sign.status !== 0) {
      return { state: 'failed-soft', detail: `codesign failed: ${failureReason(sign)}${AD_HOC_CONSEQUENCE}` }
    }
    return created ? { state: 'signed-new-identity' } : { state: 'signed' }
  } catch (e: unknown) {
    // Soft by contract: an exception here must not break install/update/rollback.
    const why = e instanceof Error ? e.message : String(e)
    return { state: 'failed-soft', detail: `signing aborted: ${why}${AD_HOC_CONSEQUENCE}` }
  }
}
@@ -57,6 +57,7 @@ export {
57
57
  installAlwaysOnPlist,
58
58
  isFoundationOwnedPlist,
59
59
  launchctlBootstrap,
60
+ bootstrapJobCore,
60
61
  resolveExecutable,
61
62
  IAPEER_PLIST_OWNER_KEY,
62
63
  } from './launchd.ts'
@@ -65,6 +66,9 @@ export type {
65
66
  InstallAlwaysOnPlistOptions,
66
67
  BootstrapResult,
67
68
  BootstrapState,
69
+ BootstrapCoreDeps,
70
+ BootstrapCoreResult,
71
+ LaunchctlRunner,
68
72
  } from './launchd.ts'
69
73
 
70
74
  // ─────────────────────────────────────────────────────────────────────────────
@@ -10,6 +10,7 @@ import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync
10
10
  import { tmpdir } from 'os'
11
11
  import { join } from 'path'
12
12
  import {
13
+ bootstrapJobCore,
13
14
  getAdapter,
14
15
  installAlwaysOnPlist,
15
16
  isFoundationOwnedPlist,
@@ -326,6 +327,74 @@ describe('resolveExecutable + runtime-bin pinning', () => {
326
327
  })
327
328
  })
328
329
 
330
+ // ─────────────────────────────────────────────────────────────────────────────
331
+ // bootstrapJobCore — the undead-job-safe bootstrap (boris's connect-acceptance
332
+ // find 10.06: bootout → immediate bootstrap → exit 5 I/O error → the whole
333
+ // fleet's telegram router stayed DOWN). Pure DI core: run/sleep injected.
334
+ // ─────────────────────────────────────────────────────────────────────────────
335
+
336
+ describe('bootstrapJobCore (undead-job race)', () => {
337
+ type Call = { args: string[] }
338
+ function harness(script: { printStatuses: number[]; bootstrapStatuses: number[] }) {
339
+ const calls: Call[] = []
340
+ const sleeps: number[] = []
341
+ let printI = 0
342
+ let bootI = 0
343
+ const run = (args: string[]) => {
344
+ calls.push({ args })
345
+ if (args[0] === 'print') {
346
+ const status = script.printStatuses[Math.min(printI, script.printStatuses.length - 1)]!
347
+ printI++
348
+ return { status, stderr: '' }
349
+ }
350
+ const status = script.bootstrapStatuses[Math.min(bootI, script.bootstrapStatuses.length - 1)]!
351
+ bootI++
352
+ return { status, stderr: status === 0 ? '' : 'Bootstrap failed: 5: Input/output error' }
353
+ }
354
+ return { calls, sleeps, deps: { run, sleepMs: (ms: number) => void sleeps.push(ms) } }
355
+ }
356
+
357
+ test('clean path: job not listed, first bootstrap succeeds — zero sleeps', () => {
358
+ const h = harness({ printStatuses: [1], bootstrapStatuses: [0] })
359
+ const r = bootstrapJobCore('501', 'com.iapeer.x', '/p.plist', h.deps)
360
+ expect(r).toEqual({ state: 'loaded', attempts: 1 })
361
+ expect(h.sleeps).toEqual([])
362
+ })
363
+
364
+ test("boris's repro: undead job vanishes after polls, first bootstrap exit 5, retry succeeds", () => {
365
+ // print: listed, listed, gone (the bootout dismantle window) → bootstrap:
366
+ // exit 5 once (still racy), success on the retry after backoff.
367
+ const h = harness({ printStatuses: [0, 0, 1, 1, 1], bootstrapStatuses: [5, 0] })
368
+ const r = bootstrapJobCore('501', 'com.iapeer.arthur', '/p.plist', h.deps)
369
+ expect(r.state).toBe('loaded')
370
+ expect(r.attempts).toBe(2)
371
+ expect(h.sleeps.length).toBeGreaterThan(0) // waited for gone + backoff before retry
372
+ })
373
+
374
+ test('genuinely LIVE job (stays listed through the gone budget) → already-loaded, bootstrap NEVER called', () => {
375
+ const h = harness({ printStatuses: [0], bootstrapStatuses: [0] }) // always listed
376
+ const r = bootstrapJobCore('501', 'com.iapeer.x', '/p.plist', { ...h.deps, goneTimeoutMs: 2_000 })
377
+ expect(r).toEqual({ state: 'already-loaded', attempts: 0 })
378
+ expect(h.calls.some(c => c.args[0] === 'bootstrap')).toBe(false)
379
+ })
380
+
381
+ test('every attempt fails → failed with the attempt count and the last stderr (LOUD, not silent)', () => {
382
+ const h = harness({ printStatuses: [1], bootstrapStatuses: [5] })
383
+ const r = bootstrapJobCore('501', 'com.iapeer.x', '/p.plist', h.deps)
384
+ expect(r.state).toBe('failed')
385
+ expect(r.attempts).toBe(4)
386
+ expect(r.detail).toContain('Input/output error')
387
+ expect(r.detail).toContain('4 bootstrap attempts')
388
+ })
389
+
390
+ test('a racing load between attempts reads already-loaded (idempotent success)', () => {
391
+ // first bootstrap fails; before the retry the job shows up listed (raced in)
392
+ const h = harness({ printStatuses: [1, 0], bootstrapStatuses: [5] })
393
+ const r = bootstrapJobCore('501', 'com.iapeer.x', '/p.plist', h.deps)
394
+ expect(r.state).toBe('already-loaded')
395
+ })
396
+ })
397
+
329
398
  describe('runAlwaysOn guard', () => {
330
399
  test('a non-infra runtime is rejected with exit code 1 (no tmux touched)', async () => {
331
400
  expect(await runAlwaysOn('boris', 'claude', '/tmp/whatever')).toBe(1)
@@ -215,6 +215,89 @@ function isLaunchdLoaded(label: string, uid: string): boolean {
215
215
  return spawnSync('launchctl', ['print', `gui/${uid}/${label}`], { stdio: 'ignore' }).status === 0
216
216
  }
217
217
 
218
+ // ─────────────────────────────────────────────────────────────────────────────
219
+ // UNDEAD-JOB-SAFE bootstrap core (boris's live find 10.06, connect acceptance):
220
+ // after `launchctl bootout` launchd dismantles the job ASYNCHRONOUSLY — an
221
+ // immediate `bootstrap` hits the still-listed "undead" job and fails with
222
+ // exit 5 "Input/output error" (the known PP race class, canon «Жизненный цикл
223
+ // запуска persistent-peer и точки гонки»). On the connect flow that left the
224
+ // WHOLE fleet's telegram router down. This core makes every restart-shaped flow
225
+ // (stop→start, connect router restart, update-runtime) survive the race:
226
+ // (1) WAIT-FOR-GONE: while the job is still listed, poll `print` up to
227
+ // goneTimeoutMs. Vanished → proceed to bootstrap. STILL listed at the
228
+ // deadline → it is a genuinely LIVE job (KeepAlive running), not an undead
229
+ // one → 'already-loaded' (the idempotent no-op, same meaning as before).
230
+ // (2) BOOTSTRAP WITH BACKOFF: attempts with [0, 2 s, 5 s, 15 s] pauses
231
+ // (~22 s budget — covers the observed "manual retry succeeded after ~30 s"
232
+ // window), re-checking gone before each retry. All attempts failed →
233
+ // 'failed' with the attempt count and the last stderr, so the caller can
234
+ // print the manual rescue recipe LOUD instead of leaving the job down
235
+ // silently.
236
+ // Pure DI core (run/sleep injected) — unit-testable without launchctl and
237
+ // without tripping the test-sandbox guard that wraps the public function.
238
+ // ─────────────────────────────────────────────────────────────────────────────
239
+
240
/**
 * Runs one `launchctl` invocation with the given arguments and reports its
 * exit status and stderr. Injected so the core can be exercised without a
 * real launchctl.
 */
export interface LaunchctlRunner {
  (args: string[]): { status: number | null; stderr: string }
}

/** Injected dependencies and tunables for {@link bootstrapJobCore}. */
export interface BootstrapCoreDeps {
  /** Executes `launchctl <args>`; only `print` and `bootstrap` are issued. */
  run: LaunchctlRunner
  /** Synchronous pause; the core calls it and continues once it returns. */
  sleepMs: (ms: number) => void
  /** Budget for the undead job to vanish after a bootout (default 10 000 ms). */
  goneTimeoutMs?: number
  /**
   * Pauses BEFORE each bootstrap attempt (default [0, 2000, 5000, 15000]).
   * An empty array is treated as a single immediate attempt.
   */
  backoffMs?: number[]
}

/** Outcome of {@link bootstrapJobCore}. */
export interface BootstrapCoreResult {
  /**
   * 'loaded' — a bootstrap call exited 0;
   * 'already-loaded' — the job was found listed, so no (further) bootstrap was needed;
   * 'failed' — every bootstrap attempt failed and the job is still not listed.
   */
  state: 'loaded' | 'already-loaded' | 'failed'
  /** Number of `bootstrap` invocations actually made. */
  attempts: number
  /** Failure description (attempt count + last stderr); set only for 'failed'. */
  detail?: string
}

/**
 * Bootstrap a launchd job into `gui/<uid>`, tolerating the window in which a
 * just-booted-out job is still listed.
 *
 * Phase 1 (wait-for-gone): if `print` lists the job, poll every 500 ms for up
 * to `goneTimeoutMs`. A job that is still listed afterwards is reported as
 * 'already-loaded' without any bootstrap call.
 *
 * Phase 2 (bootstrap with backoff): one attempt per `backoffMs` entry, pausing
 * first when the entry is positive. Before every retry, and once more after
 * the final failed attempt, the listing is re-checked: a listed job reads
 * 'already-loaded'.
 *
 * @param uid - numeric user id as a string, used in the `gui/<uid>` domain target.
 * @param label - launchd job label.
 * @param plistPath - path of the plist handed to `launchctl bootstrap`.
 * @param deps - injected runner, sleeper and optional timing overrides.
 * @returns the final state, the number of bootstrap calls made and, on
 *   failure, a detail string carrying the last stderr (or `exit <status>`).
 */
export function bootstrapJobCore(
  uid: string,
  label: string,
  plistPath: string,
  deps: BootstrapCoreDeps,
): BootstrapCoreResult {
  const goneTimeout = deps.goneTimeoutMs ?? 10_000
  const requestedBackoffs = deps.backoffMs ?? [0, 2_000, 5_000, 15_000]
  // An empty schedule would skip bootstrap entirely and report a bogus
  // "0 bootstrap attempts failed"; always make at least one immediate attempt.
  const backoffs = requestedBackoffs.length > 0 ? requestedBackoffs : [0]
  const listed = (): boolean => deps.run(['print', `gui/${uid}/${label}`]).status === 0

  // (1) wait-for-gone: an undead job vanishes while we poll; a job that stays
  //     listed through the whole budget is treated as live → idempotent no-op.
  if (listed()) {
    const pollStep = 500
    let waited = 0
    while (waited < goneTimeout) {
      // Never sleep past the budget when it is not a multiple of the poll step.
      const step = Math.min(pollStep, goneTimeout - waited)
      deps.sleepMs(step)
      waited += step
      if (!listed()) break
    }
    if (listed()) return { state: 'already-loaded', attempts: 0 }
  }

  // (2) bootstrap with backoff; re-verify gone before each retry.
  let last = ''
  let attempts = 0
  for (const pause of backoffs) {
    if (pause > 0) deps.sleepMs(pause)
    if (attempts > 0 && listed()) {
      // the failed attempt may have half-loaded it, or a race loaded it — success
      return { state: 'already-loaded', attempts }
    }
    const r = deps.run(['bootstrap', `gui/${uid}`, plistPath])
    attempts++
    if (r.status === 0) return { state: 'loaded', attempts }
    last = r.stderr.trim() || `exit ${r.status}`
  }

  // The final attempt gets the same race tolerance as the earlier ones: a job
  // that is listed now was loaded by someone else, which is still success.
  if (listed()) return { state: 'already-loaded', attempts }

  return {
    state: 'failed',
    attempts,
    detail: `${attempts} bootstrap attempts failed (last: ${last})`,
  }
}
300
+
218
301
  export type DaemonRestartState =
219
302
  | 'restarted' // kickstart -k succeeded → the daemon is now on the freshly-installed binary
220
303
  | 'not-loaded' // com.agfpd.iapeer is not in the gui domain → nothing to restart (new binary
@@ -286,13 +369,20 @@ export function launchctlBootstrap(
286
369
  return { state: 'skipped-sandbox', label, detail: 'IAPEER_TEST_SANDBOX=1 — not loading a real launchd job' }
287
370
  }
288
371
  const uid = currentUid()
289
- if (isLaunchdLoaded(label, uid)) return { state: 'already-loaded', label }
290
- const r = spawnSync('launchctl', ['bootstrap', `gui/${uid}`, plistPath], { encoding: 'utf8' })
291
- if (r.status === 0) return { state: 'loaded', label }
292
- // A race could have loaded it between the check and the bootstrap; treat a
293
- // now-loaded service as success (still idempotent).
294
- if (isLaunchdLoaded(label, uid)) return { state: 'already-loaded', label }
295
- return { state: 'failed', label, detail: (r.stderr ?? '').trim() || `launchctl bootstrap exited ${r.status}` }
372
+ // UNDEAD-JOB-SAFE core (boris's connect-acceptance find): wait for a booted-out
373
+ // job to actually vanish, then bootstrap with backoff. A genuinely LIVE job
374
+ // reads 'already-loaded' (idempotent no-op, same semantics as before); only a
375
+ // job that stays failing through every attempt reads 'failed'.
376
+ const core = bootstrapJobCore(uid, label, plistPath, {
377
+ run: args => {
378
+ const r = spawnSync('launchctl', args, { encoding: 'utf8' })
379
+ return { status: r.status, stderr: r.stderr ?? '' }
380
+ },
381
+ sleepMs: ms => spawnSync('sleep', [String(ms / 1000)]),
382
+ })
383
+ return core.state === 'failed'
384
+ ? { state: 'failed', label, detail: core.detail }
385
+ : { state: core.state, label }
296
386
  }
297
387
 
298
388
  export interface InstallAlwaysOnPlistOptions {
@@ -65,11 +65,15 @@ function isExecutable(binOrName: string, env: NodeJS.ProcessEnv = process.env):
65
65
  }
66
66
  }
67
67
  // bare name → PRESENCE probe over PATH (`command -v` semantics), NO spawn.
68
- // History (both live finds 10.06): the original `--version` ANSWER probe HANGS
69
- // FOREVER for codex in a non-tty (three stray probes sat 25+ min); the 10 s
70
- // timeout that replaced it then DEGRADED a LIVE codex to 'runtime-missing'
71
- // masking a working runtime (boris's catch). The skip-decision only asks "is
72
- // the runtime installed", and presence answers that without executing anything.
68
+ // History (live finds 10.06): the original `--version` ANSWER probe hung forever
69
+ // (three stray probes sat 25+ min); the 10 s timeout that replaced it then
70
+ // DEGRADED a live-looking codex to 'runtime-missing' (boris's catch). ROOT CAUSE
71
+ // (final, boris+iapeer-memory): macOS held the cask-updated codex on a GUI
72
+ // launch-approval dialog EVERY invocation parked before main (observed as a
73
+ // dyld hang) until the owner confirmed the dialog. NOT a non-tty class, not a
74
+ // broken binary. The presence probe stays right regardless: the skip question is
75
+ // "is the runtime installed", and presence answers it without executing a
76
+ // possibly-wedged binary at all.
73
77
  for (const dir of (env.PATH ?? '').split(':')) {
74
78
  if (!dir) continue
75
79
  try {
@@ -90,10 +94,11 @@ function isExecutable(binOrName: string, env: NodeJS.ProcessEnv = process.env):
90
94
  */
91
95
  export function isMarketplaceRegistered(runtime: OnboardRuntime, env: NodeJS.ProcessEnv = process.env): boolean {
92
96
  const bin = runtimeBin(runtime, env)
93
- // HARD TIMEOUT — the codex CLI hangs FOREVER in a non-tty on ANY subcommand
94
- // (live 10.06: first `--version`, then `plugin marketplace list` after the
95
- // presence-probe fix let a live codex through). Timeout status null →
96
- // "not registered" the add (also time-bounded) decides; never a wedge.
97
+ // HARD TIMEOUT — a runtime CLI can wedge before main on ANY invocation (live
98
+ // 10.06: macOS launch-approval pending after a cask update parked codex — first
99
+ // `--version`, then this very `plugin marketplace list` after the presence
100
+ // probe let the binary through). Timeout status null → "not registered" →
101
+ // the add (also time-bounded) decides; never a wedge.
97
102
  const r = spawnSync(bin, ['plugin', 'marketplace', 'list'], { encoding: 'utf8', timeout: 60_000 })
98
103
  if (r.status !== 0) return false
99
104
  return isAgfpdInList(`${r.stdout ?? ''}`)
@@ -115,12 +120,13 @@ export function isAgfpdInList(listOutput: string): boolean {
115
120
  /** Register OUR marketplace for this runtime (`<runtime> plugin marketplace add <ref>`). */
116
121
  function registerMarketplace(runtime: OnboardRuntime, env: NodeJS.ProcessEnv): { ok: boolean; detail?: string } {
117
122
  const bin = runtimeBin(runtime, env)
118
- // Same hard timeout as the list probe (codex non-tty hang class)a wedged add
119
- // degrades to a loud 'failed' line instead of freezing the host phase.
123
+ // Same hard timeout as the list probe (the pre-main wedge class — known live
124
+ // representative: macOS launch-approval pending after a cask update) a wedged
125
+ // add degrades to a loud 'failed' line instead of freezing the host phase.
120
126
  const r = spawnSync(bin, ['plugin', 'marketplace', 'add', MARKETPLACE_REF], { encoding: 'utf8', timeout: 120_000 })
121
127
  return r.status === 0
122
128
  ? { ok: true }
123
- : { ok: false, detail: (r.stderr ?? '').trim() || (r.status === null ? 'timed out (non-tty hang?)' : `exit ${r.status}`) }
129
+ : { ok: false, detail: (r.stderr ?? '').trim() || (r.status === null ? 'timed out (wedged runtime CLI?)' : `exit ${r.status}`) }
124
130
  }
125
131
 
126
132
  /**