@phnx-labs/agents-cli 1.20.19 → 1.20.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  ## Unreleased
4
4
 
5
+ **`agents secrets start`: persistent secrets-agent service (fixes the broker under heavy load)**
6
+
7
+ - On a heavily-loaded machine (many concurrent agents, high load average) the on-demand broker — a full CLI cold-start — couldn't get scheduled enough CPU to finish booting and bind its socket, so `unlock`/auto-cache silently failed and reads kept prompting. New `agents secrets start` installs the broker as a **launchd user service** (`RunAtLoad` + `KeepAlive`, `ProcessType: Interactive` for foreground scheduling priority): it starts once and stays up for the whole login session, so every read just connects — the cold start happens once (and launchd retries until it wins), never per read. `agents secrets stop` removes it; `agents secrets status` shows whether it's installed.
8
+ - `unlock` and the auto-cache worker now install/kickstart this service automatically via `ensureAgentRunning`, falling back to the old one-off detached spawn only if the service path is unavailable. So the persistent broker is set up on first use with no extra step.
9
+ - macOS only. Security model unchanged: in-memory only, per-bundle TTL, wiped on screen-lock/sleep.
10
+
5
11
  **Fix: secrets-agent auto-cache now survives a slow broker cold-start under load**
6
12
 
7
13
  - `secrets.agent.auto` (auto-cache on first read of a `session`-tier bundle) used a fire-and-forget inline loader that gave up connecting to the broker after 3s. But the broker it spawns is itself a full CLI cold-starting; under heavy load (many concurrent agents) that can exceed 3s, so the loader quit before the broker bound and the cache silently never populated — every read kept prompting. The auto-load now runs through a detached `secrets _agent-load` worker that reuses the robust `ensureAgentRunning` path (spawn-then-ping, 20s budget) and loads synchronously, so it reliably populates even when the broker is slow to start. Manual `agents secrets unlock` was always reliable and is unchanged. (secret values still travel over stdin, never argv.)
@@ -11,7 +11,7 @@ import { spawnSync } from 'child_process';
11
11
  import { bundleExists, bundleItemStore, bundleTier, deleteBundle, describeBundle, keychainItemsForBundle, keychainRef, listBundles, migrateLegacyBundles, parseDotenv, readAndResolveBundleEnv, readBundle, renameBundle, rotateBundleSecret, validateBundleName, validateEnvKey, validateExpiresFutureDated, validateSecretType, writeBundle, } from '../lib/secrets/bundles.js';
12
12
  import { getKeychainToken, getKeychainTokens, hasKeychainToken, secretsKeychainItem, setKeychainToken, } from '../lib/secrets/index.js';
13
13
  import { assertOpAvailable, createPasswordItem, deleteItemByTitle, extractSecrets, itemExistsByTitle, listItems, listVaults, } from '../lib/onepassword.js';
14
- import { DEFAULT_TTL_MS, agentLoad, agentLock, agentStatus, ensureAgentRunning, runAgentLoadFromStdin, runSecretsAgent, } from '../lib/secrets/agent.js';
14
+ import { DEFAULT_TTL_MS, agentLoad, agentLock, agentStatus, ensureAgentRunning, installSecretsAgentService, runAgentLoadFromStdin, runSecretsAgent, secretsAgentServiceInstalled, uninstallSecretsAgentService, } from '../lib/secrets/agent.js';
15
15
  import { parseDuration } from '../lib/hooks/cache.js';
16
16
  import { registerCommandGroups, setHelpSections } from '../lib/help.js';
17
17
  import { isInteractiveTerminal, isPromptCancelled } from './utils.js';
@@ -416,7 +416,7 @@ export function registerSecretsCommands(program) {
416
416
  registerCommandGroups(cmd, [
417
417
  { title: 'Bundle commands', names: ['list', 'view', 'create', 'rename', 'describe', 'delete'] },
418
418
  { title: 'Secret commands', names: ['add', 'rotate', 'remove', 'import', 'export'] },
419
- { title: 'Agent commands', names: ['unlock', 'lock', 'status', 'tier'] },
419
+ { title: 'Agent commands', names: ['start', 'stop', 'unlock', 'lock', 'status', 'tier'] },
420
420
  { title: 'Raw item commands', names: ['get', 'set'] },
421
421
  { title: 'Sync commands', names: ['push', 'pull', 'remote-list'] },
422
422
  { title: 'Utilities', names: ['exec', 'generate', 'migrate-acl'] },
@@ -1363,6 +1363,10 @@ Examples:
1363
1363
  console.log(chalk.gray('secrets-agent is macOS-only.'));
1364
1364
  return;
1365
1365
  }
1366
+ console.log(chalk.gray('service: ') +
1367
+ (secretsAgentServiceInstalled()
1368
+ ? chalk.green('installed (persistent)')
1369
+ : chalk.yellow('not installed — run `agents secrets start` for a persistent broker')));
1366
1370
  const entries = await agentStatus();
1367
1371
  if (entries.length === 0) {
1368
1372
  console.log(chalk.gray('No bundles unlocked. The secrets-agent is idle or not running.'));
@@ -1397,11 +1401,38 @@ Examples:
1397
1401
  process.exit(1);
1398
1402
  }
1399
1403
  });
1404
+ cmd
1405
+ .command('start')
1406
+ .description('Install + start the secrets-agent as a persistent background service (macOS). Survives heavy load; reads connect instantly.')
1407
+ .action(async () => {
1408
+ if (process.platform !== 'darwin') {
1409
+ console.error(chalk.red('secrets-agent service is macOS-only.'));
1410
+ process.exit(1);
1411
+ }
1412
+ process.stdout.write(chalk.gray('Installing launchd service…\n'));
1413
+ if (await installSecretsAgentService()) {
1414
+ console.log(chalk.green('secrets-agent service running.') + chalk.gray(' It stays up across the session; unlock/auto-cache now connect instantly.'));
1415
+ }
1416
+ else {
1417
+ console.error(chalk.red('Service installed but did not become reachable in time (machine may be heavily loaded — launchd will keep retrying).'));
1418
+ process.exit(1);
1419
+ }
1420
+ });
1421
+ cmd
1422
+ .command('stop')
1423
+ .description('Stop + remove the persistent secrets-agent service and wipe what it held.')
1424
+ .action(async () => {
1425
+ if (process.platform !== 'darwin')
1426
+ return;
1427
+ await uninstallSecretsAgentService();
1428
+ console.log(chalk.green('secrets-agent service stopped and removed.'));
1429
+ });
1400
1430
  cmd
1401
1431
  .command('_agent-run', { hidden: true })
1402
1432
  .description('Run the secrets-agent broker in the foreground (internal)')
1403
- .action(async () => {
1404
- await runSecretsAgent();
1433
+ .option('--service', 'run as a persistent launchd service (never idle-exit)')
1434
+ .action(async (opts) => {
1435
+ await runSecretsAgent({ service: Boolean(opts.service) });
1405
1436
  });
1406
1437
  cmd
1407
1438
  .command('_agent-load', { hidden: true })
@@ -105,7 +105,13 @@ async function versionPruneAction(specs, options, commandName) {
105
105
  }
106
106
  const { agent, version } = parsed;
107
107
  const agentConfig = AGENTS[agent];
108
- if (version === 'latest' || version === 'oldest' || !spec.includes('@')) {
108
+ // Script-installed agents (droid, grok) can have a *literal* `latest`
109
+ // version dir on disk when the post-install version probe failed. An
110
+ // explicit `<agent>@latest` should remove that dir directly rather than
111
+ // routing to the interactive picker (which can't run non-interactively),
112
+ // so treat an installed literal `latest` as a concrete pinned version.
113
+ const isLiteralLatestInstalled = version === 'latest' && spec.includes('@') && isVersionInstalled(agent, 'latest');
114
+ if (!isLiteralLatestInstalled && (version === 'latest' || version === 'oldest' || !spec.includes('@'))) {
109
115
  const versions = listInstalledVersions(agent);
110
116
  if (versions.length === 0) {
111
117
  console.log(chalk.gray(`No versions of ${agentLabel(agentConfig.id)} installed`));
@@ -265,6 +265,9 @@ export async function runDaemon() {
265
265
  scheduler.reloadAll();
266
266
  const reloaded = scheduler.listScheduled();
267
267
  log('INFO', `Reloaded ${reloaded.length} jobs`);
268
+ // Drop the memoized R2 config so rotated/added sync credentials are re-read
269
+ // on the next cycle instead of waiting for a restart.
270
+ void import('./session/sync/config.js').then(m => m.clearR2ConfigCache());
268
271
  };
269
272
  const handleShutdown = async () => {
270
273
  log('INFO', 'Daemon shutting down');
@@ -37,6 +37,17 @@ export interface AgentStatusEntry {
37
37
  expiresAt: number;
38
38
  keyCount: number;
39
39
  }
40
+ /** True if the launchd plist for the persistent broker is installed. */
41
+ export declare function secretsAgentServiceInstalled(): boolean;
42
+ /**
43
+ * Install + start the persistent broker as a launchd user service (idempotent).
44
+ * Writes the plist, bootstraps it into the GUI domain, and waits for the socket.
45
+ * `ProcessType: Interactive` asks launchd to schedule it at foreground priority
46
+ * so it can boot even when the machine is loaded. Returns true once reachable.
47
+ */
48
+ export declare function installSecretsAgentService(timeoutMs?: number): Promise<boolean>;
49
+ /** Stop + remove the persistent broker service, and wipe whatever it held. */
50
+ export declare function uninstallSecretsAgentService(): Promise<void>;
40
51
  export type Request = {
41
52
  cmd: 'ping';
42
53
  } | {
@@ -95,7 +106,9 @@ export declare function handleAgentRequest(store: Map<string, StoredBundle>, req
95
106
  * `agents secrets _agent-run`. Holds the store in memory, serves the socket,
96
107
  * sweeps expired entries, wipes on screen-lock/sleep, and self-exits when idle.
97
108
  */
98
- export declare function runSecretsAgent(): Promise<void>;
109
+ export declare function runSecretsAgent(opts?: {
110
+ service?: boolean;
111
+ }): Promise<void>;
99
112
  /** True if a broker socket exists at all. Cheap; gates the sync read so the
100
113
  * never-unlocked path stays a single stat. */
101
114
  export declare function agentSocketExists(): boolean;
@@ -140,8 +153,13 @@ export declare function agentLock(name?: string): Promise<number>;
140
153
  /** List currently-unlocked bundles, or [] when no broker is running. */
141
154
  export declare function agentStatus(): Promise<AgentStatusEntry[]>;
142
155
  /**
143
- * Ensure a broker is running and reachable, spawning one detached if not.
144
- * Returns true once the socket answers a ping. On protocol-version skew, kills
145
- * the stale broker and respawns. macOS only.
156
+ * Ensure a broker is running and reachable. Returns true once the socket answers
157
+ * a ping. macOS only.
158
+ *
159
+ * Prefers the persistent launchd service: if it isn't installed we install it
160
+ * (which makes the broker survive for the whole login session, so subsequent
161
+ * reads never cold-start); if it's installed but unreachable we kickstart it.
162
+ * Only when the service path can't be used do we fall back to a one-off detached
163
+ * broker — that's the model that gets starved under heavy load, so it's last.
146
164
  */
147
165
  export declare function ensureAgentRunning(timeoutMs?: number): Promise<boolean>;
@@ -24,6 +24,7 @@
24
24
  */
25
25
  import * as net from 'net';
26
26
  import * as fs from 'fs';
27
+ import * as os from 'os';
27
28
  import * as path from 'path';
28
29
  import { spawn, spawnSync, execFileSync } from 'child_process';
29
30
  import { getHelpersDir, readMeta } from '../state.js';
@@ -83,6 +84,113 @@ function cliSpawn(sub) {
83
84
  function brokerSpawn() {
84
85
  return cliSpawn(['secrets', '_agent-run']);
85
86
  }
87
+ // ─── Persistent launchd service ──────────────────────────────────────────────
88
+ // On a heavily-loaded machine a freshly-spawned broker (a full CLI cold start)
89
+ // can't get scheduled enough CPU to finish booting and bind its socket — so the
90
+ // on-demand model fails exactly when there are many agents (the case we care
91
+ // about). The fix is to run the broker as a launchd user service: started once
92
+ // with RunAtLoad + KeepAlive, it stays up, and every read just connects. The
93
+ // cold start happens once (and launchd retries until it wins), never per-read.
94
+ const SERVICE_LABEL = 'com.phnx-labs.agents-secrets-agent';
95
/**
 * Absolute path of the persistent broker's launchd plist inside the current
 * user's `~/Library/LaunchAgents` directory. The file name is the service
 * label with a `.plist` suffix.
 */
function servicePlistPath() {
    const launchAgentsDir = path.join(os.homedir(), 'Library', 'LaunchAgents');
    const fileName = `${SERVICE_LABEL}.plist`;
    return path.join(launchAgentsDir, fileName);
}
98
/**
 * Report whether the launchd plist for the persistent broker is present on
 * disk. When `onDarwin()` is false the filesystem is never consulted.
 */
export function secretsAgentServiceInstalled() {
    if (!onDarwin()) {
        return false;
    }
    return fs.existsSync(servicePlistPath());
}
102
/**
 * Escape a value for use as XML character data inside a plist `<string>`.
 * `&` is replaced first so the entities emitted for `<` and `>` are not
 * escaped a second time.
 */
function escapePlistText(value) {
    return String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');
}
/**
 * Build the launchd property list for the persistent broker.
 *
 * The job runs the CLI's `secrets _agent-run --service` command with
 * `RunAtLoad` and `KeepAlive` set, `ProcessType` `Interactive`, stdout and
 * stderr both appended to `service.log` under `agentDir()`, and a fixed PATH
 * that ends with the user's `~/.bun/bin`.
 *
 * Every interpolated value (program arguments, label, log path, home
 * directory) is XML-escaped, so a path containing `&`, `<` or `>` still
 * produces a well-formed plist.
 *
 * @returns {string} The plist XML document.
 */
function generateServicePlist() {
    const { cmd, args } = cliSpawn(['secrets', '_agent-run', '--service']);
    const progArgs = [cmd, ...args]
        .map((arg) => `    <string>${escapePlistText(arg)}</string>`)
        .join('\n');
    const label = escapePlistText(SERVICE_LABEL);
    const logPath = escapePlistText(path.join(agentDir(), 'service.log'));
    const home = escapePlistText(os.homedir());
    return `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
  <key>Label</key>
  <string>${label}</string>
  <key>ProgramArguments</key>
  <array>
${progArgs}
  </array>
  <key>RunAtLoad</key>
  <true/>
  <key>KeepAlive</key>
  <true/>
  <key>ProcessType</key>
  <string>Interactive</string>
  <key>StandardOutPath</key>
  <string>${logPath}</string>
  <key>StandardErrorPath</key>
  <string>${logPath}</string>
  <key>EnvironmentVariables</key>
  <dict>
    <key>PATH</key>
    <string>/usr/local/bin:/usr/bin:/bin:/opt/homebrew/bin:${home}/.bun/bin</string>
  </dict>
</dict>
</plist>`;
}
137
/**
 * Install + start the persistent broker as a launchd user service.
 *
 * Writes the plist, bootstraps it into the caller's GUI domain (falling back
 * to the legacy `launchctl load -w`), kickstarts it, then polls `agentPing()`
 * every 200ms until it answers or `timeoutMs` elapses. `ProcessType:
 * Interactive` in the plist asks launchd to schedule the broker at foreground
 * priority so it can boot even when the machine is loaded.
 *
 * Idempotent: when the plist already on disk is byte-identical to the one we
 * would write and the broker answers a ping, nothing is touched. Without that
 * guard a repeat install would `kickstart -k` the healthy broker, killing it
 * and discarding whatever it held in memory.
 *
 * @param {number} [timeoutMs=30000] How long to wait for the broker to answer.
 * @returns {Promise<boolean>} true once the broker is reachable; false when
 *   `onDarwin()` is false or the broker did not answer in time.
 */
export async function installSecretsAgentService(timeoutMs = 30000) {
    if (!onDarwin())
        return false;
    const plist = servicePlistPath();
    const desired = generateServicePlist();
    let installed = null;
    try {
        installed = fs.readFileSync(plist, 'utf-8');
    }
    catch { /* no plist yet (or unreadable) — treat as not installed */ }
    // Already installed with the same definition and serving: leave it alone.
    if (installed === desired && await agentPing())
        return true;
    fs.mkdirSync(path.dirname(plist), { recursive: true });
    fs.writeFileSync(plist, desired);
    const uid = process.getuid?.() ?? 0;
    const quiet = { stdio: ['ignore', 'ignore', 'ignore'] };
    // bootstrap is the modern API; fall back to legacy load. Both idempotent-ish.
    try {
        execFileSync('launchctl', ['bootstrap', `gui/${uid}`, plist], quiet);
    }
    catch {
        try {
            execFileSync('launchctl', ['load', '-w', plist], quiet);
        }
        catch { /* may already be loaded */ }
    }
    // kickstart to force an immediate start even if already bootstrapped.
    try {
        execFileSync('launchctl', ['kickstart', '-k', `gui/${uid}/${SERVICE_LABEL}`], quiet);
    }
    catch { /* best effort */ }
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
        if (await agentPing())
            return true;
        await new Promise((r) => setTimeout(r, 200));
    }
    return false;
}
173
/**
 * Stop + remove the persistent broker service, and wipe whatever it held.
 *
 * Order: ask the running broker to wipe its in-memory store, unload the job
 * (`launchctl bootout`, falling back to the legacy `unload -w`), then delete
 * the plist. Each step is best-effort so a failure in one never leaves the
 * later ones undone. Does nothing when `onDarwin()` is false.
 *
 * @returns {Promise<void>}
 */
export async function uninstallSecretsAgentService() {
    if (!onDarwin())
        return;
    // Wipe the in-memory store before tearing down. If the wipe request fails
    // we must still unload and remove the service, so the error is not fatal.
    try {
        await agentLock();
    }
    catch { /* wipe request failed — continue with teardown */ }
    const plist = servicePlistPath();
    const uid = process.getuid?.() ?? 0;
    const quiet = { stdio: ['ignore', 'ignore', 'ignore'] };
    try {
        execFileSync('launchctl', ['bootout', `gui/${uid}/${SERVICE_LABEL}`], quiet);
    }
    catch {
        try {
            execFileSync('launchctl', ['unload', '-w', plist], quiet);
        }
        catch { /* not loaded */ }
    }
    try {
        fs.unlinkSync(plist);
    }
    catch { /* already gone */ }
}
86
194
  // ─── Broker server (runs in the detached `secrets _agent-run` process) ───────
87
195
  /**
88
196
  * Pure request handler over the in-memory store. Extracted so the store
@@ -130,9 +238,13 @@ export function handleAgentRequest(store, req, now = Date.now()) {
130
238
  * `agents secrets _agent-run`. Holds the store in memory, serves the socket,
131
239
  * sweeps expired entries, wipes on screen-lock/sleep, and self-exits when idle.
132
240
  */
133
- export async function runSecretsAgent() {
241
+ export async function runSecretsAgent(opts = {}) {
134
242
  if (!onDarwin())
135
243
  return; // nothing to broker without biometry prompts
244
+ // When launchd keeps us alive as a persistent service, never idle-exit:
245
+ // exiting would just make launchd cold-start us again, reintroducing the
246
+ // startup-under-load fragility the service exists to avoid.
247
+ const persistent = opts.service === true;
136
248
  // Single-instance guard: O_EXCL pid file. If a live broker already holds it,
137
249
  // exit quietly — the existing one keeps serving.
138
250
  const pidFile = pidPath();
@@ -172,7 +284,7 @@ export async function runSecretsAgent() {
172
284
  if (now >= e.expiresAt)
173
285
  store.delete(name);
174
286
  if (store.size === 0) {
175
- if (now - emptySince >= IDLE_EXIT_MS)
287
+ if (!persistent && now - emptySince >= IDLE_EXIT_MS)
176
288
  shutdown(0);
177
289
  }
178
290
  else {
@@ -452,16 +564,43 @@ async function agentPing() {
452
564
  return r?.ok === true && r.cmd === 'ping' && r.version === PROTOCOL_VERSION;
453
565
  }
454
566
  /**
455
- * Ensure a broker is running and reachable, spawning one detached if not.
456
- * Returns true once the socket answers a ping. On protocol-version skew, kills
457
- * the stale broker and respawns. macOS only.
567
+ * Ensure a broker is running and reachable. Returns true once the socket answers
568
+ * a ping. macOS only.
569
+ *
570
+ * Prefers the persistent launchd service: if it isn't installed we install it
571
+ * (which makes the broker survive for the whole login session, so subsequent
572
+ * reads never cold-start); if it's installed but unreachable we kickstart it.
573
+ * Only when the service path can't be used do we fall back to a one-off detached
574
+ * broker — that's the model that gets starved under heavy load, so it's last.
458
575
  */
459
576
  export async function ensureAgentRunning(timeoutMs = 5000) {
460
577
  if (!onDarwin())
461
578
  return false;
462
579
  if (await agentPing())
463
580
  return true;
464
- // Socket exists but ping failed → stale/old broker. Kill it before respawn.
581
+ // Path 1: the persistent service. installSecretsAgentService is idempotent and
582
+ // waits for the socket; for an already-installed service we kickstart and wait.
583
+ try {
584
+ if (!secretsAgentServiceInstalled()) {
585
+ if (await installSecretsAgentService(Math.max(timeoutMs, 20000)))
586
+ return true;
587
+ }
588
+ else {
589
+ const uid = process.getuid?.() ?? 0;
590
+ try {
591
+ execFileSync('launchctl', ['kickstart', '-k', `gui/${uid}/${SERVICE_LABEL}`], { stdio: ['ignore', 'ignore', 'ignore'] });
592
+ }
593
+ catch { /* may already be running */ }
594
+ const d = Date.now() + timeoutMs;
595
+ while (Date.now() < d) {
596
+ if (await agentPing())
597
+ return true;
598
+ await new Promise((r) => setTimeout(r, 150));
599
+ }
600
+ }
601
+ }
602
+ catch { /* fall through to the one-off spawn */ }
603
+ // Path 2 (fallback): one-off detached broker. Clear a stale socket/pid first.
465
604
  const stalePid = (() => {
466
605
  try {
467
606
  return parseInt(fs.readFileSync(pidPath(), 'utf-8').trim(), 10);
@@ -485,11 +624,7 @@ export async function ensureAgentRunning(timeoutMs = 5000) {
485
624
  }
486
625
  catch { /* gone */ }
487
626
  const { cmd, args } = brokerSpawn();
488
- const child = spawn(cmd, args, {
489
- stdio: 'ignore',
490
- detached: true,
491
- });
492
- child.unref();
627
+ spawn(cmd, args, { stdio: 'ignore', detached: true }).unref();
493
628
  const deadline = Date.now() + timeoutMs;
494
629
  while (Date.now() < deadline) {
495
630
  if (await agentPing())
@@ -13,14 +13,26 @@ export interface R2Config {
13
13
  /** S3-compatible endpoint for the account (no bucket, no trailing slash). */
14
14
  endpoint: string;
15
15
  }
16
+ /** Window after a prompt-bearing resolution failure during which we skip
17
+ * re-attempting (and thus re-prompting). SIGHUP / restart bypasses it. */
18
+ export declare const RESOLVE_RETRY_COOLDOWN_MS: number;
19
+ /** Drop the cached resolution so the next call reads the bundle fresh. Called on
20
+ * daemon SIGHUP (to pick up rotated credentials) and between tests. */
21
+ export declare function clearR2ConfigCache(): void;
16
22
  /**
17
- * Resolve R2 credentials from the `r2.backups` bundle. Throws a clear,
18
- * actionable error if the bundle or any key is missing — sync cannot proceed
19
- * without real credentials (no silent fallback).
23
+ * Resolve R2 credentials, reading the keychain at most once per process. The
24
+ * first call reads (and may prompt for Touch ID); every later call returns the
25
+ * memoized result. Throws if the bundle/keys are missing — failures are not
26
+ * memoized, but see isSyncConfigured for the re-prompt cooldown.
20
27
  */
21
28
  export declare function loadR2Config(): R2Config;
22
- /** True when the sync bundle exists and looks resolvable, without throwing. */
23
- export declare function isSyncConfigured(): boolean;
29
+ /**
30
+ * True when the sync bundle exists and resolves, without throwing. After a
31
+ * prompt-bearing failure (e.g. a cancelled Touch ID) it returns false without
32
+ * re-reading the keychain for RESOLVE_RETRY_COOLDOWN_MS, so a dismissed prompt
33
+ * does not re-storm every cycle. `now` is injectable for tests.
34
+ */
35
+ export declare function isSyncConfigured(now?: number): boolean;
24
36
  /**
25
37
  * This machine's stable, human-readable id, used as its R2 prefix and mirror
26
38
  * directory name. Tailnet hostnames (zion, yosemite-s0, mac-mini) are already
@@ -12,7 +12,7 @@ export const SYNC_BUNDLE = 'r2.backups';
12
12
  * actionable error if the bundle or any key is missing — sync cannot proceed
13
13
  * without real credentials (no silent fallback).
14
14
  */
15
- export function loadR2Config() {
15
+ function resolveR2Config() {
16
16
  const { env } = readAndResolveBundleEnv(SYNC_BUNDLE, { caller: 'sessions-sync' });
17
17
  const accountId = env.R2_ACCOUNT_ID?.trim();
18
18
  const bucket = env.R2_BUCKET_NAME?.trim();
@@ -36,13 +36,60 @@ export function loadR2Config() {
36
36
  endpoint: `https://${accountId}.r2.cloudflarestorage.com`,
37
37
  };
38
38
  }
39
- /** True when the sync bundle exists and looks resolvable, without throwing. */
40
- export function isSyncConfigured() {
39
+ // ── Resolution cache ────────────────────────────────────────────────────────
40
+ // The daemon calls isSyncConfigured() + syncSessions() every ~90s, and each used
41
+ // to trigger a fresh read of the biometry-gated `r2.backups` keychain items —
42
+ // one Touch ID prompt per gated item, every cycle, forever. We instead resolve
43
+ // at most once per process: a success is memoized for the process lifetime
44
+ // (cleared on daemon SIGHUP via clearR2ConfigCache), so subsequent cycles never
45
+ // touch the keychain again. A *prompt-bearing* failure (cancelled Touch ID, etc.)
46
+ // starts a cooldown so a dismissed prompt is not re-issued every cycle. A simply
47
+ // absent bundle never prompts, so it is re-checked each cycle (fast pickup when
48
+ // the user later adds credentials).
49
+ let cachedConfig = null;
50
+ let lastPromptFailureAt = 0;
51
+ /** Window after a prompt-bearing resolution failure during which we skip
52
+ * re-attempting (and thus re-prompting). SIGHUP / restart bypasses it. */
53
+ export const RESOLVE_RETRY_COOLDOWN_MS = 30 * 60 * 1000; // 30 minutes
54
/**
 * Forget the memoized R2 config and reset the prompt-failure timestamp, so the
 * next call reads the bundle fresh. Called on daemon SIGHUP (to pick up
 * rotated credentials) and between tests.
 */
export function clearR2ConfigCache() {
    lastPromptFailureAt = 0;
    cachedConfig = null;
}
60
/**
 * Return the R2 credentials, resolving them at most once per process.
 *
 * The first successful call stores the result of `resolveR2Config()`; every
 * later call hands back that same object without resolving again. If
 * resolution throws, the error propagates and nothing is stored, so the next
 * call tries again (see isSyncConfigured for the re-prompt cooldown).
 */
export function loadR2Config() {
    if (!cachedConfig) {
        // A throw here skips the assignment, so failures are never memoized.
        cachedConfig = resolveR2Config();
    }
    return cachedConfig;
}
72
/**
 * True when the sync bundle exists and resolves, without throwing.
 *
 * A memoized config answers true immediately. After a failure that may have
 * cost a prompt (e.g. a cancelled Touch ID) it returns false without
 * re-resolving for RESOLVE_RETRY_COOLDOWN_MS, so a dismissed prompt does not
 * re-storm every cycle. A failure whose message matches "not found" does not
 * start the cooldown, so a bundle added later is picked up on the next call.
 *
 * @param {number} [now=Date.now()] Current time in ms; injectable for tests.
 * @returns {boolean} Whether R2 credentials are available.
 */
export function isSyncConfigured(now = Date.now()) {
    if (cachedConfig)
        return true;
    if (lastPromptFailureAt && now - lastPromptFailureAt < RESOLVE_RETRY_COOLDOWN_MS)
        return false;
    try {
        loadR2Config();
        return true;
    }
    catch (err) {
        // Anything can be thrown, not just Error objects; reading `.message`
        // off `undefined`/`null` would itself throw and break the
        // never-throws contract, so derive the text defensively.
        const message = typeof err?.message === 'string' ? err.message : String(err);
        // A missing bundle never prompts, so keep re-checking it each cycle (so a
        // later `agents secrets add` is picked up quickly). Any other failure may
        // have cost a prompt (cancelled Touch ID, keychain error) — back off.
        if (!/not found/i.test(message))
            lastPromptFailureAt = now;
        return false;
    }
}
@@ -176,6 +176,26 @@ export declare function installVersion(agent: AgentId, version: string, onProgre
176
176
  installedVersion: string;
177
177
  error?: string;
178
178
  }>;
179
+ /**
180
+ * Fold a stale literal `latest` version dir into the real resolved version.
181
+ *
182
+ * Script-installed agents (droid, grok) have no npm package to read a version
183
+ * from, so the installer resolves the version by probing `<cli> --version`
184
+ * after the install script runs. When that probe failed (3s timeout, or the
185
+ * freshly-dropped binary not yet resolvable on PATH) the installer fell back to
186
+ * the literal string `latest`, creating a `versions/<agent>/latest/` dir. A
187
+ * later install where the probe succeeded then created a SECOND dir at the real
188
+ * semver, orphaning `latest` — and because these agents' getBinaryPath points
189
+ * at a single global binary regardless of version dir, `latest` keeps showing
190
+ * up in `agents view` next to the real version forever.
191
+ *
192
+ * Call this once the install path has resolved a real version: if a stale
193
+ * `latest` dir exists, rename it onto the real version (preserving `home/`), or
194
+ * if the real dir already exists, soft-delete the `latest` dir to trash. No-op
195
+ * when nothing was resolved or no stale dir is present, so it is safe to call
196
+ * on every script-based install. Returns the action taken (for tests/logging).
197
+ */
198
+ export declare function reconcileStaleLatestDir(agent: AgentId, installedVersion: string): Promise<'none' | 'renamed' | 'trashed'>;
179
199
  /**
180
200
  * Soft-delete a version directory by moving it to ~/.agents/.system/trash/versions/.
181
201
  * Returns the trash path on success or null on failure / no source.
@@ -996,6 +996,9 @@ export async function installVersion(agent, version, onProgress) {
996
996
  await execAsync(script, { timeout: 120000 });
997
997
  if (version === 'latest') {
998
998
  installedVersion = await getCliVersionFromPath(agent) || version;
999
+ // Fold any stale literal `latest` dir from an earlier probe-failed
1000
+ // install into the real version so it stops shadowing `agents view`.
1001
+ await reconcileStaleLatestDir(agent, installedVersion);
999
1002
  }
1000
1003
  onProgress?.(`${agentConfig.name} installed. Setting up agents-cli version home for isolation...`);
1001
1004
  }
@@ -1158,6 +1161,51 @@ function removeInstallArtifacts(versionDir) {
1158
1161
  fs.rmSync(path.join(versionDir, entry), { recursive: true, force: true });
1159
1162
  }
1160
1163
  }
1164
/**
 * Fold a stale literal `latest` version dir into the real resolved version.
 *
 * Script-installed agents (droid, grok) have no npm package to read a version
 * from, so the installer resolves the version by probing `<cli> --version`
 * after the install script runs. When that probe failed the installer fell
 * back to the literal string `latest`, creating a `versions/<agent>/latest/`
 * dir. A later install where the probe succeeded then created a SECOND dir at
 * the real semver, orphaning `latest`, which keeps showing up in `agents view`
 * next to the real version.
 *
 * Call this once the install path has resolved a real version:
 *  - stale `latest` dir exists, real dir does not: rename it onto the real
 *    version (preserving `home/`) and return `'renamed'`;
 *  - both exist: soft-delete `latest` to trash, repoint session file paths at
 *    the trashed location, and return `'trashed'`;
 *  - nothing resolved (empty or still `latest`), no stale dir, or the
 *    soft-delete did not move anything: return `'none'`.
 *
 * @param agent Agent id whose version dirs are reconciled.
 * @param {string} installedVersion Version the installer resolved.
 * @returns {Promise<'none' | 'renamed' | 'trashed'>} The action actually taken.
 */
export async function reconcileStaleLatestDir(agent, installedVersion) {
    // An empty version must never reach getVersionDir: it would not name a
    // real version dir, and "nothing resolved" is documented as a no-op.
    if (!installedVersion || installedVersion === 'latest')
        return 'none';
    const staleLatestDir = getVersionDir(agent, 'latest');
    const realVersionDir = getVersionDir(agent, installedVersion);
    if (staleLatestDir === realVersionDir || !fs.existsSync(staleLatestDir)) {
        return 'none';
    }
    if (!fs.existsSync(realVersionDir)) {
        fs.renameSync(staleLatestDir, realVersionDir);
        return 'renamed';
    }
    // Both dirs exist. Stripping install artifacts would not hide `latest` for
    // global-binary agents (getBinaryPath ignores dir contents), so the whole
    // dir must go. Soft-delete to trash so any `home/` data stays recoverable
    // via `agents restore <agent>@latest`.
    const trashPath = softDeleteVersionDir(agent, 'latest');
    if (!trashPath) {
        // Nothing was moved, so report that honestly instead of 'trashed'.
        return 'none';
    }
    // Rewrite session file paths to the trashed location so history stays
    // readable. The session-db module is imported lazily — it carries a
    // top-level await that the CJS test harness can't statically transform, so
    // it must stay out of the eager graph.
    const { updateSessionFilePaths } = await import('./session/db.js');
    updateSessionFilePaths(staleLatestDir, trashPath);
    return 'trashed';
}
1161
1209
  /**
1162
1210
  * Soft-delete a version directory by moving it to ~/.agents/.system/trash/versions/.
1163
1211
  * Returns the trash path on success or null on failure / no source.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@phnx-labs/agents-cli",
3
- "version": "1.20.19",
3
+ "version": "1.20.21",
4
4
  "description": "One CLI for all your AI coding agents - versions, config, cloud dispatch, sessions, and teams (now with first-class Grok Build CLI support)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",