@ouro.bot/cli 0.1.0-alpha.519 → 0.1.0-alpha.520

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/changelog.json CHANGED
@@ -1,6 +1,33 @@
1
1
  {
2
2
  "_note": "This changelog is maintained as part of the PR/version-bump workflow. Agent-curated, not auto-generated. Agents read this file directly via read_file to understand what changed between versions.",
3
3
  "versions": [
4
+ {
5
+ "version": "0.1.0-alpha.520",
6
+ "changes": [
7
+ "`ouro up` now prints the current runtime version in the update-check phase before asking npm for newer bits, so a stalled registry or update path no longer hides what is actually running.",
8
+ "Production `ouro up` starts or refreshes the daemon before provider health checks and repair prompts. Broken credentials for one agent are reported as degraded provider state after the daemon is answering instead of preventing every agent from coming online.",
9
+ "Daemon startup now opens the command socket before autostarting provider-dependent workers and kicks agent/sense autostart concurrently. A thrown or stalled config check is contained to that agent's crashed snapshot, leaving the daemon and sibling agents reachable.",
10
+ "BlueBubbles no longer auto-learns a one-to-one chat partner as the agent's own iMessage handle after an outbound reply, so the next inbound text from that person is not filtered as self-talk.",
11
+ "Mail tools and the Ouro Mailbox now retry runtime/config resolution when a sense process cached a transient vault-unavailable state, and prompt sense-status rendering reads the same cached vault runtime truth instead of stale local config.",
12
+ "`ouro up` now times out unresolved daemon startup polling instead of spinning forever, and daemon worker startup ignores duplicate start attempts while a provider/config check is already in flight. Stopping during a pending config check no longer lets that worker spawn afterward.",
13
+ "BlueBubbles self-handle filtering is now group-echo scoped and bypasses known non-self friends, so a stale `ownHandles` entry cannot make real Ari messages disappear from Slugger's iMessage turn loop.",
14
+ "`ouro up` now trusts daemon-published provider readiness after startup instead of doing a second foreground vault read, so a slow Bitwarden provider check cannot freeze the command or report a false auth prompt after the daemon is already healthy.",
15
+ "Bitwarden vault login/unlock now passes the saved vault unlock secret through `bw --passwordenv` instead of process arguments, so the secret does not appear in `ps` output while `ouro up` or a sense startup is unlocking the agent vault.",
16
+ "Production Bitwarden reads for structured runtime/provider item names now use bounded exact search with isolated app data instead of a full-vault listing, reducing the startup window where `ouro up` appears quiet while the vault CLI is busy.",
17
+ "Daemon startup provider health checks now cache the selected provider credentials they just verified, so the first MCP or iMessage turn does not re-open Bitwarden before using the already-checked `openai-codex` key.",
18
+ "Managed sense workers now receive the daemon's already-verified provider credential snapshot over IPC at startup, so BlueBubbles and other child processes do not reopen Bitwarden mid-message after `ouro up` has already proved the selected provider is ready.",
19
+ "Daemon health recovery no longer cancels an agent whose config check is still in flight, so a slow provider/vault check cannot leave the inner-dialog worker stopped after `ouro up`.",
20
+ "`ouro up` no longer waits on optional sense runtime/config vault refreshes. Sense workers use cached config for the boot decision, refresh their runtime config in the background, and retry once fresh config arrives, while entrypoints start their worker before best-effort runtime refresh completes.",
21
+ "Daemon health recovery now invalidates stale no-process startup attempts before retrying, so an old hung provider check cannot keep an agent stuck in `starting` forever or spawn a stale worker after recovery; stale recovery defaults to 45s so the next health pass can clear it.",
22
+ "Provider initialization failures during daemon-handled sense turns now throw back to the command boundary instead of calling `process.exit(1)`, so a bad provider/vault read can fail one MCP or iMessage turn without killing the `ouro up` supervisor.",
23
+ "Provider retry and single-provider read paths now refresh only the selected provider's vault item, so a slow or broken credential for another provider cannot stall a healthy `openai-codex` turn.",
24
+ "`ouro status --agent <name>` now refreshes only the providers selected by that agent's outward/inner lanes, and skips vault reads entirely when local provider state is missing, so an unused broken provider cannot freeze status.",
25
+ "`ouro up` now keeps its startup poll finite while allowing enough time for bounded Bitwarden retry paths before labeling an otherwise-progressing worker as timed out.",
26
+ "User-facing session transcript summaries and searches now hide tool-result chatter for outward sessions while keeping full tool traces available in `self/inner`, so iMessage history reads as the human conversation instead of shell logs.",
27
+ "Small transcript-tail reads now always keep the latest visible user and assistant turns even after tool-heavy BlueBubbles activity, so Slugger does not mistake a live iMessage session for stale history.",
28
+ "Custom-socket daemon runs, including hermetic integration sandboxes, no longer touch the production daemon pidfile or orphan-cleanup sweep, so tests and dev harnesses cannot SIGTERM the real `ouro up` daemon while validating startup."
29
+ ]
30
+ },
4
31
  {
5
32
  "version": "0.1.0-alpha.519",
6
33
  "changes": [
@@ -54,7 +54,7 @@ const runtime_1 = require("../nerves/runtime");
54
54
  // Dynamic import: agent-entry is boot-time wiring that starts a sense process.
55
55
  // Using dynamic import avoids a static heart/ -> senses/ dependency.
56
56
  Promise.resolve().then(() => __importStar(require("./runtime-credentials"))).then(async ({ refreshRuntimeCredentialConfig }) => {
57
- await refreshRuntimeCredentialConfig(agentName, { preserveCachedOnFailure: true }).catch(() => undefined);
57
+ void refreshRuntimeCredentialConfig(agentName, { preserveCachedOnFailure: true }).catch(() => undefined);
58
58
  const { startInnerDialogWorker } = await Promise.resolve().then(() => __importStar(require("../senses/inner-dialog-worker")));
59
59
  await startInnerDialogWorker();
60
60
  })
@@ -388,6 +388,7 @@ async function runRuntimeAuthFlow(input, deps = {}) {
388
388
  });
389
389
  writeAuthProgress(input, `checking ${input.agentName}'s vault access...`);
390
390
  const vault = await (0, provider_credentials_1.refreshProviderCredentialPool)(input.agentName, {
391
+ providers: [input.provider],
391
392
  onProgress: (message) => writeAuthProgress(input, message),
392
393
  });
393
394
  if (!vault.ok && vault.reason === "unavailable") {
@@ -118,21 +118,24 @@ async function getProviderRuntime(facing = "human") {
118
118
  event: "engine.provider_init_error",
119
119
  component: "engine",
120
120
  message: msg,
121
- meta: {},
121
+ meta: { facing },
122
122
  });
123
123
  // eslint-disable-next-line no-console -- pre-boot guard: provider init failure
124
124
  console.error(`\n[fatal] ${msg}\n`);
125
- process.exit(1);
125
+ throw error instanceof Error ? error : new Error(msg);
126
126
  }
127
127
  if (!_providerRuntimes[facing]) {
128
+ const msg = "provider runtime could not be initialized.";
128
129
  (0, runtime_1.emitNervesEvent)({
129
130
  level: "error",
130
131
  event: "engine.provider_init_error",
131
132
  component: "engine",
132
- message: "provider runtime could not be initialized.",
133
- meta: {},
133
+ message: msg,
134
+ meta: { facing },
134
135
  });
135
- process.exit(1);
136
+ // eslint-disable-next-line no-console -- pre-boot guard: provider init failure
137
+ console.error(`\n[fatal] ${msg}\n`);
138
+ throw new Error(msg);
136
139
  }
137
140
  return _providerRuntimes[facing].runtime;
138
141
  }
@@ -759,7 +762,10 @@ async function runAgent(messages, callbacks, channel, signal, options) {
759
762
  const seconds = delayMs / 1000;
760
763
  const cause = RETRY_LABELS[record.classification];
761
764
  try {
762
- await (0, provider_credentials_1.refreshProviderCredentialPool)((0, identity_2.getAgentName)(), { preserveCachedOnFailure: true });
765
+ await (0, provider_credentials_1.refreshProviderCredentialPool)((0, identity_2.getAgentName)(), {
766
+ preserveCachedOnFailure: true,
767
+ providers: [record.provider],
768
+ });
763
769
  _providerRuntimes[facing] = null;
764
770
  providerRuntime = await getProviderRuntime(facing);
765
771
  providerRuntime.resetTurnState(messages);
@@ -415,7 +415,7 @@ async function checkAgentConfigWithProviderHealth(agentName, bundlesRoot, deps =
415
415
  const poolResult = await (0, provider_credentials_1.refreshProviderCredentialPool)(agentName, {
416
416
  ...(deps.onProgress ? { onProgress: mapVaultRefreshProgress(agentName, deps.onProgress) } : {}),
417
417
  providers,
418
- skipCache: true,
418
+ preserveCachedOnFailure: true,
419
419
  });
420
420
  const pingGroups = new Map();
421
421
  const lanes = ["outward", "inner"];
@@ -200,8 +200,7 @@ async function runCliUpdateCheckWithTimeout(checkForCliUpdate, timeoutMs = updat
200
200
  /* v8 ignore stop */
201
201
  });
202
202
  }
203
- async function checkAgentProviders(deps, agentsOverride, onProgress) {
204
- const agents = agentsOverride ?? await listCliAgents(deps);
203
+ async function checkAgentProviders(deps, agents, onProgress) {
205
204
  const bundlesRoot = deps.bundlesRoot ?? (0, identity_1.getAgentBundlesRoot)();
206
205
  const degraded = [];
207
206
  for (const agent of [...new Set(agents)]) {
@@ -238,6 +237,58 @@ async function checkAgentProviders(deps, agentsOverride, onProgress) {
238
237
  }
239
238
  return degraded;
240
239
  }
240
+ // Map one provider row from the daemon's status payload onto the degraded-agent
+ // record shape ({ agent, errorReason, fixHint }) consumed by the repair summaries.
+ function degradedFromStatusProviderRow(row) {
241
+ // "unconfigured" rows carry no usable model name; otherwise show "provider / model".
+ const binding = row.provider === "unconfigured" ? row.provider : `${row.provider} / ${row.model}`;
242
+ const detail = row.detail ? `: ${row.detail}` : "";
243
+ // A detail beginning with "ouro " is surfaced verbatim as a runnable CLI command;
+ // anything else falls back to the generic status/doctor guidance.
+ const fixHint = row.detail && row.detail.startsWith("ouro ")
244
+ ? `Run \`${row.detail}\`.`
245
+ : "Run `ouro status` or `ouro doctor` for provider details.";
246
+ return {
247
+ agent: row.agent,
248
+ errorReason: `${row.lane} provider ${binding} readiness is ${row.readiness}${detail}`,
249
+ fixHint,
250
+ };
251
+ }
252
+ // True only when every requested agent has a status row for BOTH the "outward"
+ // and "inner" lanes. Partial coverage means the daemon's status payload cannot
+ // substitute for a foreground provider check.
+ function providerStatusRowsCoverAgents(rows, agents) {
253
+ for (const agent of agents) {
254
+ const lanes = new Set(rows.filter((row) => row.agent === agent).map((row) => row.lane));
255
+ if (!lanes.has("outward") || !lanes.has("inner"))
256
+ return false;
257
+ }
258
+ return true;
259
+ }
260
+ // Derive degraded-agent entries from the daemon's own published status instead
+ // of re-running foreground vault/provider checks. Returns [] when every covered
+ // row is ready, a degraded list otherwise, or null when the daemon response is
+ // missing, not ok, unparseable, or does not cover every agent's outward+inner
+ // lane — null tells the caller to fall back to the foreground check path.
+ async function checkAgentProvidersFromDaemonStatus(deps, agents, onProgress) {
261
+ const uniqueAgents = [...new Set(agents)];
262
+ if (uniqueAgents.length === 0)
263
+ return [];
264
+ let response;
265
+ try {
266
+ response = await deps.sendCommand(deps.socketPath, { kind: "daemon.status" });
267
+ }
268
+ catch {
269
+ // Fall through to the same foreground-check fallback used for empty status responses.
270
+ }
271
+ if (!response || !response.ok)
272
+ return null;
273
+ const payload = (0, cli_render_1.parseStatusPayload)(response.data);
274
+ if (!payload)
275
+ return null;
276
+ const rows = payload.providers.filter((row) => uniqueAgents.includes(row.agent));
277
+ if (!providerStatusRowsCoverAgents(rows, uniqueAgents))
278
+ return null;
279
+ const degraded = [];
280
+ const degradedAgents = new Set();
281
+ // At most one degraded entry per agent: the first non-ready row wins.
+ for (const row of rows) {
282
+ if (row.readiness === "ready" || degradedAgents.has(row.agent))
283
+ continue;
284
+ degraded.push(degradedFromStatusProviderRow(row));
285
+ degradedAgents.add(row.agent);
286
+ }
287
+ onProgress?.(degraded.length === 0
288
+ ? "provider readiness confirmed by daemon status"
289
+ : "provider readiness reported by daemon status");
290
+ return degraded;
291
+ }
241
292
  async function checkAgentProviderHealth(agentName, bundlesRoot, deps, onProgress, options = {}) {
242
293
  const liveDeps = {};
243
294
  if (deps.homeDir)
@@ -402,7 +453,11 @@ function managedAgentsSignature(agentNames) {
402
453
  return unique.length > 0 ? unique.join(",") : "(none)";
403
454
  }
404
455
  async function checkAlreadyRunningAgentProviders(deps, onProgress) {
405
- return checkAgentProviders(deps, undefined, onProgress);
456
+ const agents = await listCliAgents(deps);
457
+ const statusResult = await checkAgentProvidersFromDaemonStatus(deps, agents, onProgress);
458
+ if (statusResult)
459
+ return statusResult;
460
+ return checkAgentProviders(deps, agents, onProgress);
406
461
  }
407
462
  function readinessIssueFromDegraded(entry) {
408
463
  return entry.issue ?? (0, readiness_repair_1.genericReadinessIssue)({
@@ -490,10 +545,8 @@ function writeSyncProbeSummary(deps, findings) {
490
545
  }
491
546
  deps.writeStdout(lines.join("\n"));
492
547
  }
493
- function bootPhasePlan(daemonAlive) {
494
- return daemonAlive
495
- ? ["update check", "system setup", "sync probe", "starting daemon", "provider checks", "final daemon check"]
496
- : ["update check", "system setup", "sync probe", "provider checks", "starting daemon", "final daemon check"];
548
+ // The phase order is now fixed: the daemon is always started/refreshed before
+ // provider checks run, so the plan no longer branches on daemon liveness. The
+ // parameter is kept (underscored) so existing call sites remain valid.
+ function bootPhasePlan(_daemonAlive) {
549
+ return ["update check", "system setup", "sync probe", "starting daemon", "provider checks", "final daemon check"];
497
550
  }
498
551
  /**
499
552
  * Layer 2: brief, scannable summary of a boot-sync-probe finding for the
@@ -601,25 +654,6 @@ async function verifyDaemonReadyForHandoff(deps) {
601
654
  };
602
655
  }
603
656
  }
604
- async function reportPostRepairProviderHealth(deps, repairedAgents, onProgress) {
605
- const remainingDegraded = await checkAgentProviders(deps, repairedAgents, onProgress);
606
- (0, runtime_1.emitNervesEvent)({
607
- level: remainingDegraded.length > 0 ? "warn" : "info",
608
- component: "daemon",
609
- event: "daemon.post_repair_provider_check",
610
- message: remainingDegraded.length > 0
611
- ? "post-repair provider health check still degraded"
612
- : "post-repair provider health check recovered",
613
- meta: { degradedCount: remainingDegraded.length, repairedAgents },
614
- });
615
- if (remainingDegraded.length === 0) {
616
- deps.writeStdout("All set. Provider checks recovered after repair.");
617
- return remainingDegraded;
618
- }
619
- writeProviderRepairSummary(deps, "Still needs attention", remainingDegraded);
620
- deps.writeStdout("Run `ouro up` again after these are fixed.");
621
- return remainingDegraded;
622
- }
623
657
  async function checkProviderHealthBeforeChat(agentName, deps) {
624
658
  const bundlesRoot = deps.bundlesRoot ?? (0, identity_1.getAgentBundlesRoot)();
625
659
  const result = await checkAgentProviderHealth(agentName, bundlesRoot, deps);
@@ -4300,7 +4334,7 @@ function pingAttemptCount(result) {
4300
4334
  return undefined;
4301
4335
  }
4302
4336
  async function readProviderCredentialRecord(agent, provider, _deps, options = {}) {
4303
- const poolResult = await (0, provider_credentials_1.refreshProviderCredentialPool)(agent, options);
4337
+ const poolResult = await (0, provider_credentials_1.refreshProviderCredentialPool)(agent, { ...options, providers: [provider] });
4304
4338
  if (poolResult.ok) {
4305
4339
  const existing = poolResult.pool.providers[provider];
4306
4340
  if (existing)
@@ -4506,15 +4540,23 @@ function renderProviderCredentialLine(agentName, credential) {
4506
4540
  async function executeProviderStatus(command, deps) {
4507
4541
  const agentRoot = providerCliAgentRoot(command, deps);
4508
4542
  const progress = createHumanCommandProgress(deps, "provider status");
4543
+ const stateResult = (0, provider_state_1.readProviderState)(agentRoot);
4509
4544
  try {
4510
- await runCommandProgressPhase(progress, "reading provider credentials", () => (0, provider_credentials_1.refreshProviderCredentialPool)(command.agent, {
4511
- onProgress: (message) => progress.updateDetail(message),
4512
- }), (poolResult) => {
4513
- if (!poolResult.ok)
4514
- return poolResult.reason;
4515
- const summary = (0, provider_credentials_1.summarizeProviderCredentialPool)(poolResult.pool);
4516
- return summary.providers.map((provider) => provider.provider).join(", ") || "none stored";
4517
- });
4545
+ if (stateResult.ok) {
4546
+ const selectedProviders = [...new Set([
4547
+ stateResult.state.lanes.outward.provider,
4548
+ stateResult.state.lanes.inner.provider,
4549
+ ])];
4550
+ await runCommandProgressPhase(progress, "reading selected provider credentials", () => (0, provider_credentials_1.refreshProviderCredentialPool)(command.agent, {
4551
+ providers: selectedProviders,
4552
+ onProgress: (message) => progress.updateDetail(message),
4553
+ }), (poolResult) => {
4554
+ if (!poolResult.ok)
4555
+ return poolResult.reason;
4556
+ const summary = (0, provider_credentials_1.summarizeProviderCredentialPool)(poolResult.pool);
4557
+ return summary.providers.map((provider) => provider.provider).join(", ") || "none stored";
4558
+ });
4559
+ }
4518
4560
  }
4519
4561
  finally {
4520
4562
  progress.end();
@@ -5620,7 +5662,7 @@ async function runOuroCli(args, deps = (0, cli_defaults_1.createDefaultOuroCliDe
5620
5662
  // ── versioned CLI update check ──
5621
5663
  if (deps.checkForCliUpdate) {
5622
5664
  progress.startPhase("update check");
5623
- progress.updateDetail("checking npm registry\ncontinuing startup if it stays quiet");
5665
+ progress.updateDetail(`current runtime: ${(0, bundle_manifest_1.getPackageVersion)()}\nchecking npm registry\ncontinuing startup if it stays quiet`);
5624
5666
  let pendingReExec = false;
5625
5667
  let updateCheckStatus = "up to date";
5626
5668
  try {
@@ -5785,41 +5827,6 @@ async function runOuroCli(args, deps = (0, cli_defaults_1.createDefaultOuroCliDe
5785
5827
  }
5786
5828
  const daemonAliveBeforeStart = await deps.checkSocketAlive(deps.socketPath);
5787
5829
  progress.setPhasePlan?.(bootPhasePlan(daemonAliveBeforeStart));
5788
- let providerChecksAlreadyRun = false;
5789
- if (!daemonAliveBeforeStart) {
5790
- progress.startPhase("provider checks");
5791
- const preflightProviderDegraded = await checkAgentProviders(deps, undefined, (msg) => progress.updateDetail(msg));
5792
- providerChecksAlreadyRun = true;
5793
- progress.completePhase("provider checks", providerRepairCountSummary(preflightProviderDegraded.length));
5794
- if (preflightProviderDegraded.length > 0) {
5795
- progress.end();
5796
- if (command.noRepair) {
5797
- writeProviderRepairSummary(deps, "Provider checks need attention", preflightProviderDegraded);
5798
- // Layer 4: drift advisories ride along with the provider-repair
5799
- // summary under --no-repair. Non-blocking; failure to collect
5800
- // findings (e.g. malformed agent.json on one bundle) is swallowed
5801
- // by `collectAgentDriftAdvisories` so the rest of the boot path
5802
- // is unaffected.
5803
- const driftAdvisories = await collectAgentDriftAdvisories(deps);
5804
- writeDriftAdvisorySummary(deps, driftAdvisories);
5805
- const message = "daemon not started: provider checks need repair. Run `ouro repair` or rerun `ouro up` to choose a repair path.";
5806
- return returnCliFailure(deps, message);
5807
- }
5808
- const repairResult = await runReadinessRepairForDegraded(preflightProviderDegraded, deps);
5809
- if (!repairResult.repairsAttempted) {
5810
- writeProviderRepairSummary(deps, "Provider checks still need attention", repairResult.remainingDegraded);
5811
- const message = "daemon not started: provider checks need repair. Run `ouro repair` or rerun `ouro up` to choose a repair path.";
5812
- return returnCliFailure(deps, message);
5813
- }
5814
- const remainingDegraded = repairResult.remainingDegraded;
5815
- if (remainingDegraded.length > 0) {
5816
- writeProviderRepairSummary(deps, "Still needs attention", remainingDegraded);
5817
- const message = "daemon not started: provider checks still need repair.";
5818
- return returnCliFailure(deps, message);
5819
- }
5820
- deps.writeStdout("All set. Provider checks recovered after repair.");
5821
- }
5822
- }
5823
5830
  progress.startPhase("starting daemon");
5824
5831
  const daemonResult = await ensureDaemonRunning({
5825
5832
  ...deps,
@@ -5835,12 +5842,10 @@ async function runOuroCli(args, deps = (0, cli_defaults_1.createDefaultOuroCliDe
5835
5842
  return returnCliFailure(deps, daemonResult.message);
5836
5843
  }
5837
5844
  progress.completePhase("starting daemon", daemonProgressSummary(daemonResult));
5838
- if (!providerChecksAlreadyRun || daemonResult.alreadyRunning) {
5839
- progress.startPhase("provider checks");
5840
- const providerDegraded = await checkAlreadyRunningAgentProviders(deps, (msg) => progress.updateDetail(msg));
5841
- daemonResult.stability = mergeStartupStability(daemonResult.stability, providerDegraded);
5842
- progress.completePhase("provider checks", providerRepairCountSummary(providerDegraded.length));
5843
- }
5845
+ progress.startPhase("provider checks");
5846
+ const providerDegraded = await checkAlreadyRunningAgentProviders(deps, (msg) => progress.updateDetail(msg));
5847
+ daemonResult.stability = mergeStartupStability(daemonResult.stability, providerDegraded);
5848
+ progress.completePhase("provider checks", providerRepairCountSummary(providerDegraded.length));
5844
5849
  progress.startPhase("final daemon check");
5845
5850
  const finalDaemonCheck = await verifyDaemonReadyForHandoff(deps);
5846
5851
  if (!finalDaemonCheck.ok) {
@@ -5861,6 +5866,7 @@ async function runOuroCli(args, deps = (0, cli_defaults_1.createDefaultOuroCliDe
5861
5866
  // degraded summary too — same rationale as the preflight path.
5862
5867
  const driftAdvisories = await collectAgentDriftAdvisories(deps);
5863
5868
  writeDriftAdvisorySummary(deps, driftAdvisories);
5869
+ deps.setExitCode?.(1);
5864
5870
  (0, runtime_1.emitNervesEvent)({
5865
5871
  level: "warn",
5866
5872
  component: "daemon",
@@ -5876,13 +5882,13 @@ async function runOuroCli(args, deps = (0, cli_defaults_1.createDefaultOuroCliDe
5876
5882
  const typedDegraded = daemonResult.stability.degraded.filter((entry) => (0, readiness_repair_1.isKnownReadinessIssue)(entry.issue));
5877
5883
  const untypedDegraded = daemonResult.stability.degraded.filter((entry) => !(0, readiness_repair_1.isKnownReadinessIssue)(entry.issue));
5878
5884
  let repairsAttempted = false;
5879
- const repairedAgents = new Set();
5885
+ let remainingDegraded = [];
5880
5886
  if (typedDegraded.length > 0) {
5881
5887
  const guidedRepair = await runReadinessRepairForDegraded(typedDegraded, deps);
5882
5888
  if (guidedRepair.repairsAttempted) {
5883
5889
  repairsAttempted = true;
5884
- typedDegraded.forEach((entry) => repairedAgents.add(entry.agent));
5885
5890
  }
5891
+ remainingDegraded = mergeRemainingDegraded(remainingDegraded, guidedRepair.remainingDegraded);
5886
5892
  }
5887
5893
  // Layer 3: extended activation contract — fires when there are
5888
5894
  // untyped degraded entries OR when typed entries stack to ≥3 (compound
@@ -5962,15 +5968,32 @@ async function runOuroCli(args, deps = (0, cli_defaults_1.createDefaultOuroCliDe
5962
5968
  });
5963
5969
  if (repairResult.repairsAttempted) {
5964
5970
  repairsAttempted = true;
5965
- untypedDegraded
5971
+ const repairedUntypedAgents = untypedDegraded
5966
5972
  .filter(interactive_repair_1.hasRunnableInteractiveRepair)
5967
- .forEach((entry) => repairedAgents.add(entry.agent));
5973
+ .map((entry) => entry.agent);
5974
+ const recheckedUntyped = repairedUntypedAgents.length > 0
5975
+ ? await checkAgentProviders(deps, repairedUntypedAgents, (msg) => progress.updateDetail(msg))
5976
+ : [];
5977
+ const untouchedUntyped = untypedDegraded.filter((entry) => !repairedUntypedAgents.includes(entry.agent));
5978
+ remainingDegraded = mergeRemainingDegraded([...remainingDegraded, ...untouchedUntyped], recheckedUntyped);
5979
+ }
5980
+ else {
5981
+ remainingDegraded = mergeRemainingDegraded(remainingDegraded, untypedDegraded);
5968
5982
  }
5969
5983
  }
5970
5984
  if (repairsAttempted) {
5971
5985
  progress.startPhase("post-repair check");
5972
- await reportPostRepairProviderHealth(deps, [...repairedAgents], (msg) => progress.updateDetail(msg));
5973
- progress.completePhase("post-repair check", providerRepairCountSummary(repairedAgents.size));
5986
+ progress.completePhase("post-repair check", providerRepairCountSummary(remainingDegraded.length));
5987
+ if (remainingDegraded.length === 0) {
5988
+ deps.writeStdout("All set. Provider checks recovered after repair.");
5989
+ }
5990
+ else {
5991
+ writeProviderRepairSummary(deps, "Still needs attention", remainingDegraded);
5992
+ deps.writeStdout("Run `ouro up` again after these are fixed.");
5993
+ }
5994
+ }
5995
+ else if (untypedDegraded.length > 0) {
5996
+ writeProviderRepairSummary(deps, "Provider checks need attention", remainingDegraded);
5974
5997
  }
5975
5998
  }
5976
5999
  }
@@ -113,6 +113,7 @@ exports.HEALTH_TRACKED_EVENTS = new Set([
113
113
  "daemon.agent_config_failure",
114
114
  "daemon.agent_entry_missing",
115
115
  "daemon.agent_spawn_failed",
116
+ "daemon.agent_startup_stale_recovered",
116
117
  "daemon.agent_restart_exhausted",
117
118
  "daemon.agent_permanent_failure",
118
119
  "daemon.agent_cooldown_recovery",
@@ -66,6 +66,7 @@ const outlook_types_1 = require("../outlook/outlook-types");
66
66
  const outlook_read_1 = require("../outlook/outlook-read");
67
67
  const outlook_view_1 = require("../outlook/outlook-view");
68
68
  const provider_visibility_1 = require("../provider-visibility");
69
+ const socket_client_1 = require("./socket-client");
69
70
  const PIDFILE_PATH = path.join(os.homedir(), ".ouro-cli", "daemon.pids");
70
71
  /**
71
72
  * Defense-in-depth: detect if we're running under vitest. The pidfile lives
@@ -205,7 +206,20 @@ function runPsCheck(pids) {
205
206
  * manual cleanup). The scope is narrow on purpose — see parseOrphanPidsFromPs.
206
207
  */
207
208
  /* v8 ignore start -- process lifecycle: uses kill/ps, tested via deployment @preserve */
208
- function killOrphanProcesses() {
209
+ // Guard predicate: only a daemon bound to the default production socket path is
+ // allowed to touch the shared pidfile or run the orphan-cleanup sweep, keeping
+ // custom-socket (test/dev sandbox) daemons away from production processes.
+ function isProductionDaemonSocketPath(socketPath) {
210
+ return socketPath === socket_client_1.DEFAULT_DAEMON_SOCKET_PATH;
211
+ }
212
+ function killOrphanProcesses(socketPath = socket_client_1.DEFAULT_DAEMON_SOCKET_PATH) {
213
+ if (!isProductionDaemonSocketPath(socketPath)) {
214
+ (0, runtime_1.emitNervesEvent)({
215
+ level: "warn",
216
+ component: "daemon",
217
+ event: "daemon.orphan_cleanup_nonproduction_blocked",
218
+ message: "blocked orphan cleanup for non-production daemon socket",
219
+ meta: { socketPath, pidfilePath: PIDFILE_PATH },
220
+ });
221
+ return;
222
+ }
209
223
  if (isVitestProcess()) {
210
224
  (0, runtime_1.emitNervesEvent)({
211
225
  level: "warn",
@@ -271,7 +285,17 @@ function killOrphanProcesses() {
271
285
  * Write all managed PIDs (daemon + children) to the pidfile.
272
286
  * Called after all agents and senses are spawned.
273
287
  */
274
- function writePidfile(extraPids = []) {
288
+ function writePidfile(extraPids = [], socketPath = socket_client_1.DEFAULT_DAEMON_SOCKET_PATH) {
289
+ if (!isProductionDaemonSocketPath(socketPath)) {
290
+ (0, runtime_1.emitNervesEvent)({
291
+ level: "warn",
292
+ component: "daemon",
293
+ event: "daemon.write_pidfile_nonproduction_blocked",
294
+ message: "blocked production pidfile write for non-production daemon socket",
295
+ meta: { socketPath, pidfilePath: PIDFILE_PATH, attemptedPids: extraPids.length },
296
+ });
297
+ return;
298
+ }
275
299
  if (isVitestProcess()) {
276
300
  (0, runtime_1.emitNervesEvent)({
277
301
  level: "warn",
@@ -392,6 +416,7 @@ class OuroDaemon {
392
416
  server = null;
393
417
  outlookServer = null;
394
418
  socketIdentity = null;
419
+ senseAutostartTimer = null;
395
420
  outlookServerFactory;
396
421
  constructor(options) {
397
422
  this.socketPath = options.socketPath;
@@ -579,15 +604,16 @@ class OuroDaemon {
579
604
  // MCP connections are lazily initialized per-agent during senseTurn
580
605
  // (daemon manages multiple agents; agent identity must be set before loading MCP config)
581
606
  /* v8 ignore start -- orphan cleanup + pidfile: calls process management functions @preserve */
582
- killOrphanProcesses();
607
+ killOrphanProcesses(this.socketPath);
583
608
  /* v8 ignore stop */
584
- await this.processManager.startAutoStartAgents();
585
- await this.senseManager?.startAutoStartSenses();
609
+ await this.openCommandSocket();
610
+ this.triggerAutoStartAgents();
611
+ this.triggerAutoStartSensesWhenAgentsSettled();
586
612
  // Write all managed PIDs to disk so the next daemon can clean up
587
613
  /* v8 ignore start -- pidfile write: collects PIDs from process managers @preserve */
588
614
  const agentPids = this.processManager.listAgentSnapshots().map((s) => s.pid).filter((p) => p !== null);
589
615
  const sensePids = this.senseManager?.listManagedPids?.() ?? [];
590
- writePidfile([...agentPids, ...sensePids]);
616
+ writePidfile([...agentPids, ...sensePids], this.socketPath);
591
617
  /* v8 ignore stop */
592
618
  this.scheduler.start?.();
593
619
  await this.scheduler.reconcile?.();
@@ -608,6 +634,55 @@ class OuroDaemon {
608
634
  meta: { port: outlook_types_1.OUTLOOK_DEFAULT_PORT },
609
635
  });
610
636
  }
637
+ }
638
+ // Kick agent autostart without blocking daemon startup: prefer the process
+ // manager's fire-and-forget trigger when it exists, otherwise run the async
+ // start path detached, reporting failures as a nerves event instead of throwing.
+ triggerAutoStartAgents() {
639
+ if (this.processManager.triggerAutoStartAgents) {
640
+ this.processManager.triggerAutoStartAgents();
641
+ return;
642
+ }
643
+ // Deliberately not awaited: the command socket is already open at this point
+ // and must stay responsive while agents come up; errors are logged, not thrown.
+ void this.processManager.startAutoStartAgents().catch((error) => {
644
+ (0, runtime_1.emitNervesEvent)({
645
+ level: "error",
646
+ component: "daemon",
647
+ event: "daemon.agent_autostart_error",
648
+ message: "agent autostart failed after daemon socket opened",
649
+ meta: { error: error instanceof Error ? error.message : String(error) },
650
+ });
651
+ });
652
+ }
653
+ // Kick sense autostart without blocking; mirrors triggerAutoStartAgents. Uses
+ // the sense manager's fire-and-forget trigger when present, else runs the async
+ // path detached with failures reported as a nerves event.
+ triggerAutoStartSenses() {
654
+ /* v8 ignore next -- defensive: callers already check senseManager before delegating here @preserve */
655
+ if (!this.senseManager)
656
+ return;
657
+ if (this.senseManager.triggerAutoStartSenses) {
658
+ this.senseManager.triggerAutoStartSenses();
659
+ return;
660
+ }
661
+ // Deliberately not awaited; errors are logged rather than propagated.
+ void this.senseManager.startAutoStartSenses().catch((error) => {
662
+ (0, runtime_1.emitNervesEvent)({
663
+ level: "error",
664
+ component: "daemon",
665
+ event: "daemon.sense_autostart_error",
666
+ message: "sense autostart failed after daemon socket opened",
667
+ meta: { error: error instanceof Error ? error.message : String(error) },
668
+ });
669
+ });
670
+ }
671
+ // Defer sense autostart until no managed agent snapshot is still "starting",
+ // re-polling every 250ms. The pending timer handle is kept on
+ // `senseAutostartTimer` so stop() can cancel an in-flight poll.
+ triggerAutoStartSensesWhenAgentsSettled() {
672
+ if (!this.senseManager)
673
+ return;
674
+ const waitingOnAgents = this.processManager.listAgentSnapshots()
675
+ .some((snapshot) => snapshot.status === "starting");
676
+ if (!waitingOnAgents) {
677
+ this.triggerAutoStartSenses();
678
+ return;
679
+ }
680
+ // NOTE(review): this re-polls indefinitely while any agent reports "starting";
+ // presumably the stale-startup recovery path elsewhere bounds that state —
+ // confirm. The timer is also not unref()'d, which keeps the event loop alive
+ // while a poll is pending (acceptable for a daemon, worth confirming).
+ this.senseAutostartTimer = setTimeout(() => {
681
+ this.senseAutostartTimer = null;
682
+ this.triggerAutoStartSensesWhenAgentsSettled();
683
+ }, 250);
684
+ }
685
+ async openCommandSocket() {
611
686
  if (fs.existsSync(this.socketPath)) {
612
687
  fs.unlinkSync(this.socketPath);
613
688
  }
@@ -831,6 +906,10 @@ class OuroDaemon {
831
906
  (0, update_checker_1.stopUpdateChecker)();
832
907
  (0, mcp_manager_1.shutdownSharedMcpManager)();
833
908
  this.scheduler.stop?.();
909
+ if (this.senseAutostartTimer) {
910
+ clearTimeout(this.senseAutostartTimer);
911
+ this.senseAutostartTimer = null;
912
+ }
834
913
  await this.processManager.stopAll();
835
914
  await this.senseManager?.stopAll();
836
915
  if (this.server) {