@slock-ai/daemon 0.57.2 → 0.57.3-play.20260609141516

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1438,9 +1438,9 @@ Error code prefixes tell you the layer:
1438
1438
  function buildCredentialHygieneSection() {
1439
1439
  return `### Credential hygiene
1440
1440
 
1441
- **Never paste credentials into Slock messages, attachments, or task fields.** Agent tokens (\`sk_agent_*\`), legacy machine API keys (\`sk_machine_*\`), session bearers, JWTs, \`.env\` files, or \`credential.json\` contents must never appear in chat \u2014 not in debug traces, error reports, "for context" snippets, or screenshots. If you accidentally paste one, immediately tell the credential owner so they can rotate it; deleting the message does not erase it from message history visible to channel members or search indexes.
1441
+ **Never paste credentials into public Slock channels, public-channel threads, or public-channel task/attachment fields.** Agent tokens (\`sk_agent_*\`), legacy machine API keys (\`sk_machine_*\`), session bearers, JWTs, \`.env\` files, or \`credential.json\` contents must not appear in public channel chat. DMs and private channels are allowed for authorized secret handoff, but verify the audience first. If you accidentally paste one into a public channel, immediately tell the credential owner so they can rotate it.
1442
1442
 
1443
- If a tool or error output contains credential-shaped strings, redact them to \`sk_agent_<redacted>\` / \`sk_machine_<redacted>\` shape before reposting.
1443
+ If a tool or error output contains credential-shaped strings, redact them to \`sk_agent_<redacted>\` / \`sk_machine_<redacted>\` shape before posting to a public channel.
1444
1444
 
1445
1445
  **Profile credential resolution is strict.** When invoked as \`slock --profile <slug>\` or with \`SLOCK_PROFILE=<slug>\`, the CLI resolves credentials from \`$SLOCK_PROFILE_DIR\` \u2192 \`$SLOCK_HOME/profiles/<slug>\` \u2192 \`$HOME/.slock/profiles/<slug>\` in that order. It does **not** fall back to a different profile's credential, to an ambient user-level token, or to environment-leaked secrets \u2014 if your designated profile credential is missing or unreadable, the CLI fails closed rather than authenticating as someone else.`;
1446
1446
  }
@@ -1808,9 +1808,9 @@ You have MCP tools from the "chat" server. Use ONLY these for communication:
1808
1808
  17. **${cancelReminderCmd}** \u2014 Cancel one of your reminders by ID.`;
1809
1809
  const credentialHygieneSection = isCli ? cliGuideSections.credentialHygiene : `### Credential hygiene
1810
1810
 
1811
- **Never paste credentials into Slock messages, attachments, or task fields.** Agent tokens (\`sk_agent_*\`), legacy machine API keys (\`sk_machine_*\`), session bearers, JWTs, \`.env\` files, or \`credential.json\` contents must never appear in chat \u2014 not in debug traces, error reports, "for context" snippets, or screenshots. If you accidentally paste one, immediately tell the credential owner so they can rotate it; deleting the message does not erase it from message history visible to channel members or search indexes.
1811
+ **Never paste credentials into public Slock channels, public-channel threads, or public-channel task/attachment fields.** Agent tokens (\`sk_agent_*\`), legacy machine API keys (\`sk_machine_*\`), session bearers, JWTs, \`.env\` files, or \`credential.json\` contents must not appear in public channel chat. DMs and private channels are allowed for authorized secret handoff, but verify the audience first. If you accidentally paste one into a public channel, immediately tell the credential owner so they can rotate it.
1812
1812
 
1813
- If a tool or error output contains credential-shaped strings, redact them to \`sk_agent_<redacted>\` / \`sk_machine_<redacted>\` shape before reposting.`;
1813
+ If a tool or error output contains credential-shaped strings, redact them to \`sk_agent_<redacted>\` / \`sk_machine_<redacted>\` shape before posting to a public channel.`;
1814
1814
  const reminderSection = isCli ? cliGuideSections.reminders : `### Reminders
1815
1815
 
1816
1816
  Use reminders for follow-up that depends on future state you cannot resolve now, whether user-requested or self-driven. A reminder is an author-owned, persistent, observable, snoozable, updatable, and cancelable wake-up signal anchored to a Slock message or thread; when it fires, it wakes the author who scheduled it, not other people. If anchored to a message or thread, the receipt/fire system message is visible in that surface, but wake ownership does not transfer. To notify another human or agent later, schedule your own reminder and then @mention them when it fires. Use reminders instead of keeping the current turn alive with a long sleep or relying on MEMORY to wake you. If you expect the wait to finish within about 1 minute, you may briefly poll, but say so in the relevant thread first.
@@ -2081,6 +2081,19 @@ function listLegacySlockStatePaths(slockHome = resolveSlockHome(), homeDir = os.
2081
2081
  return candidates.filter((candidate) => existsSync(candidate.path));
2082
2082
  }
2083
2083
 
2084
// src/authEnv.ts
// Names of the environment variables that carry daemon / agent credentials.
var DAEMON_API_KEY_ENV = "SLOCK_MACHINE_API_KEY";
var SLOCK_AGENT_TOKEN_ENV = "SLOCK_AGENT_TOKEN";

/**
 * Remove the daemon machine API key from an environment map.
 *
 * The map is modified IN PLACE and the same object is returned, so callers
 * that must not disturb the original (for example `process.env`) should pass
 * a copy.
 *
 * @param {Record<string, string | undefined>} env Environment map to scrub.
 * @returns {Record<string, string | undefined>} The same `env` object.
 */
function scrubDaemonAuthEnv(env) {
  delete env[DAEMON_API_KEY_ENV];
  return env;
}

/**
 * Remove every daemon-owned credential variable before an environment is
 * handed to a child process: the machine API key and the raw agent token.
 *
 * Builds on scrubDaemonAuthEnv so the two scrubbers cannot drift apart if the
 * daemon-auth variable list ever changes. Like that helper, it mutates `env`
 * in place and returns it.
 *
 * @param {Record<string, string | undefined>} env Environment map to scrub.
 * @returns {Record<string, string | undefined>} The same `env` object.
 */
function scrubDaemonChildEnv(env) {
  scrubDaemonAuthEnv(env);
  delete env[SLOCK_AGENT_TOKEN_ENV];
  return env;
}
2096
+
2084
2097
  // src/agentCredentialProxy.ts
2085
2098
  import { randomBytes } from "crypto";
2086
2099
  import http from "http";
@@ -3567,7 +3580,9 @@ var LOOPBACK_NO_PROXY = "127.0.0.1,localhost";
3567
3580
  var CLI_TRANSPORT_TRACE_DIR_ENV = "SLOCK_CLI_TRANSPORT_TRACE_DIR";
3568
3581
  var safePathPart = (value) => value.replace(/[^a-zA-Z0-9_.-]/g, "_");
3569
3582
  var RAW_CREDENTIAL_ENV_DENYLIST = [
3570
- "SLOCK_AGENT_CREDENTIAL_KEY"
3583
+ "SLOCK_AGENT_TOKEN",
3584
+ "SLOCK_AGENT_CREDENTIAL_KEY",
3585
+ "SLOCK_AGENT_CREDENTIAL_KEY_FILE"
3571
3586
  ];
3572
3587
  var cachedOpencliBinPath;
3573
3588
  function resolveOpencliBinPath() {
@@ -3782,7 +3797,7 @@ exec ${shellSingleQuote(process.execPath)} ${shellSingleQuote(opencliBinPath)} "
3782
3797
  ...agentCredentialProxy ? {} : { SLOCK_AGENT_TOKEN_FILE: tokenFile },
3783
3798
  PATH: `${slockDir}${path2.delimiter}${process.env.PATH ?? ""}`
3784
3799
  };
3785
- delete spawnEnv.SLOCK_AGENT_TOKEN;
3800
+ scrubDaemonChildEnv(spawnEnv);
3786
3801
  for (const key of RAW_CREDENTIAL_ENV_DENYLIST) {
3787
3802
  delete spawnEnv[key];
3788
3803
  }
@@ -4211,7 +4226,7 @@ function resolveCommandOnWindows(command, env, execFileSyncFn, existsSyncFn) {
4211
4226
  }
4212
4227
  function resolveCommandOnPath(command, deps = {}) {
4213
4228
  const platform = deps.platform ?? process.platform;
4214
- const env = withWindowsUserEnvironment(deps.env ?? process.env, deps);
4229
+ const env = scrubDaemonChildEnv({ ...withWindowsUserEnvironment(deps.env ?? process.env, deps) });
4215
4230
  const execFileSyncFn = deps.execFileSyncFn ?? execFileSync;
4216
4231
  const existsSyncFn = deps.existsSyncFn ?? existsSync2;
4217
4232
  if (platform === "win32") {
@@ -4237,7 +4252,7 @@ function firstExistingPath(candidates, deps = {}) {
4237
4252
  return null;
4238
4253
  }
4239
4254
  function readCommandVersion(command, args = [], deps = {}) {
4240
- const env = withWindowsUserEnvironment(deps.env ?? process.env, deps);
4255
+ const env = scrubDaemonChildEnv({ ...withWindowsUserEnvironment(deps.env ?? process.env, deps) });
4241
4256
  const execFileSyncFn = deps.execFileSyncFn ?? execFileSync;
4242
4257
  try {
4243
4258
  const output = normalizeExecOutput(execFileSyncFn(command, [...args, "--version"], {
@@ -5591,11 +5606,11 @@ function detectCursorModels(runCommand = runCursorModelsCommand) {
5591
5606
  return parseCursorModelsOutput(String(result.stdout || ""));
5592
5607
  }
5593
5608
  function buildCursorModelProbeEnv(deps = {}) {
5594
- return withWindowsUserEnvironment({
5609
+ return scrubDaemonChildEnv(withWindowsUserEnvironment({
5595
5610
  ...deps.env ?? process.env,
5596
5611
  FORCE_COLOR: "0",
5597
5612
  NO_COLOR: "1"
5598
- }, deps);
5613
+ }, deps));
5599
5614
  }
5600
5615
  function runCursorModelsCommand() {
5601
5616
  return spawnSync("cursor-agent", ["models"], {
@@ -5651,7 +5666,7 @@ function resolveGeminiSpawn(commandArgs, deps = {}) {
5651
5666
  }
5652
5667
  const execFileSyncFn = deps.execFileSyncFn ?? execFileSync3;
5653
5668
  const existsSyncFn = deps.existsSyncFn ?? existsSync4;
5654
- const env = deps.env ?? process.env;
5669
+ const env = scrubDaemonChildEnv({ ...deps.env ?? process.env });
5655
5670
  const winPath = path6.win32;
5656
5671
  let geminiEntry = null;
5657
5672
  try {
@@ -5791,12 +5806,15 @@ var GeminiDriver = class {
5791
5806
  // src/drivers/kimi.ts
5792
5807
  import { randomUUID as randomUUID2 } from "crypto";
5793
5808
  import { spawn as spawn7 } from "child_process";
5794
- import { existsSync as existsSync5, readFileSync as readFileSync3, writeFileSync as writeFileSync3 } from "fs";
5809
+ import { chmodSync, existsSync as existsSync5, readFileSync as readFileSync3, writeFileSync as writeFileSync3 } from "fs";
5795
5810
  import os3 from "os";
5796
5811
  import path7 from "path";
5797
5812
  var KIMI_WIRE_PROTOCOL_VERSION = "1.3";
5798
5813
  var KIMI_SYSTEM_PROMPT_FILE = ".slock-kimi-system.md";
5799
5814
  var KIMI_AGENT_FILE = ".slock-kimi-agent.yaml";
5815
+ var KIMI_GENERATED_CONFIG_FILE = ".slock-kimi-config.toml";
5816
+ var SLOCK_KIMI_CONFIG_CONTENT_ENV = "SLOCK_KIMI_CONFIG_CONTENT";
5817
+ var SLOCK_KIMI_CONFIG_FILE_ENV = "SLOCK_KIMI_CONFIG_FILE";
5800
5818
  function parseToolArguments(raw) {
5801
5819
  if (typeof raw !== "string") return raw;
5802
5820
  try {
@@ -5805,6 +5823,73 @@ function parseToolArguments(raw) {
5805
5823
  return raw;
5806
5824
  }
5807
5825
  }
5826
/**
 * Locate and read the Kimi configuration the daemon should use.
 *
 * Sources are tried in this order:
 *   1. inline TOML in the SLOCK_KIMI_CONFIG_CONTENT variable (if non-blank);
 *   2. the file named by SLOCK_KIMI_CONFIG_FILE (if non-blank);
 *   3. `<home>/.kimi/config.toml`.
 *
 * @param {string} [home] Home directory used for the default config path.
 * @param {Record<string, string | undefined>} [env] Environment to consult.
 * @returns {{ raw: string | null, explicitPath: string | null, sourcePath: string }}
 *   `raw` is the config text, or null when the chosen file could not be read.
 *   `explicitPath` is the SLOCK_KIMI_CONFIG_FILE value when that variable
 *   selected the file, otherwise null. `sourcePath` is the file path that was
 *   tried, or the name of the inline-content variable for source 1.
 */
function readKimiConfigSource(home = os3.homedir(), env = process.env) {
  const inlineConfig = env[SLOCK_KIMI_CONFIG_CONTENT_ENV];
  if (inlineConfig && inlineConfig.trim()) {
    return {
      raw: inlineConfig,
      explicitPath: null,
      sourcePath: SLOCK_KIMI_CONFIG_CONTENT_ENV
    };
  }
  // Normalise once. The value is trimmed so that stray whitespace or a
  // trailing newline from a shell/env file does not turn a valid path into an
  // unreadable one; a blank value counts as "not set".
  const explicitPath = env[SLOCK_KIMI_CONFIG_FILE_ENV]?.trim() || null;
  const configPath = explicitPath ?? path7.join(home, ".kimi", "config.toml");
  let raw = null;
  try {
    raw = readFileSync3(configPath, "utf8");
  } catch {
    // Best effort: a missing or unreadable config is reported as raw === null
    // and the caller decides what that means.
  }
  return { raw, explicitPath, sourcePath: configPath };
}
5851
/**
 * Derive the environment for a spawned Kimi process from `env`.
 *
 * Works on a copy (the input is left untouched): colour output is forced off,
 * the two daemon-only Kimi config variables are dropped, and the result is
 * passed through scrubDaemonChildEnv before being returned.
 *
 * @param {Record<string, string | undefined>} [env] Source environment.
 * @returns {Record<string, string | undefined>} Fresh environment object.
 */
function buildKimiSpawnEnv(env = process.env) {
  const childEnv = { ...env, FORCE_COLOR: "0", NO_COLOR: "1" };
  const daemonOnlyKeys = [SLOCK_KIMI_CONFIG_CONTENT_ENV, SLOCK_KIMI_CONFIG_FILE_ENV];
  for (const key of daemonOnlyKeys) {
    delete childEnv[key];
  }
  return scrubDaemonChildEnv(childEnv);
}
5857
/**
 * Merge the environments that influence a Kimi launch into one new object.
 *
 * Later layers win on key conflicts: process.env first, then the agent's
 * configured `ctx.config.envVars`, then the caller-supplied `overrideEnv`.
 *
 * @param {{ config: { envVars?: Record<string, string> } }} ctx Launch context.
 * @param {Record<string, string | undefined>} [overrideEnv] Highest-priority layer.
 * @returns {Record<string, string | undefined>} The merged environment.
 */
function buildKimiEffectiveEnv(ctx, overrideEnv) {
  const agentEnv = ctx.config.envVars || {};
  const callerEnv = overrideEnv || {};
  return { ...process.env, ...agentEnv, ...callerEnv };
}
5864
/**
 * Work out the extra CLI arguments, child environment and config file for a
 * Kimi launch.
 *
 * Config selection (via readKimiConfigSource on the effective environment):
 *   - an explicit config-file path is passed through as `--config-file`, even
 *     when that file could not be read;
 *   - inline config content is materialised as KIMI_GENERATED_CONFIG_FILE in
 *     `ctx.workingDirectory` and that path is passed as `--config-file`;
 *   - otherwise no `--config-file` argument is added.
 * `--model <id>` is appended when `ctx.config.model` is set and not "default".
 *
 * NOTE(review): the driver spawn path that consumes `launch.args` also pushes
 * `--model` from its own runtime launch fields, so the flag may appear twice
 * on the command line. Confirm the Kimi CLI tolerates a repeated `--model`.
 *
 * @param {object} ctx Launch context; reads `workingDirectory`, `config.model`
 *   and (through buildKimiEffectiveEnv) `config.envVars`.
 * @param {{ env?: object, home?: string, writeGeneratedConfig?: boolean }} [opts]
 *   `env` overrides the effective environment, `home` overrides the home
 *   directory, and `writeGeneratedConfig: false` computes the result without
 *   touching the filesystem.
 * @returns {{ args: string[], env: object, configFilePath: string | null, configContent: string | null }}
 * @throws Filesystem errors from writing or chmod-ing the generated config.
 */
function buildKimiLaunchOptions(ctx, opts = {}) {
  const env = buildKimiEffectiveEnv(ctx, opts.env);
  const source = readKimiConfigSource(opts.home ?? os3.homedir(), env);
  const args = [];
  let configFilePath = null;
  let configContent = null;
  if (source.explicitPath) {
    configFilePath = source.explicitPath;
  } else if (source.raw !== null && source.sourcePath === SLOCK_KIMI_CONFIG_CONTENT_ENV) {
    configFilePath = path7.join(ctx.workingDirectory, KIMI_GENERATED_CONFIG_FILE);
    configContent = source.raw;
    if (opts.writeGeneratedConfig !== false) {
      // The `mode` option of writeFileSync only applies when the file is
      // created. Tighten a pre-existing file BEFORE writing so the config
      // content never sits in a file with looser permissions.
      try {
        chmodSync(configFilePath, 0o600);
      } catch (err) {
        // ENOENT just means there is no previous file to tighten.
        if (err?.code !== "ENOENT") throw err;
      }
      writeFileSync3(configFilePath, source.raw, { encoding: "utf8", mode: 0o600 });
      // Re-assert owner-only access in case the process umask altered the
      // mode used at creation time.
      chmodSync(configFilePath, 0o600);
    }
  }
  if (configFilePath) {
    args.push("--config-file", configFilePath);
  }
  if (ctx.config.model && ctx.config.model !== "default") {
    args.push("--model", ctx.config.model);
  }
  return {
    args,
    env: buildKimiSpawnEnv(env),
    configFilePath,
    configContent
  };
}
5808
5893
  function resolveKimiSpawn(commandArgs, deps = {}) {
5809
5894
  return {
5810
5895
  command: resolveCommandOnPath("kimi", deps) ?? "kimi",
@@ -5828,7 +5913,25 @@ var KimiDriver = class {
5828
5913
  };
5829
5914
  model = {
5830
5915
  detectedModelsVerifiedAs: "launchable",
5831
- toLaunchSpec: (modelId) => ({ args: ["--model", modelId] })
5916
+ toLaunchSpec: (modelId, ctx, opts) => {
5917
+ if (!ctx) return { args: ["--model", modelId] };
5918
+ const launchCtx = {
5919
+ ...ctx,
5920
+ config: {
5921
+ ...ctx.config,
5922
+ model: modelId
5923
+ }
5924
+ };
5925
+ const launch = buildKimiLaunchOptions(launchCtx, {
5926
+ home: opts?.home,
5927
+ writeGeneratedConfig: false
5928
+ });
5929
+ return {
5930
+ args: launch.args,
5931
+ env: launch.env,
5932
+ configFiles: launch.configFilePath ? [launch.configFilePath] : void 0
5933
+ };
5934
+ }
5832
5935
  };
5833
5936
  supportsStdinNotification = true;
5834
5937
  mcpToolPrefix = "";
@@ -5854,21 +5957,23 @@ var KimiDriver = class {
5854
5957
  ` system_prompt_path: ./${KIMI_SYSTEM_PROMPT_FILE}`,
5855
5958
  ""
5856
5959
  ].join("\n"), "utf8");
5960
+ const launch = buildKimiLaunchOptions(ctx);
5857
5961
  const args = [
5858
5962
  "--wire",
5859
5963
  "--yolo",
5860
5964
  "--agent-file",
5861
5965
  agentFilePath,
5862
5966
  "--session",
5863
- this.sessionId
5967
+ this.sessionId,
5968
+ ...launch.args
5864
5969
  ];
5865
5970
  const launchRuntimeFields = runtimeConfigToLaunchFields(ctx.config);
5866
5971
  if (launchRuntimeFields.model && launchRuntimeFields.model !== "default") {
5867
5972
  args.push("--model", launchRuntimeFields.model);
5868
5973
  }
5869
5974
  const spawnEnv = (await prepareCliTransport(ctx, { NO_COLOR: "1" })).spawnEnv;
5870
- const launch = resolveKimiSpawn(args);
5871
- const proc = spawn7(launch.command, launch.args, {
5975
+ const spawnTarget = resolveKimiSpawn(args);
5976
+ const proc = spawn7(spawnTarget.command, spawnTarget.args, {
5872
5977
  cwd: ctx.workingDirectory,
5873
5978
  stdio: ["pipe", "pipe", "pipe"],
5874
5979
  env: spawnEnv,
@@ -5876,7 +5981,7 @@ var KimiDriver = class {
5876
5981
  // and has an 8191-character command-line limit. Kimi's official
5877
5982
  // installer/uv entrypoint is an executable, so launch it directly and
5878
5983
  // keep prompts on stdin / files instead of routing through cmd.exe.
5879
- shell: launch.shell
5984
+ shell: spawnTarget.shell
5880
5985
  });
5881
5986
  proc.stdin?.write(JSON.stringify({
5882
5987
  jsonrpc: "2.0",
@@ -5990,14 +6095,9 @@ var KimiDriver = class {
5990
6095
  return detectKimiModels();
5991
6096
  }
5992
6097
  };
5993
- function detectKimiModels(home = os3.homedir()) {
5994
- const configPath = path7.join(home, ".kimi", "config.toml");
5995
- let raw;
5996
- try {
5997
- raw = readFileSync3(configPath, "utf8");
5998
- } catch {
5999
- return null;
6000
- }
6098
+ function detectKimiModels(home = os3.homedir(), opts = {}) {
6099
+ const raw = readKimiConfigSource(home, opts.env).raw;
6100
+ if (raw === null) return null;
6001
6101
  const models = [];
6002
6102
  const sectionRe = /^\s*\[models(?:\.([^\]]+)|"\.[^"]+"|\."[^"]+")\s*\]\s*$/gm;
6003
6103
  const lineRe = /^\s*\[models\.(.+?)\s*\]\s*$/gm;
@@ -6237,7 +6337,7 @@ function runOpenCodeModelsCommand(home, deps = {}) {
6237
6337
  const platform = deps.platform ?? process.platform;
6238
6338
  const spawnSyncFn = deps.spawnSyncFn ?? spawnSync2;
6239
6339
  const result = spawnSyncFn("opencode", ["models"], {
6240
- env: { ...process.env, HOME: home, FORCE_COLOR: "0", NO_COLOR: "1" },
6340
+ env: scrubDaemonChildEnv({ ...process.env, HOME: home, FORCE_COLOR: "0", NO_COLOR: "1" }),
6241
6341
  encoding: "utf8",
6242
6342
  timeout: 5e3,
6243
6343
  shell: platform === "win32"
@@ -6513,6 +6613,7 @@ import {
6513
6613
  VERSION as PI_SDK_VERSION
6514
6614
  } from "@earendil-works/pi-coding-agent";
6515
6615
  var PI_SESSION_DIR = ".pi-sessions";
6616
+ var PI_SDK_COMPACTION_ENABLED = true;
6516
6617
  var PI_PROVIDER_LABELS = {
6517
6618
  google: "Google",
6518
6619
  openai: "OpenAI",
@@ -6711,7 +6812,7 @@ async function createPiAgentSessionForContext(ctx, sessionId) {
6711
6812
  if (launchRuntimeFields.model && launchRuntimeFields.model !== "default" && !model) {
6712
6813
  throw new Error(`Pi model not found: ${launchRuntimeFields.model}`);
6713
6814
  }
6714
- const settingsManager = SettingsManager.inMemory({ compaction: { enabled: false } });
6815
+ const settingsManager = SettingsManager.inMemory({ compaction: { enabled: PI_SDK_COMPACTION_ENABLED } });
6715
6816
  const resourceLoader = new DefaultResourceLoader({
6716
6817
  cwd: ctx.workingDirectory,
6717
6818
  agentDir,
@@ -7985,6 +8086,9 @@ var STDIN_NOTIFICATION_INITIAL_DELAY_MS = 3e3;
7985
8086
  var STDIN_NOTIFICATION_RETRY_DELAY_MS = 15e3;
7986
8087
  var RUNTIME_ERROR_DELIVERY_BACKOFF_BASE_MS = 1e4;
7987
8088
  var RUNTIME_ERROR_DELIVERY_BACKOFF_MAX_MS = 5 * 6e4;
8089
+ var SPAWN_FAIL_BACKOFF_BASE_MS = 1e3;
8090
+ var SPAWN_FAIL_BACKOFF_MAX_MS = 3e4;
8091
+ var SPAWN_FAIL_BACKOFF_THRESHOLD = 3;
7988
8092
  var COMPACTION_STALE_MS = 5 * 6e4;
7989
8093
  var RUNTIME_PROGRESS_STALE_MS = 15 * 6e4;
7990
8094
  var DEFAULT_RUNTIME_START_TIMEOUT_MS = 2 * 6e4;
@@ -8492,6 +8596,12 @@ function summarizeCrash(code, signal) {
8492
8596
  if (typeof code === "number") return `exit code ${code}`;
8493
8597
  return "unknown exit";
8494
8598
  }
8599
/**
 * Collect the error texts that should be inspected for the agent's current
 * failure.
 *
 * `ap.lastRuntimeError` is included only while `ap.runtimeErrorSinceProgress`
 * is truthy; it is followed by the entries of `ap.recentDecisionStderr`.
 * Falsy entries (null, undefined, empty string) are dropped.
 *
 * @param {object} ap Agent process record.
 * @returns {string[]} Non-empty candidate strings, runtime error first.
 */
function currentErrorCandidates(ap) {
  const candidates = [];
  if (ap.runtimeErrorSinceProgress && ap.lastRuntimeError) {
    candidates.push(ap.lastRuntimeError);
  }
  for (const line of ap.recentDecisionStderr) {
    if (line) candidates.push(line);
  }
  return candidates;
}
8495
8605
  function classifyTerminalFailure(ap) {
8496
8606
  const candidates = [
8497
8607
  ap.lastRuntimeError,
@@ -8535,21 +8645,16 @@ function isCodexProviderReconnectLog(text) {
8535
8645
  function isCodexBenignTransportLog(text) {
8536
8646
  return /Falling back from WebSockets/i.test(text);
8537
8647
  }
8648
/**
 * Report whether a log/error line contains one of the closed-stdin or
 * closed-session phrases (matched case-insensitively).
 *
 * @param {string} text Line to inspect.
 * @returns {boolean} True when any of the phrases occurs in `text`.
 */
function isStdinClassRecoveryLine(text) {
  const closedStdinPattern = /write_stdin failed|stdin is closed|closed for this session|session.*closed/i;
  return closedStdinPattern.test(text);
}
8538
8651
  function hasDirectStdinRecoveryEvidence(ap) {
8539
- const candidates = [
8540
- ap.lastRuntimeError,
8541
- ...ap.recentStderr
8542
- ].filter((value) => !!value);
8543
- return candidates.some(
8544
- (text) => /write_stdin failed|stdin is closed|closed for this session|session.*closed/i.test(text)
8545
- );
8652
+ const candidates = currentErrorCandidates(ap);
8653
+ return candidates.some((text) => isStdinClassRecoveryLine(text));
8546
8654
  }
8547
8655
  function resumeSessionRecoveryReason(ap) {
8548
8656
  if (!ap.sessionId) return null;
8549
- const candidates = [
8550
- ap.lastRuntimeError,
8551
- ...ap.recentStderr
8552
- ].filter((value) => !!value);
8657
+ const candidates = currentErrorCandidates(ap);
8553
8658
  if (ap.driver.id === "claude") {
8554
8659
  return candidates.some((text) => /No conversation found with session ID/i.test(text)) ? "missing" : null;
8555
8660
  }
@@ -8839,6 +8944,7 @@ var AgentProcessManager = class _AgentProcessManager {
8839
8944
  maxConcurrentAgentStarts;
8840
8945
  agentStartIntervalMs;
8841
8946
  startingInboxes = /* @__PURE__ */ new Map();
8947
+ pendingStartRebinds = /* @__PURE__ */ new Map();
8842
8948
  /** Cached configs for agents whose process exited normally — enables auto-restart on next message */
8843
8949
  idleAgentConfigs = /* @__PURE__ */ new Map();
8844
8950
  slockCliPath;
@@ -8860,6 +8966,11 @@ var AgentProcessManager = class _AgentProcessManager {
8860
8966
  runtimeExitTraceAttrs = /* @__PURE__ */ new WeakMap();
8861
8967
  agentVisibleBoundaries = /* @__PURE__ */ new Map();
8862
8968
  agentVisibleMessageIds = /* @__PURE__ */ new Map();
8969
+ // Spawn-fail backoff state per-agent (lifted outside AgentProcess because spawn fails
8970
+ // BEFORE ap exists; rate-state must persist across stop/respawn so churn can't bypass).
8971
+ // Explicit stop clears (fresh launch resets dedup clock — CC1 lifecycle pattern); silent
8972
+ // stop preserves (same-launch respawn must keep counter or churn bypasses the cap).
8973
+ agentSpawnFailBackoff = /* @__PURE__ */ new Map();
8863
8974
  daemonVersion;
8864
8975
  computerVersion;
8865
8976
  constructor(sendToServer, daemonApiKey, opts) {
@@ -8939,6 +9050,57 @@ var AgentProcessManager = class _AgentProcessManager {
8939
9050
  const id = typeof message.message_id === "string" ? message.message_id : typeof message.id === "string" ? message.id : "";
8940
9051
  return id.length > 0 && this.getVisibleMessageIdSet(agentId, target)?.has(id) === true;
8941
9052
  }
9053
+ // ----- SPAWN-FAIL BACKOFF (per-agent) ----------------------------------
9054
+ // Anchored at auto_restart_from_idle (apm:3596) + runtime_profile_auto_restart (apm:4137).
9055
+ // Threshold > 1 → first SPAWN_FAIL_BACKOFF_THRESHOLD failures behave as today (preserves
9056
+ // single-transient-hiccup); subsequent failure schedules a per-agent cooldown gating new
9057
+ // spawn attempts. Successful spawn resets state. State lives outside AgentProcess because
9058
+ // spawn fails BEFORE ap exists and the rate-window must persist across stop/respawn
9059
+ // (else stop/start churn bypasses the cap — CC1 lifecycle pattern). Never advances any
9060
+ // consume cursor or model-seen state (3-cursor orthogonality with CC1/CC2).
9061
+ getOrCreateSpawnFailBackoff(agentId) {
9062
+ let state = this.agentSpawnFailBackoff.get(agentId);
9063
+ if (!state) {
9064
+ state = createRuntimeErrorDeliveryBackoffState();
9065
+ this.agentSpawnFailBackoff.set(agentId, state);
9066
+ }
9067
+ return state;
9068
+ }
9069
+ isSpawnFailBackoffActive(agentId) {
9070
+ const state = this.agentSpawnFailBackoff.get(agentId);
9071
+ if (!state) return false;
9072
+ return state.untilMs > 0 && this.clockNow() < state.untilMs;
9073
+ }
9074
+ clockNow() {
9075
+ return Date.now();
9076
+ }
9077
+ recordSpawnFailure(agentId, reason) {
9078
+ const state = this.getOrCreateSpawnFailBackoff(agentId);
9079
+ state.attempts += 1;
9080
+ state.reason = reason;
9081
+ if (state.attempts <= SPAWN_FAIL_BACKOFF_THRESHOLD) {
9082
+ state.untilMs = 0;
9083
+ return { backoffActive: false, attempts: state.attempts, untilMs: 0 };
9084
+ }
9085
+ const exponent = Math.min(state.attempts - SPAWN_FAIL_BACKOFF_THRESHOLD, 10);
9086
+ const baseDelay = Math.min(SPAWN_FAIL_BACKOFF_MAX_MS, SPAWN_FAIL_BACKOFF_BASE_MS * Math.pow(2, exponent));
9087
+ state.untilMs = this.clockNow() + Math.floor(baseDelay);
9088
+ if (state.timer) clearTimeout(state.timer);
9089
+ state.timer = setTimeout(() => {
9090
+ const s = this.agentSpawnFailBackoff.get(agentId);
9091
+ if (s) {
9092
+ s.timer = null;
9093
+ s.untilMs = 0;
9094
+ }
9095
+ }, Math.max(1, state.untilMs - this.clockNow()));
9096
+ return { backoffActive: true, attempts: state.attempts, untilMs: state.untilMs };
9097
+ }
9098
+ resetSpawnFailBackoff(agentId) {
9099
+ const state = this.agentSpawnFailBackoff.get(agentId);
9100
+ if (!state) return;
9101
+ if (state.timer) clearTimeout(state.timer);
9102
+ this.agentSpawnFailBackoff.delete(agentId);
9103
+ }
8942
9104
  scheduleStdinNotification(agentId, ap, delayMs) {
8943
9105
  return ap.notifications.schedule(() => {
8944
9106
  this.sendStdinNotification(agentId);
@@ -9428,6 +9590,92 @@ var AgentProcessManager = class _AgentProcessManager {
9428
9590
  ...attrs
9429
9591
  };
9430
9592
  }
9593
+ recordStartRebind(agentId, start, reason, previousLaunchId, nextLaunchId, sessionId) {
9594
+ this.recordDaemonTrace("daemon.agent.start.rebound", {
9595
+ ...this.startQueueTraceAttrs(
9596
+ agentId,
9597
+ start.config,
9598
+ start.wakeMessage,
9599
+ start.unreadSummary,
9600
+ start.resumePrompt,
9601
+ start.launchId,
9602
+ start.wakeMessageTransient ?? false
9603
+ ),
9604
+ reason,
9605
+ previous_launch_id_present: Boolean(previousLaunchId),
9606
+ next_launch_id_present: Boolean(nextLaunchId),
9607
+ session_id_present: Boolean(sessionId)
9608
+ });
9609
+ }
9610
+ sameWakeMessage(left, right) {
9611
+ if (!left || !right) return left === right;
9612
+ if (left.message_id && right.message_id) return left.message_id === right.message_id;
9613
+ return left === right;
9614
+ }
9615
+ rebindQueuedStart(agentId, start, reason) {
9616
+ const item = this.queuedAgentStarts.get(agentId);
9617
+ if (!item) {
9618
+ return false;
9619
+ }
9620
+ const previousLaunchId = item.launchId || null;
9621
+ const nextLaunchId = start.launchId || previousLaunchId;
9622
+ const previousWakeMessage = item.wakeMessage;
9623
+ if (previousWakeMessage && start.wakeMessage && !this.sameWakeMessage(previousWakeMessage, start.wakeMessage)) {
9624
+ const pending = this.startingInboxes.get(agentId) || [];
9625
+ pending.push(previousWakeMessage);
9626
+ this.startingInboxes.set(agentId, pending);
9627
+ }
9628
+ item.config = start.config;
9629
+ item.unreadSummary = start.unreadSummary;
9630
+ item.resumePrompt = start.resumePrompt;
9631
+ item.launchId = nextLaunchId || void 0;
9632
+ if (start.wakeMessage) {
9633
+ item.wakeMessage = start.wakeMessage;
9634
+ item.wakeMessageTransient = start.wakeMessageTransient;
9635
+ }
9636
+ this.recordStartRebind(
9637
+ agentId,
9638
+ { ...start, launchId: nextLaunchId || void 0 },
9639
+ reason,
9640
+ previousLaunchId,
9641
+ nextLaunchId,
9642
+ null
9643
+ );
9644
+ return true;
9645
+ }
9646
+ rebindRunningStart(agentId, start, reason) {
9647
+ const ap = this.agents.get(agentId);
9648
+ if (!ap) {
9649
+ this.pendingStartRebinds.set(agentId, start);
9650
+ return false;
9651
+ }
9652
+ const previousLaunchId = ap.launchId;
9653
+ const nextLaunchId = start.launchId || ap.launchId || null;
9654
+ const nextSessionId = ap.sessionId || start.config.sessionId || null;
9655
+ ap.launchId = nextLaunchId;
9656
+ ap.sessionId = nextSessionId;
9657
+ ap.config = {
9658
+ ...start.config,
9659
+ sessionId: nextSessionId
9660
+ };
9661
+ this.idleAgentConfigs.set(agentId, {
9662
+ config: { ...stripManagedRunnerCredential(ap.config), sessionId: nextSessionId },
9663
+ sessionId: nextSessionId,
9664
+ launchId: nextLaunchId
9665
+ });
9666
+ this.recordStartRebind(agentId, start, reason, previousLaunchId, nextLaunchId, nextSessionId);
9667
+ this.sendAgentStatus(agentId, "active", nextLaunchId);
9668
+ if (nextSessionId) {
9669
+ this.sendToServer({ type: "agent:session", agentId, sessionId: nextSessionId, launchId: nextLaunchId || void 0 });
9670
+ }
9671
+ if (start.wakeMessage) {
9672
+ const accepted = this.deliverMessage(agentId, start.wakeMessage, { transient: start.wakeMessageTransient === true });
9673
+ if (accepted instanceof Promise) {
9674
+ accepted.catch((err) => logger.error(`[Agent ${agentId}] Failed to deliver wake message after start rebind`, err));
9675
+ }
9676
+ }
9677
+ return true;
9678
+ }
9431
9679
  async startAgent(agentId, config, wakeMessage, unreadSummary, resumePrompt, launchId, wakeMessageTransient = false) {
9432
9680
  this.recordDaemonTrace("daemon.agent.start.requested", this.startQueueTraceAttrs(agentId, config, wakeMessage, unreadSummary, resumePrompt, launchId, wakeMessageTransient));
9433
9681
  if (this.agents.has(agentId)) {
@@ -9435,7 +9683,8 @@ var AgentProcessManager = class _AgentProcessManager {
9435
9683
  ...this.startQueueTraceAttrs(agentId, config, wakeMessage, unreadSummary, resumePrompt, launchId, wakeMessageTransient),
9436
9684
  reason: "already_running"
9437
9685
  });
9438
- logger.info(`[Agent ${agentId}] Start ignored (already running)`);
9686
+ this.rebindRunningStart(agentId, { config, wakeMessage, unreadSummary, resumePrompt, launchId, wakeMessageTransient }, "already_running");
9687
+ logger.info(`[Agent ${agentId}] Start rebound (already running)`);
9439
9688
  return;
9440
9689
  }
9441
9690
  if (this.agentsStarting.has(agentId)) {
@@ -9443,7 +9692,8 @@ var AgentProcessManager = class _AgentProcessManager {
9443
9692
  ...this.startQueueTraceAttrs(agentId, config, wakeMessage, unreadSummary, resumePrompt, launchId, wakeMessageTransient),
9444
9693
  reason: "already_starting"
9445
9694
  });
9446
- logger.info(`[Agent ${agentId}] Start ignored (startup in progress)`);
9695
+ this.pendingStartRebinds.set(agentId, { config, wakeMessage, unreadSummary, resumePrompt, launchId, wakeMessageTransient });
9696
+ logger.info(`[Agent ${agentId}] Start rebind deferred (startup in progress)`);
9447
9697
  return;
9448
9698
  }
9449
9699
  if (this.queuedAgentStarts.has(agentId)) {
@@ -9451,7 +9701,8 @@ var AgentProcessManager = class _AgentProcessManager {
9451
9701
  ...this.startQueueTraceAttrs(agentId, config, wakeMessage, unreadSummary, resumePrompt, launchId, wakeMessageTransient),
9452
9702
  reason: "already_queued"
9453
9703
  });
9454
- logger.info(`[Agent ${agentId}] Start ignored (startup already queued)`);
9704
+ this.rebindQueuedStart(agentId, { config, wakeMessage, unreadSummary, resumePrompt, launchId, wakeMessageTransient }, "already_queued");
9705
+ logger.info(`[Agent ${agentId}] Queued start rebound (startup already queued)`);
9455
9706
  return;
9456
9707
  }
9457
9708
  return new Promise((resolve, reject) => {
@@ -9510,6 +9761,11 @@ var AgentProcessManager = class _AgentProcessManager {
9510
9761
  ...this.startQueueTraceAttrs(item.agentId, item.config, item.wakeMessage, item.unreadSummary, item.resumePrompt, item.launchId, item.wakeMessageTransient),
9511
9762
  reason: "already_running_or_starting"
9512
9763
  });
9764
+ if (this.agents.has(item.agentId)) {
9765
+ this.rebindRunningStart(item.agentId, item, "already_running_or_starting");
9766
+ } else {
9767
+ this.pendingStartRebinds.set(item.agentId, item);
9768
+ }
9513
9769
  logger.info(`[Agent ${item.agentId}] Queued start skipped (already running or starting)`);
9514
9770
  item.resolve();
9515
9771
  this.pumpAgentStartQueue();
@@ -9596,7 +9852,8 @@ var AgentProcessManager = class _AgentProcessManager {
9596
9852
  ...this.startQueueTraceAttrs(agentId, config, wakeMessage, unreadSummary, resumePrompt, launchId, wakeMessageTransient),
9597
9853
  reason: "already_running"
9598
9854
  });
9599
- logger.info(`[Agent ${agentId}] Start ignored (already running)`);
9855
+ this.rebindRunningStart(agentId, { config, wakeMessage, unreadSummary, resumePrompt, launchId, wakeMessageTransient }, "already_running");
9856
+ logger.info(`[Agent ${agentId}] Start rebound (already running)`);
9600
9857
  return;
9601
9858
  }
9602
9859
  if (this.agentsStarting.has(agentId)) {
@@ -9604,7 +9861,8 @@ var AgentProcessManager = class _AgentProcessManager {
9604
9861
  ...this.startQueueTraceAttrs(agentId, config, wakeMessage, unreadSummary, resumePrompt, launchId, wakeMessageTransient),
9605
9862
  reason: "already_starting"
9606
9863
  });
9607
- logger.info(`[Agent ${agentId}] Start ignored (startup in progress)`);
9864
+ this.pendingStartRebinds.set(agentId, { config, wakeMessage, unreadSummary, resumePrompt, launchId, wakeMessageTransient });
9865
+ logger.info(`[Agent ${agentId}] Start rebind deferred (startup in progress)`);
9608
9866
  return;
9609
9867
  }
9610
9868
  this.agentsStarting.add(agentId);
@@ -9732,7 +9990,12 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
9732
9990
  nativeStandingPrompt: Boolean(driver.supportsNativeStandingPrompt)
9733
9991
  });
9734
9992
  const effectiveConfig = await this.buildSpawnConfig(agentId, runtimeConfig);
9735
- const canDeferEmptyStart = driver.deferSpawnUntilMessage === true && !wakeMessage && !runtimeConfig.runtimeProfileControl && (!unreadSummary || Object.keys(unreadSummary).length === 0);
9993
+ const pendingStartRebind = this.pendingStartRebinds.get(agentId);
9994
+ if (pendingStartRebind) {
9995
+ this.pendingStartRebinds.delete(agentId);
9996
+ }
9997
+ const effectiveLaunchId = pendingStartRebind?.launchId || launchId || null;
9998
+ const canDeferEmptyStart = driver.deferSpawnUntilMessage === true && !wakeMessage && !pendingStartRebind?.wakeMessage && !runtimeConfig.runtimeProfileControl && (!unreadSummary || Object.keys(unreadSummary).length === 0);
9736
9999
  if (canDeferEmptyStart) {
9737
10000
  const pendingMessages = this.startingInboxes.get(agentId) || [];
9738
10001
  this.startingInboxes.delete(agentId);
@@ -9740,12 +10003,12 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
9740
10003
  this.idleAgentConfigs.set(agentId, {
9741
10004
  config: effectiveConfig,
9742
10005
  sessionId: effectiveConfig.sessionId || null,
9743
- launchId: launchId || null
10006
+ launchId: effectiveLaunchId
9744
10007
  });
9745
- this.sendAgentStatus(agentId, "active", launchId || null);
10008
+ this.sendAgentStatus(agentId, "active", effectiveLaunchId);
9746
10009
  this.broadcastActivity(agentId, "online", "Process idle");
9747
10010
  this.recordDaemonTrace("daemon.agent.spawn.deferred", {
9748
- ...this.startQueueTraceAttrs(agentId, config, wakeMessage, unreadSummary, resumePrompt, launchId, wakeMessageTransient),
10011
+ ...this.startQueueTraceAttrs(agentId, config, wakeMessage, unreadSummary, resumePrompt, effectiveLaunchId || void 0, wakeMessageTransient),
9749
10012
  pending_messages_count: pendingMessages.length,
9750
10013
  reason: "defer_until_concrete_message"
9751
10014
  });
@@ -9763,7 +10026,7 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
9763
10026
  workingDirectory: agentDataDir,
9764
10027
  slockCliPath: this.slockCliPath,
9765
10028
  daemonApiKey: this.daemonApiKey,
9766
- launchId: launchId || null,
10029
+ launchId: effectiveLaunchId,
9767
10030
  agentCredentialProxyInboxCoordinator: this.createAgentProxyInboxCoordinator(agentId),
9768
10031
  cliTransportTraceDir: this.cliTransportTraceDir
9769
10032
  };
@@ -9774,7 +10037,7 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
9774
10037
  inbox: wakeMessageDeliveredAsInboxUpdate && wakeMessage ? [wakeMessage, ...startingInboxMessages] : startingInboxMessages,
9775
10038
  config: runtimeConfig,
9776
10039
  sessionId: runtimeConfig.sessionId || null,
9777
- launchId: launchId || null,
10040
+ launchId: effectiveLaunchId,
9778
10041
  startupWakeMessage: wakeMessage,
9779
10042
  startupUnreadSummary: unreadSummary,
9780
10043
  startupResumePrompt: resumePrompt,
@@ -9793,6 +10056,8 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
9793
10056
  recentStdout: [],
9794
10057
  recentStderr: [],
9795
10058
  lastRuntimeError: null,
10059
+ recentDecisionStderr: [],
10060
+ runtimeErrorSinceProgress: false,
9796
10061
  runtimeErrorDeliveryBackoff: createRuntimeErrorDeliveryBackoffState(),
9797
10062
  spawnError: null,
9798
10063
  exitCode: null,
@@ -9808,8 +10073,11 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
9808
10073
  this.idleAgentConfigs.set(agentId, {
9809
10074
  config: { ...effectiveConfig, sessionId: effectiveConfig.sessionId || null },
9810
10075
  sessionId: effectiveConfig.sessionId || null,
9811
- launchId: launchId || null
10076
+ launchId: effectiveLaunchId
9812
10077
  });
10078
+ if (pendingStartRebind) {
10079
+ this.recordStartRebind(agentId, pendingStartRebind, "startup_registered", launchId || null, effectiveLaunchId, agentProcess.sessionId);
10080
+ }
9813
10081
  this.startRuntimeTrace(agentId, agentProcess, "spawn", wakeMessage ? [wakeMessage] : void 0, runtimeInputTraceAttrs);
9814
10082
  this.agentsStarting.delete(agentId);
9815
10083
  if (runtimeConfig.runtimeProfileControl) {
@@ -9836,6 +10104,7 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
9836
10104
  if (driver.id === "codex" && isCodexProviderReconnectLog(text)) {
9837
10105
  if (current) {
9838
10106
  current.recentStderr = pushRecentStderr(current.recentStderr, text);
10107
+ current.recentDecisionStderr = pushRecentStderr(current.recentDecisionStderr, text);
9839
10108
  }
9840
10109
  this.recordDaemonTrace("daemon.agent.provider_reconnect", {
9841
10110
  agentId,
@@ -9852,6 +10121,7 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
9852
10121
  if (driver.id === "codex" && isCodexBenignTransportLog(text)) return;
9853
10122
  if (current) {
9854
10123
  current.recentStderr = pushRecentStderr(current.recentStderr, text);
10124
+ current.recentDecisionStderr = pushRecentStderr(current.recentDecisionStderr, text);
9855
10125
  }
9856
10126
  logger.error(`[Agent ${agentId} stderr]: ${text}`);
9857
10127
  });
@@ -10042,16 +10312,26 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
10042
10312
  throw new Error(`Runtime session failed to start: ${startResult.reason}${startResult.error ? ` (${startResult.error})` : ""}`);
10043
10313
  }
10044
10314
  this.recordDaemonTrace("daemon.agent.spawn.created", {
10045
- ...this.startQueueTraceAttrs(agentId, effectiveConfig, wakeMessage, unreadSummary, resumePrompt, launchId, wakeMessageTransient),
10315
+ ...this.startQueueTraceAttrs(agentId, effectiveConfig, wakeMessage, unreadSummary, resumePrompt, agentProcess.launchId || void 0, wakeMessageTransient),
10046
10316
  detached: false,
10047
10317
  new_session: false,
10048
10318
  process_pid_present: typeof runtime.pid === "number"
10049
10319
  });
10050
- this.sendAgentStatus(agentId, "active", launchId || null);
10320
+ this.sendAgentStatus(agentId, "active", agentProcess.launchId);
10321
+ if (pendingStartRebind && agentProcess.sessionId) {
10322
+ this.sendToServer({ type: "agent:session", agentId, sessionId: agentProcess.sessionId, launchId: agentProcess.launchId || void 0 });
10323
+ }
10324
+ if (pendingStartRebind?.wakeMessage) {
10325
+ const accepted = this.deliverMessage(agentId, pendingStartRebind.wakeMessage, { transient: pendingStartRebind.wakeMessageTransient === true });
10326
+ if (accepted instanceof Promise) {
10327
+ accepted.catch((err) => logger.error(`[Agent ${agentId}] Failed to deliver wake message after startup rebind`, err));
10328
+ }
10329
+ }
10051
10330
  this.broadcastActivity(agentId, "working", "Starting\u2026");
10052
10331
  this.startRuntimeStartupTimeout(agentId, agentProcess);
10053
10332
  } catch (err) {
10054
10333
  this.agentsStarting.delete(agentId);
10334
+ this.pendingStartRebinds.delete(agentId);
10055
10335
  this.cleanupFailedRuntimeStart(agentId, agentProcess, err);
10056
10336
  throw err;
10057
10337
  }
@@ -10320,6 +10600,7 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
10320
10600
  }
10321
10601
  async stopAgent(agentId, { wait = false, silent = false } = {}) {
10322
10602
  this.cancelQueuedAgentStart(agentId, "stop requested");
10603
+ this.pendingStartRebinds.delete(agentId);
10323
10604
  this.idleAgentConfigs.delete(agentId);
10324
10605
  const ap = this.agents.get(agentId);
10325
10606
  if (!ap) {
@@ -10342,6 +10623,7 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
10342
10623
  this.activityClientSeqByAgent.delete(agentId);
10343
10624
  this.agentVisibleBoundaries.delete(agentId);
10344
10625
  this.agentVisibleMessageIds.delete(agentId);
10626
+ this.resetSpawnFailBackoff(agentId);
10345
10627
  }
10346
10628
  this.runtimeExitTraceAttrs.set(ap.runtime, {
10347
10629
  stop_source: silent ? "daemon_internal" : "explicit_request",
@@ -10438,6 +10720,25 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
10438
10720
  return true;
10439
10721
  }
10440
10722
  logger.info(`[Agent ${agentId}] Starting from idle state for new message`);
10723
+ if (this.isSpawnFailBackoffActive(agentId)) {
10724
+ const state = this.agentSpawnFailBackoff.get(agentId);
10725
+ const pending = this.startingInboxes.get(agentId) || [];
10726
+ pending.push(message);
10727
+ this.startingInboxes.set(agentId, pending);
10728
+ this.recordDaemonTrace("daemon.agent.delivery.routed", this.deliveryTraceAttrs(agentId, message, {
10729
+ outcome: "spawn_fail_cooldown_active",
10730
+ accepted: true,
10731
+ process_present: false,
10732
+ cached_idle_config_present: true,
10733
+ runtime: cached.config.runtime,
10734
+ session_id_present: Boolean(cached.sessionId),
10735
+ launchId: cached.launchId || void 0,
10736
+ spawn_fail_attempts: state.attempts,
10737
+ spawn_fail_until_ms: state.untilMs,
10738
+ starting_inbox_count: pending.length
10739
+ }));
10740
+ return true;
10741
+ }
10441
10742
  this.idleAgentConfigs.delete(agentId);
10442
10743
  this.recordDaemonTrace("daemon.agent.delivery.routed", this.deliveryTraceAttrs(agentId, message, {
10443
10744
  outcome: "auto_restart_from_idle",
@@ -10448,12 +10749,33 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
10448
10749
  session_id_present: Boolean(cached.sessionId),
10449
10750
  launchId: cached.launchId || void 0
10450
10751
  }));
10451
- return this.startAgent(agentId, cached.config, message, void 0, void 0, cached.launchId || void 0, transientDelivery).then(() => true, (err) => {
10752
+ return this.startAgent(agentId, cached.config, message, void 0, void 0, cached.launchId || void 0, transientDelivery).then(() => {
10753
+ this.resetSpawnFailBackoff(agentId);
10754
+ return true;
10755
+ }, (err) => {
10452
10756
  logger.error(`[Agent ${agentId}] Failed to auto-restart`, err);
10453
10757
  if (this.reportRunnerCredentialMintFailure(agentId, err, cached.launchId, "idle_auto_restart")) {
10758
+ const report2 = this.recordSpawnFailure(agentId, "runner_credential_mint");
10759
+ this.recordDaemonTrace("daemon.agent.spawn.fail_backoff", {
10760
+ agentId,
10761
+ source: "idle_auto_restart",
10762
+ reason: "runner_credential_mint",
10763
+ attempts: report2.attempts,
10764
+ cooldown_active: report2.backoffActive,
10765
+ until_ms: report2.untilMs
10766
+ });
10454
10767
  return false;
10455
10768
  }
10456
10769
  this.idleAgentConfigs.set(agentId, cached);
10770
+ const report = this.recordSpawnFailure(agentId, "spawn_error");
10771
+ this.recordDaemonTrace("daemon.agent.spawn.fail_backoff", {
10772
+ agentId,
10773
+ source: "idle_auto_restart",
10774
+ reason: "spawn_error",
10775
+ attempts: report.attempts,
10776
+ cooldown_active: report.backoffActive,
10777
+ until_ms: report.untilMs
10778
+ });
10457
10779
  return false;
10458
10780
  });
10459
10781
  }
@@ -10928,10 +11250,35 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
10928
11250
  const cached = this.idleAgentConfigs.get(agentId);
10929
11251
  if (cached) {
10930
11252
  logger.info(`[Agent ${agentId}] Starting from idle state for runtime profile ${kind} ${key}`);
11253
+ if (this.isSpawnFailBackoffActive(agentId)) {
11254
+ const state = this.agentSpawnFailBackoff.get(agentId);
11255
+ span.end("ok", {
11256
+ attrs: {
11257
+ outcome: "spawn_fail_cooldown_active",
11258
+ runtime: cached.config.runtime,
11259
+ launchId: cached.launchId || void 0,
11260
+ spawn_fail_attempts: state.attempts,
11261
+ spawn_fail_until_ms: state.untilMs
11262
+ }
11263
+ });
11264
+ return true;
11265
+ }
10931
11266
  this.idleAgentConfigs.delete(agentId);
10932
- return this.startAgent(agentId, cached.config, message, void 0, void 0, cached.launchId || void 0).then(() => true, (err) => {
11267
+ return this.startAgent(agentId, cached.config, message, void 0, void 0, cached.launchId || void 0).then(() => {
11268
+ this.resetSpawnFailBackoff(agentId);
11269
+ return true;
11270
+ }, (err) => {
10933
11271
  logger.error(`[Agent ${agentId}] Failed to auto-restart for runtime profile notification`, err);
10934
11272
  if (this.reportRunnerCredentialMintFailure(agentId, err, cached.launchId, "runtime_profile_auto_restart")) {
11273
+ const report2 = this.recordSpawnFailure(agentId, "runner_credential_mint");
11274
+ this.recordDaemonTrace("daemon.agent.spawn.fail_backoff", {
11275
+ agentId,
11276
+ source: "runtime_profile_auto_restart",
11277
+ reason: "runner_credential_mint",
11278
+ attempts: report2.attempts,
11279
+ cooldown_active: report2.backoffActive,
11280
+ until_ms: report2.untilMs
11281
+ });
10935
11282
  span.end("error", {
10936
11283
  attrs: {
10937
11284
  outcome: "runner_credential_mint_failed",
@@ -10942,6 +11289,15 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
10942
11289
  return false;
10943
11290
  }
10944
11291
  this.idleAgentConfigs.set(agentId, cached);
11292
+ const report = this.recordSpawnFailure(agentId, "spawn_error");
11293
+ this.recordDaemonTrace("daemon.agent.spawn.fail_backoff", {
11294
+ agentId,
11295
+ source: "runtime_profile_auto_restart",
11296
+ reason: "spawn_error",
11297
+ attempts: report.attempts,
11298
+ cooldown_active: report.backoffActive,
11299
+ until_ms: report.untilMs
11300
+ });
10945
11301
  span.end("error", {
10946
11302
  attrs: {
10947
11303
  outcome: "restart_failed",
@@ -11535,6 +11891,21 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
11535
11891
  }
11536
11892
  noteRuntimeProgress(ap, eventKind) {
11537
11893
  ap.runtimeProgress.noteRuntimeEvent(eventKind);
11894
+ this.invalidateRecoveryErrorView(ap);
11895
+ }
11896
+ /**
11897
+ * Invalidate the decision-only error view on a liveness signal. The process is
11898
+ * alive and has moved past any error it logged earlier in the turn, so a stale
11899
+ * error must not keep restarting/re-routing a recovered agent. Error-class
11900
+ * agnostic; a genuinely current failure re-populates the view after this point.
11901
+ * Called on BOTH progress paths — ordinary runtime events and
11902
+ * `internal_progress` (raw runtime activity that stale-recovery already treats
11903
+ * as liveness). `recentStderr`/`lastRuntimeError` are untouched and stay full
11904
+ * for diagnostics, user-facing reporting, and sticky terminal-failure gating.
11905
+ */
11906
+ invalidateRecoveryErrorView(ap) {
11907
+ ap.recentDecisionStderr = [];
11908
+ ap.runtimeErrorSinceProgress = false;
11538
11909
  }
11539
11910
  recordGatedSteeringEvent(agentId, ap, event, attrs = {}) {
11540
11911
  if (ap.runtime.descriptor.busyDelivery !== "gated") return;
@@ -11725,6 +12096,7 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
11725
12096
  const terminalFailureDetail = classifyTerminalFailure(ap);
11726
12097
  const detail = terminalFailureDetail?.detail ?? formatRuntimeStartTimeoutMessage(ap.driver.id);
11727
12098
  ap.lastRuntimeError = detail;
12099
+ ap.runtimeErrorSinceProgress = true;
11728
12100
  ap.runtimeProgress.markStale();
11729
12101
  const staleForMs = Math.max(timeoutMs, ap.runtimeProgress.ageMs());
11730
12102
  const diagnostic = buildRuntimeStallDiagnostic(ap, staleForMs, Math.max(1, Math.floor(staleForMs / 6e4)));
@@ -11879,6 +12251,7 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
11879
12251
  }
11880
12252
  if (event.kind === "internal_progress") {
11881
12253
  ap.runtimeProgress.noteInternalProgress();
12254
+ this.invalidateRecoveryErrorView(ap);
11882
12255
  this.clearRuntimeErrorDeliveryBackoffAfterProgress(agentId, ap, event.kind);
11883
12256
  this.recordRuntimeTraceEvent(agentId, ap, "runtime.progress.internal_observed", {
11884
12257
  turn_outcome: "held",
@@ -12053,7 +12426,10 @@ Use ${communicationCommand(driver, "read_history")} to catch up on the channels
12053
12426
  case "error": {
12054
12427
  this.interruptCompactionIfActive(agentId);
12055
12428
  this.flushPendingTrajectory(agentId);
12056
- if (ap) ap.lastRuntimeError = event.message;
12429
+ if (ap) {
12430
+ ap.lastRuntimeError = event.message;
12431
+ ap.runtimeErrorSinceProgress = true;
12432
+ }
12057
12433
  let visibleErrorMessage = event.message;
12058
12434
  if (ap) {
12059
12435
  const runtimeErrorDiagnostics = buildRuntimeErrorDiagnosticEnvelope(event.message);
@@ -13763,7 +14139,7 @@ var DAEMON_CORE_TRACE_ATTR_CONTRACTS = {
13763
14139
  spanAttrs: ["running_agents_count", "idle_agents_count"]
13764
14140
  }
13765
14141
  };
13766
- var DAEMON_CLI_USAGE = "Usage: slock-daemon --server-url <url> --api-key <key>";
14142
+ var DAEMON_CLI_USAGE = `Usage: slock-daemon --server-url <url> (--api-key <key> or ${DAEMON_API_KEY_ENV}=<key>)`;
13767
14143
  var RunnerCredentialMintError2 = class extends Error {
13768
14144
  code;
13769
14145
  retryable;
@@ -13799,9 +14175,9 @@ function runnerCredentialErrorDetail2(error) {
13799
14175
  async function waitForRunnerCredentialRetry2() {
13800
14176
  await new Promise((resolve) => setTimeout(resolve, RUNNER_CREDENTIAL_MINT_RETRY_DELAY_MS2));
13801
14177
  }
13802
- function parseDaemonCliArgs(args) {
14178
+ function parseDaemonCliArgs(args, env = {}) {
13803
14179
  let serverUrl = "";
13804
- let apiKey = "";
14180
+ let apiKey = env[DAEMON_API_KEY_ENV] ?? "";
13805
14181
  for (let i = 0; i < args.length; i++) {
13806
14182
  if (args[i] === "--server-url" && args[i + 1]) serverUrl = args[++i];
13807
14183
  if (args[i] === "--api-key" && args[i + 1]) apiKey = args[++i];
@@ -14582,6 +14958,8 @@ var DaemonCore = class {
14582
14958
  };
14583
14959
 
14584
14960
  export {
14961
+ DAEMON_API_KEY_ENV,
14962
+ scrubDaemonAuthEnv,
14585
14963
  subscribeDaemonLogs,
14586
14964
  resolveWorkspaceDirectoryPath,
14587
14965
  scanWorkspaceDirectories,