@botcord/daemon 0.2.75 → 0.2.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/dist/cloud-auth.d.ts +47 -0
  2. package/dist/cloud-auth.js +51 -0
  3. package/dist/cloud-daemon.d.ts +43 -0
  4. package/dist/cloud-daemon.js +252 -0
  5. package/dist/cloud-mode.d.ts +45 -0
  6. package/dist/cloud-mode.js +55 -0
  7. package/dist/cloud-settle.d.ts +81 -0
  8. package/dist/cloud-settle.js +100 -0
  9. package/dist/daemon-singleton.d.ts +26 -0
  10. package/dist/daemon-singleton.js +91 -0
  11. package/dist/daemon.d.ts +1 -1
  12. package/dist/daemon.js +15 -6
  13. package/dist/doctor.d.ts +4 -1
  14. package/dist/doctor.js +15 -4
  15. package/dist/gateway/channels/botcord.d.ts +1 -1
  16. package/dist/gateway/channels/botcord.js +48 -5
  17. package/dist/gateway/dispatcher.d.ts +34 -1
  18. package/dist/gateway/dispatcher.js +277 -20
  19. package/dist/gateway/gateway.d.ts +9 -1
  20. package/dist/gateway/gateway.js +4 -1
  21. package/dist/gateway/runtime-errors.d.ts +6 -0
  22. package/dist/gateway/runtime-errors.js +14 -0
  23. package/dist/gateway/runtimes/claude-code.d.ts +8 -0
  24. package/dist/gateway/runtimes/claude-code.js +92 -4
  25. package/dist/gateway/runtimes/deepseek-tui.js +19 -5
  26. package/dist/gateway/transcript.d.ts +1 -1
  27. package/dist/gateway/types.d.ts +33 -0
  28. package/dist/index.js +71 -80
  29. package/dist/provision.d.ts +2 -0
  30. package/dist/provision.js +39 -1
  31. package/dist/status-render.js +17 -0
  32. package/package.json +2 -2
  33. package/src/__tests__/cloud-auth.test.ts +42 -0
  34. package/src/__tests__/cloud-daemon.test.ts +237 -0
  35. package/src/__tests__/cloud-mode.test.ts +65 -0
  36. package/src/__tests__/cloud-settle.test.ts +287 -0
  37. package/src/__tests__/daemon-singleton.test.ts +89 -0
  38. package/src/__tests__/doctor.test.ts +34 -0
  39. package/src/__tests__/runtime-discovery.test.ts +90 -0
  40. package/src/__tests__/status-render.test.ts +34 -0
  41. package/src/cloud-auth.ts +78 -0
  42. package/src/cloud-daemon.ts +338 -0
  43. package/src/cloud-mode.ts +70 -0
  44. package/src/cloud-settle.ts +182 -0
  45. package/src/daemon-singleton.ts +122 -0
  46. package/src/daemon.ts +18 -5
  47. package/src/doctor.ts +18 -5
  48. package/src/gateway/__tests__/botcord-channel.test.ts +74 -0
  49. package/src/gateway/__tests__/claude-code-adapter.test.ts +101 -1
  50. package/src/gateway/__tests__/deepseek-tui-adapter.test.ts +19 -0
  51. package/src/gateway/__tests__/dispatcher.test.ts +120 -0
  52. package/src/gateway/channels/botcord.ts +54 -7
  53. package/src/gateway/dispatcher.ts +354 -21
  54. package/src/gateway/gateway.ts +16 -1
  55. package/src/gateway/runtime-errors.ts +15 -0
  56. package/src/gateway/runtimes/claude-code.ts +98 -2
  57. package/src/gateway/runtimes/deepseek-tui.ts +23 -5
  58. package/src/gateway/transcript.ts +1 -1
  59. package/src/gateway/types.ts +34 -0
  60. package/src/index.ts +83 -74
  61. package/src/provision.ts +45 -1
  62. package/src/status-render.ts +24 -0
@@ -1,8 +1,11 @@
1
1
  import { randomUUID } from "node:crypto";
2
+ import { looksLikeRuntimeAuthFailure } from "./runtime-errors.js";
2
3
  import { resolveRoute } from "./router.js";
3
4
  import { sessionKey } from "./session-store.js";
4
5
  import { truncateTextField, } from "./transcript.js";
5
6
  const DEFAULT_TURN_TIMEOUT_MS = 30 * 60 * 1000;
7
+ const DEFAULT_RUNTIME_AUTH_FAILURE_THRESHOLD = 3;
8
+ const DEFAULT_RUNTIME_AUTH_FAILURE_COOLDOWN_MS = 10 * 60 * 1000;
6
9
  /**
7
10
  * Owner-chat room prefix. Reply-text gating: only rooms with this prefix get
8
11
  * `result.text` forwarded to the channel; in every other room the runtime's
@@ -101,6 +104,26 @@ function redactSecretString(value) {
101
104
  .replace(/\b(token=)[^\s"']+/gi, "$1[REDACTED]")
102
105
  .replace(/\b(drt_|dit_|gho_)[A-Za-z0-9_-]+/g, "$1[REDACTED]");
103
106
  }
107
+ function extractCloudRunBudget(msg) {
108
+ const envelope = msg.raw?.envelope;
109
+ if (envelope?.type !== "cloud_run")
110
+ return undefined;
111
+ const budget = envelope.payload?.cloud_run?.budget;
112
+ if (!budget)
113
+ return undefined;
114
+ const out = {};
115
+ if (typeof budget.max_wall_time_seconds === "number" &&
116
+ Number.isFinite(budget.max_wall_time_seconds) &&
117
+ budget.max_wall_time_seconds > 0) {
118
+ out.maxWallTimeMs = Math.floor(budget.max_wall_time_seconds * 1000);
119
+ }
120
+ if (typeof budget.max_tool_calls === "number" &&
121
+ Number.isFinite(budget.max_tool_calls) &&
122
+ budget.max_tool_calls > 0) {
123
+ out.maxToolCalls = Math.floor(budget.max_tool_calls);
124
+ }
125
+ return out.maxWallTimeMs !== undefined || out.maxToolCalls !== undefined ? out : undefined;
126
+ }
104
127
  /**
105
128
  * Reason carried on `AbortController.abort()` when a cancel-previous wave
106
129
  * is taking over the slot. Distinguishing this from a timeout abort lets
@@ -137,10 +160,14 @@ export class Dispatcher {
137
160
  sessionStore;
138
161
  log;
139
162
  turnTimeoutMs;
163
+ runtimeAuthFailureThreshold;
164
+ runtimeAuthFailureCooldownMs;
140
165
  buildSystemContext;
141
166
  buildMemoryContext;
142
167
  onInbound;
143
168
  onOutbound;
169
+ onTurnComplete;
170
+ onRuntimeCircuitBreakerChange;
144
171
  composeUserTurn;
145
172
  managedRoutes;
146
173
  attentionGate;
@@ -148,6 +175,7 @@ export class Dispatcher {
148
175
  transcript;
149
176
  queues = new Map();
150
177
  deferredMultimodal = new Map();
178
+ runtimeAuthFailures = new Map();
151
179
  /**
152
180
  * Last `/hub/typing` ping timestamp per (accountId, conversationId).
153
181
  * Used to debounce cancel-previous bursts so we don't trip Hub's 20/min
@@ -161,10 +189,16 @@ export class Dispatcher {
161
189
  this.sessionStore = opts.sessionStore;
162
190
  this.log = opts.log;
163
191
  this.turnTimeoutMs = opts.turnTimeoutMs ?? DEFAULT_TURN_TIMEOUT_MS;
192
+ this.runtimeAuthFailureThreshold =
193
+ opts.runtimeAuthFailureThreshold ?? DEFAULT_RUNTIME_AUTH_FAILURE_THRESHOLD;
194
+ this.runtimeAuthFailureCooldownMs =
195
+ opts.runtimeAuthFailureCooldownMs ?? DEFAULT_RUNTIME_AUTH_FAILURE_COOLDOWN_MS;
164
196
  this.buildSystemContext = opts.buildSystemContext;
165
197
  this.buildMemoryContext = opts.buildMemoryContext;
166
198
  this.onInbound = opts.onInbound;
167
199
  this.onOutbound = opts.onOutbound;
200
+ this.onTurnComplete = opts.onTurnComplete;
201
+ this.onRuntimeCircuitBreakerChange = opts.onRuntimeCircuitBreakerChange;
168
202
  this.composeUserTurn = opts.composeUserTurn;
169
203
  this.managedRoutes = opts.managedRoutes;
170
204
  this.attentionGate = opts.attentionGate;
@@ -368,6 +402,11 @@ export class Dispatcher {
368
402
  fallback: "raw_text",
369
403
  });
370
404
  }
405
+ const openAuthBreaker = this.openRuntimeAuthBreaker(dispatchRoute, dispatchMsg);
406
+ if (openAuthBreaker) {
407
+ await this.skipRuntimeForAuthBreaker(openAuthBreaker, dispatchRoute, dispatchMsg, dispatchChannel, dispatchTurnId);
408
+ return;
409
+ }
371
410
  if (mode === "cancel-previous") {
372
411
  await this.runCancelPrevious(queueKey, dispatchRoute, text, dispatchMsg, dispatchChannel, dispatchTurnId, mergedFromDeferredTurnIds);
373
412
  }
@@ -384,6 +423,15 @@ export class Dispatcher {
384
423
  }
385
424
  return out;
386
425
  }
426
+ runtimeCircuitBreakers() {
427
+ this.pruneExpiredRuntimeAuthBreakers();
428
+ const out = {};
429
+ for (const [key, state] of this.runtimeAuthFailures) {
430
+ if (state.blockedUntil > Date.now())
431
+ out[key] = { ...state };
432
+ }
433
+ return out;
434
+ }
387
435
  // ---------------------------------------------------------------------------
388
436
  // Internals
389
437
  // ---------------------------------------------------------------------------
@@ -444,6 +492,147 @@ export class Dispatcher {
444
492
  this.deferredMultimodal.delete(queueKey);
445
493
  return list;
446
494
  }
495
+ runtimeAuthBreakerKey(route, msg) {
496
+ const thread = msg.conversation.threadId ?? "";
497
+ return `${route.runtime}:${msg.channel}:${msg.accountId}:${msg.conversation.id}:${thread}`;
498
+ }
499
+ openRuntimeAuthBreaker(route, msg) {
500
+ const key = this.runtimeAuthBreakerKey(route, msg);
501
+ const state = this.runtimeAuthFailures.get(key);
502
+ if (!state)
503
+ return null;
504
+ if (state.blockedUntil > 0 && state.blockedUntil <= Date.now()) {
505
+ this.runtimeAuthFailures.delete(key);
506
+ return null;
507
+ }
508
+ return state.blockedUntil > Date.now() ? state : null;
509
+ }
510
+ pruneExpiredRuntimeAuthBreakers() {
511
+ const now = Date.now();
512
+ for (const [key, state] of this.runtimeAuthFailures) {
513
+ if (state.blockedUntil > 0 && state.blockedUntil <= now)
514
+ this.runtimeAuthFailures.delete(key);
515
+ }
516
+ }
517
+ recordRuntimeAuthFailure(route, msg, error) {
518
+ const now = Date.now();
519
+ const key = this.runtimeAuthBreakerKey(route, msg);
520
+ const prev = this.runtimeAuthFailures.get(key);
521
+ const failures = (prev?.failures ?? 0) + 1;
522
+ const openedAt = prev?.openedAt ?? now;
523
+ const state = {
524
+ key,
525
+ runtime: route.runtime,
526
+ channel: msg.channel,
527
+ accountId: msg.accountId,
528
+ conversationId: msg.conversation.id,
529
+ threadId: msg.conversation.threadId ?? null,
530
+ failures,
531
+ openedAt,
532
+ blockedUntil: failures >= this.runtimeAuthFailureThreshold
533
+ ? now + this.runtimeAuthFailureCooldownMs
534
+ : 0,
535
+ lastFailureAt: now,
536
+ lastError: error,
537
+ };
538
+ this.runtimeAuthFailures.set(key, state);
539
+ if (state.blockedUntil > now) {
540
+ this.log.error("dispatcher: runtime auth circuit breaker opened", {
541
+ key,
542
+ runtime: route.runtime,
543
+ agentId: msg.accountId,
544
+ roomId: msg.conversation.id,
545
+ topicId: msg.conversation.threadId ?? null,
546
+ failures,
547
+ blockedUntil: state.blockedUntil,
548
+ error,
549
+ });
550
+ this.notifyRuntimeCircuitBreakerChange();
551
+ return state;
552
+ }
553
+ this.log.warn("dispatcher: runtime authentication failure recorded", {
554
+ key,
555
+ runtime: route.runtime,
556
+ agentId: msg.accountId,
557
+ roomId: msg.conversation.id,
558
+ topicId: msg.conversation.threadId ?? null,
559
+ failures,
560
+ threshold: this.runtimeAuthFailureThreshold,
561
+ error,
562
+ });
563
+ return null;
564
+ }
565
+ clearRuntimeAuthFailures(route, msg) {
566
+ const key = this.runtimeAuthBreakerKey(route, msg);
567
+ if (!this.runtimeAuthFailures.delete(key))
568
+ return;
569
+ this.log.info("dispatcher: runtime auth circuit breaker cleared", {
570
+ key,
571
+ runtime: route.runtime,
572
+ agentId: msg.accountId,
573
+ roomId: msg.conversation.id,
574
+ topicId: msg.conversation.threadId ?? null,
575
+ });
576
+ this.notifyRuntimeCircuitBreakerChange();
577
+ }
578
+ notifyRuntimeCircuitBreakerChange() {
579
+ try {
580
+ this.onRuntimeCircuitBreakerChange?.();
581
+ }
582
+ catch (err) {
583
+ this.log.warn("dispatcher: onRuntimeCircuitBreakerChange threw", {
584
+ error: err instanceof Error ? err.message : String(err),
585
+ });
586
+ }
587
+ }
588
+ async skipRuntimeForAuthBreaker(state, route, msg, channel, turnId) {
589
+ const error = `runtime authentication failed repeatedly; dispatch paused until ${new Date(state.blockedUntil).toISOString()}`;
590
+ this.log.warn("dispatcher: runtime auth circuit breaker blocking turn", {
591
+ key: state.key,
592
+ runtime: route.runtime,
593
+ agentId: msg.accountId,
594
+ roomId: msg.conversation.id,
595
+ topicId: msg.conversation.threadId ?? null,
596
+ turnId,
597
+ blockedUntil: state.blockedUntil,
598
+ });
599
+ this.transcript.write({
600
+ ts: nowIso(),
601
+ kind: "turn_error",
602
+ turnId,
603
+ agentId: msg.accountId,
604
+ roomId: msg.conversation.id,
605
+ topicId: msg.conversation.threadId ?? null,
606
+ phase: "runtime",
607
+ error,
608
+ durationMs: 0,
609
+ });
610
+ const canDeliverRuntimeText = isOwnerChatRoom(msg) || !isBotCordChannel(channel);
611
+ const canDeliverRuntimeDiagnostics = canDeliverRuntimeText || isBotCordChannel(channel);
612
+ if (canDeliverRuntimeDiagnostics) {
613
+ const sendResult = await this.sendReply(channel, {
614
+ channel: msg.channel,
615
+ accountId: msg.accountId,
616
+ conversationId: msg.conversation.id,
617
+ threadId: msg.conversation.threadId ?? null,
618
+ type: "error",
619
+ text: `⚠️ Runtime error: ${truncate(error, 500)}`,
620
+ replyTo: this.providerReplyTo(msg),
621
+ traceId: msg.trace?.id ?? null,
622
+ }, turnId);
623
+ this.emitOutbound({
624
+ turnId,
625
+ msg,
626
+ runtime: route.runtime,
627
+ runtimeSessionId: null,
628
+ startedAt: Date.now(),
629
+ finalText: truncateTextField(""),
630
+ deliveryStatus: sendResult.ok ? "delivered" : "send_failed",
631
+ deliveryReason: sendResult.ok ? null : sendResult.error,
632
+ blocks: [],
633
+ });
634
+ }
635
+ }
447
636
  async runCancelPrevious(queueKey, route, text, msg, channel, turnId, mergedFromTurnIds = []) {
448
637
  const q = this.getQueue(queueKey);
449
638
  // Bump the generation on every arrival. Older arrivals still awaiting
@@ -703,6 +892,7 @@ export class Dispatcher {
703
892
  turnId,
704
893
  controller,
705
894
  timedOut: false,
895
+ budgetExceeded: null,
706
896
  snapshot,
707
897
  done,
708
898
  dispatchedAt: startedAt,
@@ -738,6 +928,9 @@ export class Dispatcher {
738
928
  ...(mergedFromTurnIds.length > 0 ? { mergedFromTurns: mergedFromTurnIds.length } : {}),
739
929
  composedPreview: logPreview(text),
740
930
  });
931
+ const cloudRunBudget = extractCloudRunBudget(msg);
932
+ const effectiveTurnTimeoutMs = Math.min(this.turnTimeoutMs, cloudRunBudget?.maxWallTimeMs ?? this.turnTimeoutMs);
933
+ let observedToolCalls = 0;
741
934
  // Hard-cap turn with a timeout.
742
935
  const timer = setTimeout(() => {
743
936
  slot.timedOut = true;
@@ -747,10 +940,10 @@ export class Dispatcher {
747
940
  topicId: msg.conversation.threadId ?? null,
748
941
  turnId,
749
942
  queueKey,
750
- timeoutMs: this.turnTimeoutMs,
943
+ timeoutMs: effectiveTurnTimeoutMs,
751
944
  });
752
945
  controller.abort();
753
- }, this.turnTimeoutMs);
946
+ }, effectiveTurnTimeoutMs);
754
947
  if (typeof timer.unref === "function")
755
948
  timer.unref();
756
949
  const key = sessionKey({
@@ -773,6 +966,22 @@ export class Dispatcher {
773
966
  (streamable || !isBotCordChannel(channel));
774
967
  const canStream = streamable && typeof traceId === "string" && typeof channel.streamBlock === "function";
775
968
  const recordBlock = (block) => {
969
+ if (block.kind === "tool_use" && cloudRunBudget?.maxToolCalls !== undefined) {
970
+ observedToolCalls += 1;
971
+ if (observedToolCalls > cloudRunBudget.maxToolCalls && !controller.signal.aborted) {
972
+ slot.budgetExceeded = `tool call budget exceeded after ${observedToolCalls} tool call(s)`;
973
+ this.log.warn("dispatcher: cloud_run tool budget exceeded", {
974
+ agentId: msg.accountId,
975
+ roomId: msg.conversation.id,
976
+ topicId: msg.conversation.threadId ?? null,
977
+ turnId,
978
+ queueKey,
979
+ maxToolCalls: cloudRunBudget.maxToolCalls,
980
+ observedToolCalls,
981
+ });
982
+ controller.abort(new Error(slot.budgetExceeded));
983
+ }
984
+ }
776
985
  const summary = summarizeStreamBlock(block);
777
986
  slot.blocks.push(summary);
778
987
  if (this.transcript.enabled) {
@@ -957,7 +1166,8 @@ export class Dispatcher {
957
1166
  sendThinkingMarker(event.phase, event.label, "runtime");
958
1167
  }
959
1168
  : undefined;
960
- const onBlock = (canStream || this.transcript.enabled)
1169
+ const shouldObserveBlocks = canStream || this.transcript.enabled || cloudRunBudget?.maxToolCalls !== undefined;
1170
+ const onBlock = shouldObserveBlocks
961
1171
  ? (block) => {
962
1172
  // Always record adapter-emitted blocks for transcript fidelity, even
963
1173
  // after abort — the transcript reflects what the runtime emitted,
@@ -1059,6 +1269,7 @@ export class Dispatcher {
1059
1269
  const runtime = this.runtimeFactory(route.runtime, route.extraArgs);
1060
1270
  let result;
1061
1271
  let threw;
1272
+ const turnStartedAt = Date.now();
1062
1273
  try {
1063
1274
  try {
1064
1275
  result = await runtime.run({
@@ -1081,6 +1292,7 @@ export class Dispatcher {
1081
1292
  channel: msg.channel,
1082
1293
  conversationKind: msg.conversation.kind,
1083
1294
  },
1295
+ ...(cloudRunBudget ? { budget: cloudRunBudget } : {}),
1084
1296
  gateway: route.gateway,
1085
1297
  ...(route.hermesProfile ? { hermesProfile: route.hermesProfile } : {}),
1086
1298
  });
@@ -1091,6 +1303,26 @@ export class Dispatcher {
1091
1303
  finally {
1092
1304
  clearTimeout(timer);
1093
1305
  }
1306
+ // Fire onTurnComplete observer. Cloud daemon hooks this to settle
1307
+ // ``cloud_run`` envelopes against the Hub usage ledger. Errors are
1308
+ // swallowed so settle failures never break the reply path.
1309
+ if (this.onTurnComplete) {
1310
+ const wallTimeMs = Date.now() - turnStartedAt;
1311
+ try {
1312
+ await this.onTurnComplete({
1313
+ message: msg,
1314
+ result,
1315
+ wallTimeMs,
1316
+ ...(threw !== undefined ? { error: threw } : {}),
1317
+ });
1318
+ }
1319
+ catch (hookErr) {
1320
+ this.log.warn("dispatcher: onTurnComplete threw — continuing", {
1321
+ error: hookErr instanceof Error ? hookErr.message : String(hookErr),
1322
+ messageId: msg.id,
1323
+ });
1324
+ }
1325
+ }
1094
1326
  // Re-check the abort signal AFTER runtime.run resolves but BEFORE any
1095
1327
  // side effects (session write, reply send). This closes the race where
1096
1328
  // a cancel-previous arrives between runtime.run resolving and the
@@ -1103,7 +1335,7 @@ export class Dispatcher {
1103
1335
  // record from `runCancelPrevious` BEFORE aborting, so we MUST NOT also
1104
1336
  // emit a `turn_error` here — that would violate the "exactly one
1105
1337
  // terminal record per turnId" invariant.
1106
- if (controller.signal.aborted && !slot.timedOut) {
1338
+ if (controller.signal.aborted && !slot.timedOut && !slot.budgetExceeded) {
1107
1339
  return;
1108
1340
  }
1109
1341
  // Reply gating: BotCord network rooms only accept the runtime's plain
@@ -1126,7 +1358,9 @@ export class Dispatcher {
1126
1358
  const isOwnerChat = isOwnerChatRoom(msg);
1127
1359
  const canDeliverRuntimeText = isOwnerChat || !isBotCordChannel(channel);
1128
1360
  const canDeliverRuntimeDiagnostics = canDeliverRuntimeText || isBotCordChannel(channel);
1129
- if (slot.timedOut) {
1361
+ if (slot.timedOut || slot.budgetExceeded) {
1362
+ const phase = slot.budgetExceeded ? "budget" : "timeout";
1363
+ const error = slot.budgetExceeded ?? `runtime timeout after ${effectiveTurnTimeoutMs}ms`;
1130
1364
  this.transcript.write({
1131
1365
  ts: nowIso(),
1132
1366
  kind: "turn_error",
@@ -1134,8 +1368,8 @@ export class Dispatcher {
1134
1368
  agentId: msg.accountId,
1135
1369
  roomId: msg.conversation.id,
1136
1370
  topicId: msg.conversation.threadId ?? null,
1137
- phase: "timeout",
1138
- error: `runtime timeout after ${this.turnTimeoutMs}ms`,
1371
+ phase,
1372
+ error,
1139
1373
  durationMs: Date.now() - slot.dispatchedAt,
1140
1374
  });
1141
1375
  if (canDeliverRuntimeDiagnostics) {
@@ -1145,7 +1379,9 @@ export class Dispatcher {
1145
1379
  conversationId: msg.conversation.id,
1146
1380
  threadId: msg.conversation.threadId ?? null,
1147
1381
  type: "error",
1148
- text: `⚠️ Runtime timeout after ${Math.round(this.turnTimeoutMs / 60000)} minute(s); aborted`,
1382
+ text: slot.budgetExceeded
1383
+ ? `Cloud run budget exceeded: ${slot.budgetExceeded}`
1384
+ : `Runtime timeout after ${Math.round(effectiveTurnTimeoutMs / 60000)} minute(s); aborted`,
1149
1385
  replyTo: this.providerReplyTo(msg),
1150
1386
  traceId: msg.trace?.id ?? null,
1151
1387
  }, turnId);
@@ -1157,7 +1393,8 @@ export class Dispatcher {
1157
1393
  topicId: msg.conversation.threadId ?? null,
1158
1394
  turnId,
1159
1395
  queueKey,
1160
- timeoutMs: this.turnTimeoutMs,
1396
+ timeoutMs: effectiveTurnTimeoutMs,
1397
+ budgetExceeded: slot.budgetExceeded,
1161
1398
  });
1162
1399
  }
1163
1400
  return;
@@ -1209,8 +1446,28 @@ export class Dispatcher {
1209
1446
  }
1210
1447
  if (!result)
1211
1448
  return;
1212
- const replyText = (result.text || "").trim();
1213
- const finalTextField = truncateTextField(result.text || "");
1449
+ const rawReplyText = (result.text || "").trim();
1450
+ const replyLooksLikeAuthFailure = looksLikeRuntimeAuthFailure(rawReplyText);
1451
+ const replyText = replyLooksLikeAuthFailure ? "" : rawReplyText;
1452
+ const effectiveError = result.error ?? (replyLooksLikeAuthFailure ? rawReplyText : undefined);
1453
+ const authFailureError = effectiveError && looksLikeRuntimeAuthFailure(effectiveError) ? effectiveError : undefined;
1454
+ const finalTextField = truncateTextField(replyLooksLikeAuthFailure ? "" : result.text || "");
1455
+ if (replyLooksLikeAuthFailure) {
1456
+ this.log.error("dispatcher: runtime text looked like authentication failure; treating as error", {
1457
+ agentId: msg.accountId,
1458
+ roomId: msg.conversation.id,
1459
+ topicId: msg.conversation.threadId ?? null,
1460
+ turnId,
1461
+ runtime: route.runtime,
1462
+ error: rawReplyText,
1463
+ });
1464
+ }
1465
+ if (authFailureError) {
1466
+ this.recordRuntimeAuthFailure(route, msg, authFailureError);
1467
+ }
1468
+ else if (!effectiveError) {
1469
+ this.clearRuntimeAuthFailures(route, msg);
1470
+ }
1214
1471
  // Persist session before reply so next turn sees the new id even if send fails.
1215
1472
  //
1216
1473
  // Adapter contract:
@@ -1220,14 +1477,14 @@ export class Dispatcher {
1220
1477
  // even when the adapter echoes that id back
1221
1478
  // result.newSessionId truthy → upsert the entry
1222
1479
  // otherwise → no-op (e.g. codex intentionally never persists)
1223
- if (sessionId && result.error && !replyText) {
1480
+ if (sessionId && effectiveError && !replyText) {
1224
1481
  try {
1225
1482
  await this.sessionStore.delete(key);
1226
1483
  this.log.info("dispatcher: dropped stale runtime session", {
1227
1484
  key,
1228
1485
  prevRuntimeSessionId: sessionId,
1229
1486
  nextRuntimeSessionId: result.newSessionId || null,
1230
- error: result.error,
1487
+ error: effectiveError,
1231
1488
  });
1232
1489
  }
1233
1490
  catch (err) {
@@ -1237,7 +1494,7 @@ export class Dispatcher {
1237
1494
  });
1238
1495
  }
1239
1496
  }
1240
- else if (result.newSessionId) {
1497
+ else if (result.newSessionId && !authFailureError) {
1241
1498
  const session = {
1242
1499
  key,
1243
1500
  runtime: route.runtime,
@@ -1267,13 +1524,13 @@ export class Dispatcher {
1267
1524
  });
1268
1525
  }
1269
1526
  }
1270
- else if (sessionId && result.error) {
1527
+ else if (sessionId && effectiveError) {
1271
1528
  try {
1272
1529
  await this.sessionStore.delete(key);
1273
1530
  this.log.info("dispatcher: dropped stale runtime session", {
1274
1531
  key,
1275
1532
  prevRuntimeSessionId: sessionId,
1276
- error: result.error,
1533
+ error: effectiveError,
1277
1534
  });
1278
1535
  }
1279
1536
  catch (err) {
@@ -1284,14 +1541,14 @@ export class Dispatcher {
1284
1541
  }
1285
1542
  }
1286
1543
  if (!replyText) {
1287
- if (result.error) {
1544
+ if (effectiveError) {
1288
1545
  this.log.warn("dispatcher: runtime returned error without reply text", {
1289
1546
  agentId: msg.accountId,
1290
1547
  roomId: msg.conversation.id,
1291
1548
  topicId: msg.conversation.threadId ?? null,
1292
1549
  turnId,
1293
1550
  runtime: route.runtime,
1294
- error: result.error,
1551
+ error: effectiveError,
1295
1552
  });
1296
1553
  if (canDeliverRuntimeDiagnostics) {
1297
1554
  const sendResult = await this.sendReply(channel, {
@@ -1300,7 +1557,7 @@ export class Dispatcher {
1300
1557
  conversationId: msg.conversation.id,
1301
1558
  threadId: msg.conversation.threadId ?? null,
1302
1559
  type: "error",
1303
- text: `⚠️ Runtime error: ${truncate(result.error, 500)}`,
1560
+ text: `⚠️ Runtime error: ${truncate(effectiveError, 500)}`,
1304
1561
  replyTo: this.providerReplyTo(msg),
1305
1562
  traceId: msg.trace?.id ?? null,
1306
1563
  }, turnId);
@@ -1328,7 +1585,7 @@ export class Dispatcher {
1328
1585
  costUsd: result.costUsd,
1329
1586
  finalText: finalTextField,
1330
1587
  deliveryStatus: "empty_text",
1331
- deliveryReason: result.error ?? null,
1588
+ deliveryReason: effectiveError ?? null,
1332
1589
  blocks: slot.blocks,
1333
1590
  });
1334
1591
  return;
@@ -1,5 +1,5 @@
1
1
  import { type ChannelBackoffOptions } from "./channel-manager.js";
2
- import { type RuntimeFactory } from "./dispatcher.js";
2
+ import { type DispatcherOptions, type RuntimeFactory } from "./dispatcher.js";
3
3
  import { type GatewayLogger } from "./log.js";
4
4
  import { type TranscriptWriter } from "./transcript.js";
5
5
  import type { ChannelAdapter, GatewayChannelConfig, GatewayConfig, GatewayInboundMessage, GatewayOutboundMessage, GatewayRoute, GatewayRuntimeSnapshot, InboundObserver, MemoryContextBuilder, OutboundObserver, SystemContextBuilder, UserTurnBuilder } from "./types.js";
@@ -41,6 +41,14 @@ export interface GatewayBootOptions {
41
41
  * bookkeeping like loop-risk tracking.
42
42
  */
43
43
  onOutbound?: OutboundObserver;
44
+ onRuntimeCircuitBreakerChange?: () => void;
45
+ /**
46
+ * Optional observer fired after each runtime turn resolves. Forwarded
47
+ * to the dispatcher verbatim — see {@link Dispatcher} for semantics.
48
+ * Cloud daemon hooks this to settle ``cloud_run`` envelopes against
49
+ * the Hub usage ledger.
50
+ */
51
+ onTurnComplete?: DispatcherOptions["onTurnComplete"];
44
52
  /**
45
53
  * Optional attention gate (PR3, design §4.2). Forwarded to the dispatcher
46
54
  * verbatim — see {@link Dispatcher} for semantics. Returning `false` skips
@@ -1,5 +1,5 @@
1
1
  import { ChannelManager } from "./channel-manager.js";
2
- import { Dispatcher } from "./dispatcher.js";
2
+ import { Dispatcher, } from "./dispatcher.js";
3
3
  import { consoleLogger } from "./log.js";
4
4
  import { createRuntime } from "./runtimes/registry.js";
5
5
  import { DEFAULT_SESSION_STORE_MAX_ENTRY_AGE_MS, SessionStore } from "./session-store.js";
@@ -72,6 +72,8 @@ export class Gateway {
72
72
  onInbound: opts.onInbound,
73
73
  composeUserTurn: opts.composeUserTurn,
74
74
  onOutbound: opts.onOutbound,
75
+ onTurnComplete: opts.onTurnComplete,
76
+ onRuntimeCircuitBreakerChange: opts.onRuntimeCircuitBreakerChange,
75
77
  managedRoutes: this.managedRoutes,
76
78
  attentionGate: opts.attentionGate,
77
79
  resolveHubUrl: opts.resolveHubUrl,
@@ -105,6 +107,7 @@ export class Gateway {
105
107
  return {
106
108
  channels: this.channelManager.status(),
107
109
  turns: this.dispatcher.turns(),
110
+ runtimeCircuitBreakers: this.dispatcher.runtimeCircuitBreakers(),
108
111
  };
109
112
  }
110
113
  /**
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Runtime CLIs sometimes report authentication failures as ordinary final
3
+ * text. Keep this intentionally narrow so normal model replies about auth do
4
+ * not get reclassified unless they look like a top-level CLI/API failure.
5
+ */
6
+ export declare function looksLikeRuntimeAuthFailure(text: string): boolean;
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Runtime CLIs sometimes report authentication failures as ordinary final
3
+ * text. Keep this intentionally narrow so normal model replies about auth do
4
+ * not get reclassified unless they look like a top-level CLI/API failure.
5
+ */
6
+ export function looksLikeRuntimeAuthFailure(text) {
7
+ const s = text.trim();
8
+ if (!s)
9
+ return false;
10
+ return (/^(Failed to authenticate|Authentication failed|Invalid API key|Invalid Anthropic API key)\b/i.test(s) ||
11
+ /^API Error:\s*4\d\d\b/i.test(s) ||
12
+ /\b(API Error:\s*4\d\d|Request not allowed|invalid x-api-key)\b/i.test(s) ||
13
+ /^(Unauthorized|Forbidden)(?:\b|:)/i.test(s));
14
+ }
@@ -1,10 +1,17 @@
1
1
  import { NdjsonStreamAdapter, type NdjsonEventCtx } from "./ndjson-stream.js";
2
2
  import { type ProbeDeps } from "./probe.js";
3
3
  import type { RuntimeProbeResult, RuntimeRunOptions } from "../types.js";
4
+ export declare function scrubClaudeCodeAuthEnv(env: NodeJS.ProcessEnv): NodeJS.ProcessEnv;
4
5
  /** Resolve the Claude Code CLI path on PATH or the macOS desktop bundle fallback. */
5
6
  export declare function resolveClaudeCommand(deps?: ProbeDeps): string | null;
6
7
  /** Probe whether the Claude Code CLI is installed and report its version. */
7
8
  export declare function probeClaude(deps?: ProbeDeps): RuntimeProbeResult;
9
+ export interface ClaudeAuthProbeResult {
10
+ checked: boolean;
11
+ ok: boolean;
12
+ message: string;
13
+ }
14
+ export declare function probeClaudeAuth(deps?: ProbeDeps): ClaudeAuthProbeResult;
8
15
  /**
9
16
  * Claude Code adapter — spawns `claude -p "<text>" --output-format stream-json`
10
17
  * (with `--resume <sid>` when available) and parses the ndjson stream.
@@ -26,5 +33,6 @@ export declare class ClaudeCodeAdapter extends NdjsonStreamAdapter {
26
33
  run(opts: RuntimeRunOptions): Promise<import("../types.js").RuntimeRunResult>;
27
34
  protected resolveBinary(): string;
28
35
  protected buildArgs(opts: RuntimeRunOptions): string[];
36
+ protected spawnEnv(opts: RuntimeRunOptions): NodeJS.ProcessEnv;
29
37
  protected handleEvent(raw: unknown, ctx: NdjsonEventCtx): void;
30
38
  }