@swarmclawai/swarmclaw 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. package/README.md +19 -0
  2. package/package.json +5 -2
  3. package/skills/coding-agent/SKILL.md +111 -0
  4. package/skills/github/SKILL.md +140 -0
  5. package/skills/nano-banana-pro/SKILL.md +62 -0
  6. package/skills/nano-banana-pro/scripts/generate_image.py +235 -0
  7. package/skills/nano-pdf/SKILL.md +53 -0
  8. package/skills/openai-image-gen/SKILL.md +78 -0
  9. package/skills/openai-image-gen/scripts/gen.py +328 -0
  10. package/skills/resourceful-problem-solving/SKILL.md +49 -0
  11. package/skills/skill-creator/SKILL.md +147 -0
  12. package/skills/skill-creator/scripts/init_skill.py +378 -0
  13. package/skills/skill-creator/scripts/quick_validate.py +159 -0
  14. package/skills/summarize/SKILL.md +77 -0
  15. package/src/app/api/auth/route.ts +20 -5
  16. package/src/app/api/chats/[id]/deploy/route.ts +11 -6
  17. package/src/app/api/chats/[id]/devserver/route.ts +17 -20
  18. package/src/app/api/chats/[id]/messages/route.ts +15 -11
  19. package/src/app/api/chats/[id]/route.ts +9 -10
  20. package/src/app/api/chats/[id]/stop/route.ts +5 -7
  21. package/src/app/api/chats/messages-route.test.ts +8 -6
  22. package/src/app/api/chats/route.ts +9 -10
  23. package/src/app/api/credentials/[id]/route.ts +4 -1
  24. package/src/app/api/extensions/marketplace/route.ts +5 -2
  25. package/src/app/api/ip/route.ts +2 -2
  26. package/src/app/api/memory/maintenance/route.ts +5 -2
  27. package/src/app/api/preview-server/route.ts +15 -12
  28. package/src/app/api/projects/[id]/route.ts +7 -46
  29. package/src/app/api/system/status/route.ts +11 -0
  30. package/src/app/api/upload/route.ts +4 -1
  31. package/src/cli/index.js +7 -0
  32. package/src/cli/spec.js +1 -0
  33. package/src/components/agents/agent-files-editor.tsx +44 -32
  34. package/src/components/agents/personality-builder.tsx +13 -7
  35. package/src/components/agents/trash-list.tsx +1 -1
  36. package/src/components/chat/chat-area.tsx +45 -23
  37. package/src/components/chat/message-bubble.test.ts +35 -0
  38. package/src/components/chat/message-bubble.tsx +20 -9
  39. package/src/components/chat/message-list.tsx +62 -42
  40. package/src/components/chat/swarm-status-card.tsx +10 -3
  41. package/src/components/input/chat-input.tsx +34 -14
  42. package/src/components/layout/daemon-indicator.tsx +7 -8
  43. package/src/components/layout/update-banner.tsx +8 -13
  44. package/src/components/logs/log-list.tsx +1 -1
  45. package/src/components/memory/memory-card.tsx +3 -1
  46. package/src/components/org-chart/org-chart-view.tsx +4 -0
  47. package/src/components/projects/project-list.tsx +4 -2
  48. package/src/components/projects/tabs/overview-tab.tsx +3 -2
  49. package/src/components/secrets/secret-sheet.tsx +1 -1
  50. package/src/components/secrets/secrets-list.tsx +1 -1
  51. package/src/components/shared/agent-switch-dialog.tsx +12 -6
  52. package/src/components/shared/dir-browser.tsx +22 -18
  53. package/src/components/skills/skill-sheet.tsx +2 -3
  54. package/src/components/tasks/task-list.tsx +1 -1
  55. package/src/components/tasks/task-sheet.tsx +1 -1
  56. package/src/hooks/use-openclaw-gateway.ts +46 -27
  57. package/src/instrumentation.ts +10 -7
  58. package/src/lib/chat/assistant-render-id.ts +3 -0
  59. package/src/lib/chat/chat-streaming-state.test.ts +42 -3
  60. package/src/lib/chat/chat-streaming-state.ts +20 -8
  61. package/src/lib/chat/chat.ts +18 -2
  62. package/src/lib/chat/queued-message-queue.test.ts +23 -1
  63. package/src/lib/chat/queued-message-queue.ts +11 -2
  64. package/src/lib/providers/anthropic.ts +6 -3
  65. package/src/lib/providers/claude-cli.ts +9 -3
  66. package/src/lib/providers/cli-utils.test.ts +124 -0
  67. package/src/lib/providers/cli-utils.ts +15 -0
  68. package/src/lib/providers/codex-cli.ts +9 -3
  69. package/src/lib/providers/gemini-cli.ts +6 -2
  70. package/src/lib/providers/index.ts +4 -1
  71. package/src/lib/providers/ollama.ts +5 -2
  72. package/src/lib/providers/openai.ts +8 -5
  73. package/src/lib/providers/opencode-cli.ts +6 -2
  74. package/src/lib/server/activity/activity-log.ts +21 -0
  75. package/src/lib/server/agents/agent-availability.test.ts +10 -5
  76. package/src/lib/server/agents/agent-cascade.ts +79 -59
  77. package/src/lib/server/agents/agent-registry.ts +23 -4
  78. package/src/lib/server/agents/agent-repository.ts +90 -0
  79. package/src/lib/server/agents/delegation-job-repository.ts +53 -0
  80. package/src/lib/server/agents/delegation-jobs.ts +11 -4
  81. package/src/lib/server/agents/guardian-checkpoint-repository.ts +35 -0
  82. package/src/lib/server/agents/guardian.ts +2 -2
  83. package/src/lib/server/agents/main-agent-loop.ts +14 -6
  84. package/src/lib/server/agents/main-loop-state-repository.ts +38 -0
  85. package/src/lib/server/agents/subagent-runtime.ts +9 -6
  86. package/src/lib/server/agents/subagent-swarm.ts +3 -2
  87. package/src/lib/server/agents/task-session.ts +3 -4
  88. package/src/lib/server/approvals/approval-repository.ts +30 -0
  89. package/src/lib/server/autonomy/supervisor-incident-repository.ts +42 -0
  90. package/src/lib/server/autonomy/supervisor-reflection.ts +14 -1
  91. package/src/lib/server/chat-execution/chat-execution-types.ts +38 -0
  92. package/src/lib/server/chat-execution/chat-execution-utils.ts +1 -1
  93. package/src/lib/server/chat-execution/chat-execution.ts +84 -1914
  94. package/src/lib/server/chat-execution/chat-turn-finalization.ts +620 -0
  95. package/src/lib/server/chat-execution/chat-turn-partial-persistence.ts +221 -0
  96. package/src/lib/server/chat-execution/chat-turn-preflight.ts +133 -0
  97. package/src/lib/server/chat-execution/chat-turn-preparation.ts +817 -0
  98. package/src/lib/server/chat-execution/chat-turn-stream-execution.ts +296 -0
  99. package/src/lib/server/chat-execution/chat-turn-tool-routing.ts +5 -5
  100. package/src/lib/server/chat-execution/continuation-evaluator.ts +4 -3
  101. package/src/lib/server/chat-execution/continuation-limits.ts +6 -3
  102. package/src/lib/server/chat-execution/message-classifier.test.ts +329 -0
  103. package/src/lib/server/chat-execution/message-classifier.ts +5 -2
  104. package/src/lib/server/chat-execution/post-stream-finalization.ts +5 -2
  105. package/src/lib/server/chat-execution/prompt-builder.ts +22 -1
  106. package/src/lib/server/chat-execution/prompt-sections.ts +55 -13
  107. package/src/lib/server/chat-execution/response-completeness.ts +5 -2
  108. package/src/lib/server/chat-execution/situational-awareness.ts +12 -7
  109. package/src/lib/server/chat-execution/stream-agent-chat.ts +58 -25
  110. package/src/lib/server/chatrooms/chatroom-memory-bridge.ts +6 -3
  111. package/src/lib/server/chatrooms/chatroom-repository.ts +32 -0
  112. package/src/lib/server/connectors/bluebubbles.ts +7 -4
  113. package/src/lib/server/connectors/connector-inbound.ts +16 -13
  114. package/src/lib/server/connectors/connector-lifecycle.ts +11 -8
  115. package/src/lib/server/connectors/connector-outbound.ts +6 -3
  116. package/src/lib/server/connectors/connector-repository.ts +58 -0
  117. package/src/lib/server/connectors/discord.ts +10 -7
  118. package/src/lib/server/connectors/email.ts +17 -14
  119. package/src/lib/server/connectors/googlechat.ts +7 -4
  120. package/src/lib/server/connectors/inbound-audio-transcription.ts +5 -2
  121. package/src/lib/server/connectors/matrix.ts +6 -3
  122. package/src/lib/server/connectors/openclaw.ts +20 -17
  123. package/src/lib/server/connectors/outbox.ts +4 -1
  124. package/src/lib/server/connectors/runtime-state.test.ts +117 -0
  125. package/src/lib/server/connectors/runtime-state.ts +19 -0
  126. package/src/lib/server/connectors/session-consolidation.ts +5 -2
  127. package/src/lib/server/connectors/signal.ts +9 -6
  128. package/src/lib/server/connectors/slack.ts +13 -10
  129. package/src/lib/server/connectors/teams.ts +8 -5
  130. package/src/lib/server/connectors/telegram.ts +15 -12
  131. package/src/lib/server/connectors/whatsapp.ts +32 -29
  132. package/src/lib/server/credentials/credential-repository.ts +7 -0
  133. package/src/lib/server/embeddings.ts +4 -1
  134. package/src/lib/server/gateways/gateway-profile-repository.ts +4 -0
  135. package/src/lib/server/link-understanding.ts +4 -1
  136. package/src/lib/server/memory/memory-abstract.test.ts +59 -0
  137. package/src/lib/server/memory/memory-abstract.ts +59 -0
  138. package/src/lib/server/memory/memory-db.ts +40 -14
  139. package/src/lib/server/missions/mission-repository.ts +74 -0
  140. package/src/lib/server/missions/mission-service/actions.ts +6 -0
  141. package/src/lib/server/missions/mission-service/bindings.ts +9 -0
  142. package/src/lib/server/missions/mission-service/context.ts +4 -0
  143. package/src/lib/server/missions/mission-service/core.ts +2269 -0
  144. package/src/lib/server/missions/mission-service/queries.ts +12 -0
  145. package/src/lib/server/missions/mission-service/recovery.ts +5 -0
  146. package/src/lib/server/missions/mission-service/ticks.ts +9 -0
  147. package/src/lib/server/missions/mission-service.test.ts +9 -2
  148. package/src/lib/server/missions/mission-service.ts +6 -2263
  149. package/src/lib/server/openclaw/gateway.ts +8 -5
  150. package/src/lib/server/persistence/repository-utils.ts +154 -0
  151. package/src/lib/server/persistence/storage-context.ts +51 -0
  152. package/src/lib/server/persistence/transaction.ts +1 -0
  153. package/src/lib/server/project-utils.ts +13 -0
  154. package/src/lib/server/projects/project-repository.ts +36 -0
  155. package/src/lib/server/projects/project-service.ts +79 -0
  156. package/src/lib/server/protocols/protocol-agent-turn.ts +5 -2
  157. package/src/lib/server/protocols/protocol-normalization.test.ts +6 -4
  158. package/src/lib/server/protocols/protocol-run-lifecycle.ts +5 -2
  159. package/src/lib/server/protocols/protocol-step-helpers.ts +4 -1
  160. package/src/lib/server/provider-health.ts +18 -0
  161. package/src/lib/server/query-expansion.ts +4 -1
  162. package/src/lib/server/runtime/alert-dispatch.ts +8 -7
  163. package/src/lib/server/runtime/daemon-policy.ts +1 -1
  164. package/src/lib/server/runtime/daemon-state/core.ts +1570 -0
  165. package/src/lib/server/runtime/daemon-state/health.ts +6 -0
  166. package/src/lib/server/runtime/daemon-state/policy.ts +7 -0
  167. package/src/lib/server/runtime/daemon-state/supervisor.ts +6 -0
  168. package/src/lib/server/runtime/daemon-state.test.ts +48 -0
  169. package/src/lib/server/runtime/daemon-state.ts +3 -1331
  170. package/src/lib/server/runtime/estop-repository.ts +4 -0
  171. package/src/lib/server/runtime/estop.ts +3 -1
  172. package/src/lib/server/runtime/heartbeat-service.test.ts +2 -2
  173. package/src/lib/server/runtime/heartbeat-service.ts +78 -34
  174. package/src/lib/server/runtime/heartbeat-wake.ts +6 -4
  175. package/src/lib/server/runtime/idle-window.ts +6 -3
  176. package/src/lib/server/runtime/network.ts +11 -0
  177. package/src/lib/server/runtime/orchestrator-events.ts +2 -2
  178. package/src/lib/server/runtime/perf.ts +4 -1
  179. package/src/lib/server/runtime/process-manager.ts +7 -4
  180. package/src/lib/server/runtime/queue/claims.ts +4 -0
  181. package/src/lib/server/runtime/queue/core.ts +2079 -0
  182. package/src/lib/server/runtime/queue/execution.ts +7 -0
  183. package/src/lib/server/runtime/queue/followups.ts +4 -0
  184. package/src/lib/server/runtime/queue/queries.ts +12 -0
  185. package/src/lib/server/runtime/queue/recovery.ts +7 -0
  186. package/src/lib/server/runtime/queue-recovery.test.ts +48 -13
  187. package/src/lib/server/runtime/queue-repository.ts +17 -0
  188. package/src/lib/server/runtime/queue.ts +5 -2058
  189. package/src/lib/server/runtime/run-ledger.ts +6 -5
  190. package/src/lib/server/runtime/run-repository.ts +73 -0
  191. package/src/lib/server/runtime/runtime-lock-repository.ts +8 -0
  192. package/src/lib/server/runtime/runtime-settings.ts +1 -1
  193. package/src/lib/server/runtime/runtime-state.ts +99 -0
  194. package/src/lib/server/runtime/scheduler.ts +13 -8
  195. package/src/lib/server/runtime/session-run-manager/cancellation.ts +157 -0
  196. package/src/lib/server/runtime/session-run-manager/drain.ts +246 -0
  197. package/src/lib/server/runtime/session-run-manager/enqueue.ts +287 -0
  198. package/src/lib/server/runtime/session-run-manager/queries.ts +117 -0
  199. package/src/lib/server/runtime/session-run-manager/recovery.ts +238 -0
  200. package/src/lib/server/runtime/session-run-manager/state.ts +441 -0
  201. package/src/lib/server/runtime/session-run-manager/types.ts +74 -0
  202. package/src/lib/server/runtime/session-run-manager.ts +72 -1374
  203. package/src/lib/server/runtime/watch-job-repository.ts +35 -0
  204. package/src/lib/server/runtime/watch-jobs.ts +3 -1
  205. package/src/lib/server/sandbox/bridge-auth-registry.ts +6 -0
  206. package/src/lib/server/sandbox/novnc-auth.ts +10 -0
  207. package/src/lib/server/schedules/schedule-repository.ts +42 -0
  208. package/src/lib/server/session-tools/context.ts +14 -0
  209. package/src/lib/server/session-tools/discovery.ts +9 -6
  210. package/src/lib/server/session-tools/index.ts +3 -1
  211. package/src/lib/server/session-tools/platform.ts +1 -1
  212. package/src/lib/server/session-tools/subagent.ts +23 -2
  213. package/src/lib/server/session-tools/wallet.ts +4 -1
  214. package/src/lib/server/sessions/session-repository.ts +85 -0
  215. package/src/lib/server/settings/settings-repository.ts +25 -0
  216. package/src/lib/server/skills/clawhub-client.ts +4 -1
  217. package/src/lib/server/skills/runtime-skill-resolver.ts +8 -2
  218. package/src/lib/server/skills/skill-discovery.test.ts +2 -2
  219. package/src/lib/server/skills/skill-discovery.ts +2 -2
  220. package/src/lib/server/skills/skill-eligibility.ts +6 -0
  221. package/src/lib/server/skills/skill-repository.ts +14 -0
  222. package/src/lib/server/solana.ts +6 -0
  223. package/src/lib/server/storage-auth.ts +5 -5
  224. package/src/lib/server/storage-normalization.ts +4 -0
  225. package/src/lib/server/storage.ts +32 -32
  226. package/src/lib/server/tasks/task-followups.ts +4 -1
  227. package/src/lib/server/tasks/task-repository.ts +54 -0
  228. package/src/lib/server/tool-loop-detection.ts +8 -3
  229. package/src/lib/server/tool-planning.ts +226 -0
  230. package/src/lib/server/tool-retry.ts +4 -3
  231. package/src/lib/server/usage/usage-repository.ts +30 -0
  232. package/src/lib/server/wallet/wallet-portfolio.ts +29 -0
  233. package/src/lib/server/webhooks/webhook-repository.ts +10 -0
  234. package/src/lib/server/ws-hub.ts +5 -2
  235. package/src/lib/strip-internal-metadata.test.ts +78 -37
  236. package/src/lib/strip-internal-metadata.ts +20 -6
  237. package/src/stores/use-approval-store.ts +7 -1
  238. package/src/stores/use-chat-store.test.ts +54 -0
  239. package/src/stores/use-chat-store.ts +26 -6
  240. package/src/types/index.ts +6 -0
  241. /package/{bundled-skills → skills}/google-workspace/SKILL.md +0 -0
@@ -1,1331 +1,3 @@
1
- import { loadQueue, loadSchedules, loadSessions, loadConnectors, saveConnectors, loadWebhookRetryQueue, upsertWebhookRetry, deleteWebhookRetry, loadWebhooks, loadAgents, loadSettings, appendWebhookLog, loadCredentials, decryptKey, pruneExpiredLocks } from '@/lib/server/storage'
2
- import { notify } from '@/lib/server/ws-hub'
3
- import { processNext, cleanupFinishedTaskSessions, validateCompletedTasksQueue, recoverStalledRunningTasks, resumeQueue, promoteDeferred } from '@/lib/server/runtime/queue'
4
- import { startScheduler, stopScheduler } from '@/lib/server/runtime/scheduler'
5
- import { sweepOrphanedBrowsers, getActiveBrowserCount } from '@/lib/server/session-tools'
6
- import {
7
- autoStartConnectors,
8
- listRunningConnectors,
9
- sendConnectorMessage,
10
- stopAllConnectors,
11
- startConnector,
12
- getConnectorStatus,
13
- checkConnectorHealth,
14
- createConnectorReconnectState,
15
- advanceConnectorReconnectState,
16
- clearReconnectState,
17
- getAllReconnectStates,
18
- getReconnectState,
19
- setReconnectState,
20
- } from '@/lib/server/connectors/manager'
21
- import { startConnectorOutboxWorker, stopConnectorOutboxWorker } from '@/lib/server/connectors/outbox'
22
- import { startHeartbeatService, stopHeartbeatService, getHeartbeatServiceStatus, pruneHeartbeatState } from '@/lib/server/runtime/heartbeat-service'
23
- import { hasOpenClawAgents, ensureGatewayConnected, disconnectAutoGateways, getGateway } from '@/lib/server/openclaw/gateway'
24
- import { enqueueSessionRun, sweepStuckRuns } from '@/lib/server/runtime/session-run-manager'
25
- import { pruneOldRuns } from '@/lib/server/runtime/run-ledger'
26
- import { getEnabledCapabilitySelection } from '@/lib/capability-selection'
27
- import { WORKSPACE_DIR } from '@/lib/server/data-dir'
28
- import { DEFAULT_HEARTBEAT_INTERVAL_SEC } from '@/lib/runtime/heartbeat-defaults'
29
- import { genId } from '@/lib/id'
30
- import { isAgentDisabled } from '@/lib/server/agents/agent-availability'
31
- import { errorMessage, hmrSingleton } from '@/lib/shared-utils'
32
- import path from 'node:path'
33
- import type { Session, WebhookRetryEntry } from '@/types'
34
- import { createNotification } from '@/lib/server/create-notification'
35
- import { pingProvider, OPENAI_COMPATIBLE_DEFAULTS, restoreProviderHealthState } from '@/lib/server/provider-health'
36
- import { runIntegrityMonitor } from '@/lib/server/integrity-monitor'
37
- import { notifyOrchestrators } from '@/lib/server/runtime/orchestrator-events'
38
- import { recoverStaleDelegationJobs } from '@/lib/server/agents/delegation-jobs'
39
- import { restoreSwarmRegistry } from '@/lib/server/agents/subagent-swarm'
40
- import { pruneMainLoopState } from '@/lib/server/agents/main-agent-loop'
41
- import { pruneSystemEventQueues, pruneOrchestratorEventQueues } from '@/lib/server/runtime/system-events'
42
- import { checkSwarmTimeouts, ensureProtocolEngineRecovered } from '@/lib/server/protocols/protocol-service'
43
- import { sweepManagedProcesses, reapOrphanedSandboxContainers } from '@/lib/server/runtime/process-manager'
44
- import { drainIdleWindowCallbacks } from '@/lib/server/runtime/idle-window'
45
- import {
46
- buildSessionHeartbeatHealthDedupKey,
47
- daemonAutostartEnvEnabled,
48
- isDaemonBackgroundServicesEnabled,
49
- parseCronToMs,
50
- parseHeartbeatIntervalSec,
51
- shouldNotifyProviderReachabilityIssue,
52
- shouldSuppressSessionHeartbeatHealthAlert,
53
- shouldSuppressSyntheticAgentHealthAlert,
54
- } from '@/lib/server/runtime/daemon-policy'
55
- import { loadEstopState } from '@/lib/server/runtime/estop'
56
- import { classifyRuntimeFailure, recordSupervisorIncident } from '@/lib/server/autonomy/supervisor-reflection'
57
- import { getMemoryDb } from '@/lib/server/memory/memory-db'
58
-
59
- const QUEUE_CHECK_INTERVAL = 30_000 // 30 seconds
60
- const BROWSER_SWEEP_INTERVAL = 60_000 // 60 seconds
61
- const BROWSER_MAX_AGE = 10 * 60 * 1000 // 10 minutes idle = orphaned
62
- const HEALTH_CHECK_INTERVAL = 120_000 // 2 minutes
63
- const CONNECTOR_HEALTH_CHECK_INTERVAL = 15_000 // 15 seconds
64
- const MEMORY_CONSOLIDATION_INTERVAL = 6 * 3600_000 // 6 hours
65
- const MEMORY_CONSOLIDATION_INITIAL_DELAY = 60_000 // 1 minute after daemon start
66
- const STALE_MULTIPLIER = 4 // session is stale after N × heartbeat interval
67
- const STALE_MIN_MS = 4 * 60 * 1000 // minimum 4 minutes regardless of interval
68
- const STALE_AUTO_DISABLE_MULTIPLIER = 16 // auto-disable after much longer sustained staleness
69
- const STALE_AUTO_DISABLE_MIN_MS = 45 * 60 * 1000 // never auto-disable before 45 minutes
70
- const CONNECTOR_RESTART_BASE_MS = 30_000
71
- const CONNECTOR_RESTART_MAX_MS = 15 * 60 * 1000
72
- const MAX_WAKE_ATTEMPTS = 3
73
- const QUEUE_PROCESS_TIMEOUT = 10 * 60_000 // 10 minutes
74
- const SHUTDOWN_TIMEOUT_MS = 15_000
75
- const PROVIDER_PING_CB_THRESHOLD = 3 // trips after 3 consecutive failures
76
- const PROVIDER_PING_CB_BASE_MS = 300_000 // 5 min initial cooldown
77
- const PROVIDER_PING_CB_MAX_MS = 1_800_000 // 30 min max cooldown
78
-
79
- export {
80
- buildSessionHeartbeatHealthDedupKey,
81
- isDaemonBackgroundServicesEnabled,
82
- shouldNotifyProviderReachabilityIssue,
83
- shouldSuppressSessionHeartbeatHealthAlert,
84
- shouldSuppressSyntheticAgentHealthAlert,
85
- }
86
-
87
- // Store daemon state on globalThis to survive HMR reloads
88
- interface DaemonState {
89
- queueIntervalId: ReturnType<typeof setInterval> | null
90
- browserSweepId: ReturnType<typeof setInterval> | null
91
- healthIntervalId: ReturnType<typeof setInterval> | null
92
- connectorHealthIntervalId: ReturnType<typeof setInterval> | null
93
- memoryConsolidationTimeoutId: ReturnType<typeof setTimeout> | null
94
- memoryConsolidationIntervalId: ReturnType<typeof setInterval> | null
95
- evalSchedulerIntervalId: ReturnType<typeof setInterval> | null
96
- swarmTimeoutIntervalId: ReturnType<typeof setInterval> | null
97
- /** Session IDs we've already alerted as stale (alert-once semantics). */
98
- staleSessionIds: Set<string>
99
- /** OpenClaw gateway agent IDs currently considered down. */
100
- openclawDownAgentIds: Set<string>
101
- /** Per-agent auto-repair state for OpenClaw gateways. */
102
- openclawRepairState: Map<string, { attempts: number; lastAttemptAt: number; cooldownUntil: number }>
103
- lastIntegrityCheckAt: number | null
104
- lastIntegrityDriftCount: number
105
- manualStopRequested: boolean
106
- running: boolean
107
- lastProcessedAt: number | null
108
- healthCheckRunning: boolean
109
- connectorHealthCheckRunning: boolean
110
- shuttingDown: boolean
111
- providerPingCircuitBreaker: Map<string, { consecutiveFailures: number; skipUntil: number }>
112
- }
113
-
114
- const ds: DaemonState = hmrSingleton<DaemonState>('__swarmclaw_daemon__', () => ({
115
- queueIntervalId: null,
116
- browserSweepId: null,
117
- healthIntervalId: null,
118
- connectorHealthIntervalId: null,
119
- memoryConsolidationTimeoutId: null,
120
- memoryConsolidationIntervalId: null,
121
- evalSchedulerIntervalId: null,
122
- swarmTimeoutIntervalId: null,
123
- staleSessionIds: new Set<string>(),
124
- openclawDownAgentIds: new Set<string>(),
125
- openclawRepairState: new Map<string, { attempts: number; lastAttemptAt: number; cooldownUntil: number }>(),
126
- lastIntegrityCheckAt: null,
127
- lastIntegrityDriftCount: 0,
128
- manualStopRequested: false,
129
- running: false,
130
- lastProcessedAt: null,
131
- healthCheckRunning: false,
132
- connectorHealthCheckRunning: false,
133
- shuttingDown: false,
134
- providerPingCircuitBreaker: new Map<string, { consecutiveFailures: number; skipUntil: number }>(),
135
- }))
136
-
137
- // Backfill fields for hot-reloaded daemon state objects from older code versions.
138
- if (!ds.staleSessionIds) ds.staleSessionIds = new Set<string>()
139
- if (!ds.openclawDownAgentIds) ds.openclawDownAgentIds = new Set<string>()
140
- if (!ds.openclawRepairState) ds.openclawRepairState = new Map<string, { attempts: number; lastAttemptAt: number; cooldownUntil: number }>()
141
- if (ds.lastIntegrityCheckAt === undefined) ds.lastIntegrityCheckAt = null
142
- if (ds.lastIntegrityDriftCount === undefined) ds.lastIntegrityDriftCount = 0
143
- // Migrate from old issueLastAlertAt map if present (HMR across code versions)
144
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
145
- if ((ds as any).issueLastAlertAt) delete (ds as any).issueLastAlertAt
146
- if (ds.healthIntervalId === undefined) ds.healthIntervalId = null
147
- if (ds.connectorHealthIntervalId === undefined) ds.connectorHealthIntervalId = null
148
- if (ds.manualStopRequested === undefined) ds.manualStopRequested = false
149
- if (ds.memoryConsolidationTimeoutId === undefined) ds.memoryConsolidationTimeoutId = null
150
- if (ds.memoryConsolidationIntervalId === undefined) ds.memoryConsolidationIntervalId = null
151
- if (ds.evalSchedulerIntervalId === undefined) ds.evalSchedulerIntervalId = null
152
- if (ds.swarmTimeoutIntervalId === undefined) ds.swarmTimeoutIntervalId = null
153
- if (ds.healthCheckRunning === undefined) ds.healthCheckRunning = false
154
- if (ds.connectorHealthCheckRunning === undefined) ds.connectorHealthCheckRunning = false
155
- if (ds.shuttingDown === undefined) ds.shuttingDown = false
156
- if (!ds.providerPingCircuitBreaker) ds.providerPingCircuitBreaker = new Map<string, { consecutiveFailures: number; skipUntil: number }>()
157
-
158
- export function ensureDaemonStarted(source = 'unknown'): boolean {
159
- if (ds.running) return false
160
- if (!daemonAutostartEnvEnabled()) return false
161
- if (ds.manualStopRequested) return false
162
- if (loadEstopState().level !== 'none') return false
163
- startDaemon({ source, manualStart: false })
164
- return true
165
- }
166
-
167
- export function startDaemon(options?: { source?: string; manualStart?: boolean }) {
168
- const source = options?.source || 'unknown'
169
- const manualStart = options?.manualStart === true
170
- if (manualStart) ds.manualStopRequested = false
171
- const estop = loadEstopState()
172
- if (estop.level !== 'none') {
173
- notify('daemon')
174
- console.warn(`[daemon] Start blocked by estop (level=${estop.level}, source=${source})`)
175
- return
176
- }
177
-
178
- if (ds.running) {
179
- // In dev/HMR, daemon can already be flagged running while new interval types
180
- // (for example health monitor) were introduced in newer code.
181
- startQueueProcessor()
182
- startBrowserSweep()
183
- startHeartbeatService()
184
- startMemoryConsolidation()
185
- startSwarmTimeoutChecker()
186
- syncDaemonBackgroundServices({ runConnectorHealthCheckImmediately: false })
187
- return
188
- }
189
- ds.running = true
190
- notify('daemon')
191
- console.log(`[daemon] Starting daemon (source=${source}, scheduler + queue processor + heartbeat)`)
192
-
193
- try {
194
- validateCompletedTasksQueue()
195
- cleanupFinishedTaskSessions()
196
- recoverStaleDelegationJobs({ fullRestart: true })
197
- ensureProtocolEngineRecovered()
198
- restoreProviderHealthState()
199
- try {
200
- const lost = restoreSwarmRegistry()
201
- if (lost > 0) console.log(`[daemon] Marked ${lost} in-flight swarm(s) as lost after restart`)
202
- } catch { /* best-effort */ }
203
- resumeQueue()
204
- startScheduler()
205
- startQueueProcessor()
206
- startBrowserSweep()
207
- startHeartbeatService()
208
- startMemoryConsolidation()
209
- startSwarmTimeoutChecker()
210
- syncDaemonBackgroundServices({ runConnectorHealthCheckImmediately: false })
211
- } catch (err: unknown) {
212
- ds.running = false
213
- notify('daemon')
214
- console.error('[daemon] Failed to start:', errorMessage(err))
215
- throw err
216
- }
217
-
218
- if (isDaemonBackgroundServicesEnabled()) {
219
- // Auto-start enabled connectors only when the full background stack is enabled.
220
- autoStartConnectors().catch((err: unknown) => {
221
- console.error('[daemon] Error auto-starting connectors:', errorMessage(err))
222
- })
223
- }
224
- }
225
-
226
- export async function stopDaemon(options?: { source?: string; manualStop?: boolean }) {
227
- const source = options?.source || 'unknown'
228
- if (options?.manualStop === true) ds.manualStopRequested = true
229
- if (!ds.running) return
230
- ds.running = false
231
- ds.shuttingDown = true
232
- notify('daemon')
233
- console.log(`[daemon] Stopping daemon (source=${source})`)
234
-
235
- stopScheduler()
236
- stopQueueProcessor()
237
- stopBrowserSweep()
238
- stopHealthMonitor()
239
- stopConnectorHealthMonitor()
240
- stopConnectorOutboxWorker()
241
- stopHeartbeatService()
242
- stopMemoryConsolidation()
243
- stopSwarmTimeoutChecker()
244
- stopEvalScheduler()
245
- try {
246
- await Promise.race([
247
- stopAllConnectors({ disable: false }),
248
- new Promise<void>((_, reject) =>
249
- setTimeout(() => reject(new Error('Connector shutdown timed out')), SHUTDOWN_TIMEOUT_MS)
250
- ),
251
- ])
252
- } catch (err: unknown) {
253
- console.warn(`[daemon] Connector shutdown issue: ${errorMessage(err)}`)
254
- } finally {
255
- ds.shuttingDown = false
256
- }
257
- }
258
-
259
- function startBrowserSweep() {
260
- if (ds.browserSweepId) return
261
- ds.browserSweepId = setInterval(() => {
262
- const count = getActiveBrowserCount()
263
- if (count > 0) {
264
- const cleaned = sweepOrphanedBrowsers(BROWSER_MAX_AGE)
265
- if (cleaned > 0) {
266
- console.log(`[daemon] Cleaned ${cleaned} orphaned browser(s), ${getActiveBrowserCount()} still active`)
267
- }
268
- }
269
- }, BROWSER_SWEEP_INTERVAL)
270
- }
271
-
272
- function stopBrowserSweep() {
273
- if (ds.browserSweepId) {
274
- clearInterval(ds.browserSweepId)
275
- ds.browserSweepId = null
276
- }
277
- // Kill all remaining browsers on shutdown
278
- sweepOrphanedBrowsers(0)
279
- }
280
-
281
- export async function syncOpenClawGatewayLifecycle() {
282
- if (!hasOpenClawAgents()) {
283
- disconnectAutoGateways()
284
- return
285
- }
286
- if (!getGateway()?.connected) {
287
- await ensureGatewayConnected()
288
- }
289
- }
290
-
291
- function startQueueProcessor() {
292
- if (ds.queueIntervalId) return
293
- ds.queueIntervalId = setInterval(async () => {
294
- if (!ds.running) return
295
- const queue = loadQueue()
296
- if (queue.length > 0) {
297
- console.log(`[daemon] Processing ${queue.length} queued task(s)`)
298
- try {
299
- await Promise.race([
300
- processNext(),
301
- new Promise<void>((_, reject) =>
302
- setTimeout(() => reject(new Error('Queue processing timed out')), QUEUE_PROCESS_TIMEOUT)
303
- ),
304
- ])
305
- } catch (err: unknown) {
306
- console.error(`[daemon] Queue processing error/timeout: ${errorMessage(err)}`)
307
- }
308
- ds.lastProcessedAt = Date.now()
309
- }
310
- if (!isDaemonBackgroundServicesEnabled()) return
311
- // OpenClaw gateway lifecycle: lazy connect for active OpenClaw agents, stop auto-managed reconnects when none remain.
312
- try {
313
- await syncOpenClawGatewayLifecycle()
314
- } catch { /* gateway errors are non-fatal */ }
315
- }, QUEUE_CHECK_INTERVAL)
316
- }
317
-
318
- function stopQueueProcessor() {
319
- if (ds.queueIntervalId) {
320
- clearInterval(ds.queueIntervalId)
321
- ds.queueIntervalId = null
322
- }
323
- }
324
-
325
- async function sendHealthAlert(input: string | {
326
- text: string
327
- dedupKey?: string
328
- entityType?: string
329
- entityId?: string
330
- }) {
331
- const payload = typeof input === 'string' ? { text: input } : input
332
- const text = payload.text
333
- console.warn(`[health] ${text}`)
334
- createNotification({
335
- type: 'warning',
336
- title: 'SwarmClaw health alert',
337
- message: text,
338
- dedupKey: payload.dedupKey || `health-alert:${text}`,
339
- entityType: payload.entityType,
340
- entityId: payload.entityId,
341
- dispatchExternally: false,
342
- })
343
- }
344
-
345
- async function runConnectorHealthChecks(now: number) {
346
- // First, collapse dead runtime instances into persisted error state so the
347
- // daemon can own the restart cadence and backoff policy.
348
- try {
349
- await checkConnectorHealth()
350
- } catch (err: unknown) {
351
- console.error('[health] Connector isAlive check failed:', errorMessage(err))
352
- }
353
-
354
- const connectors = loadConnectors()
355
- for (const connector of Object.values(connectors) as unknown as Record<string, unknown>[]) {
356
- if (!connector?.id || typeof connector.id !== 'string') continue
357
- if (connector.isEnabled !== true) {
358
- clearReconnectState(connector.id)
359
- continue
360
- }
361
-
362
- const runtimeStatus = getConnectorStatus(connector.id)
363
- if (runtimeStatus === 'running') {
364
- clearReconnectState(connector.id)
365
- continue
366
- }
367
-
368
- const current = getReconnectState(connector.id)
369
- ?? createConnectorReconnectState(
370
- { error: typeof connector.lastError === 'string' ? connector.lastError : '' },
371
- { initialBackoffMs: CONNECTOR_RESTART_BASE_MS },
372
- )
373
-
374
- if (current.exhausted) {
375
- continue
376
- }
377
-
378
- if (current.nextRetryAt > now) continue
379
-
380
- // Notify on first detection of a down connector
381
- if (current.attempts === 0) {
382
- createNotification({
383
- type: 'warning',
384
- title: `Connector "${connector.name}" is down`,
385
- message: 'Auto-restart in progress.',
386
- dedupKey: `connector-down:${connector.id}`,
387
- entityType: 'connector',
388
- entityId: connector.id,
389
- })
390
- }
391
-
392
- try {
393
- await startConnector(connector.id)
394
- clearReconnectState(connector.id)
395
- await sendHealthAlert(`Connector "${connector.name}" (${connector.platform}) was down and has been auto-restarted.`)
396
- } catch (err: unknown) {
397
- const message = errorMessage(err)
398
- const next = advanceConnectorReconnectState(current, message, now, {
399
- initialBackoffMs: CONNECTOR_RESTART_BASE_MS,
400
- maxBackoffMs: CONNECTOR_RESTART_MAX_MS,
401
- maxAttempts: MAX_WAKE_ATTEMPTS,
402
- })
403
- setReconnectState(connector.id, next)
404
- if (next.exhausted) {
405
- console.warn(`[health] Connector "${connector.name}" exceeded ${MAX_WAKE_ATTEMPTS} auto-restart attempts — giving up until the server restarts or the user retries manually`)
406
- connector.status = 'error'
407
- connector.lastError = `Auto-restart gave up after ${MAX_WAKE_ATTEMPTS} attempts: ${message}`
408
- connector.updatedAt = Date.now()
409
- connectors[connector.id] = connector
410
- saveConnectors(connectors)
411
- notify('connectors')
412
- notifyOrchestrators(`Connector ${connector.name || connector.id} status: error — auto-restart exhausted after ${MAX_WAKE_ATTEMPTS} attempts`, `connector-status:${connector.id}`)
413
- createNotification({
414
- type: 'error',
415
- title: `Connector "${connector.name}" failed`,
416
- message: `Auto-restart gave up after ${MAX_WAKE_ATTEMPTS} attempts.`,
417
- dedupKey: `connector-gave-up:${connector.id}`,
418
- entityType: 'connector',
419
- entityId: connector.id,
420
- })
421
- } else {
422
- console.warn(`[health] Connector auto-restart failed for ${connector.name} (attempt ${next.attempts}/${MAX_WAKE_ATTEMPTS}): ${message}`)
423
- }
424
- }
425
- }
426
-
427
- // Purge restart state for connectors that no longer exist in storage
428
- for (const id of Object.keys(getAllReconnectStates())) {
429
- if (!connectors[id] || connectors[id]?.isEnabled !== true) clearReconnectState(id)
430
- }
431
- }
432
-
433
- async function processWebhookRetries() {
434
- const retryQueue = loadWebhookRetryQueue()
435
- const now = Date.now()
436
- const dueEntries: WebhookRetryEntry[] = []
437
-
438
- for (const raw of Object.values(retryQueue)) {
439
- const entry = raw as WebhookRetryEntry
440
- if (entry.deadLettered) continue
441
- if (entry.nextRetryAt > now) continue
442
- dueEntries.push(entry)
443
- }
444
-
445
- if (dueEntries.length === 0) return
446
-
447
- const webhooks = loadWebhooks()
448
- const agents = loadAgents()
449
- const sessions = loadSessions()
450
-
451
- for (const entry of dueEntries) {
452
- const webhook = webhooks[entry.webhookId] as unknown as Record<string, unknown> | undefined
453
- if (!webhook) {
454
- // Webhook deleted — drop the retry
455
- deleteWebhookRetry(entry.id)
456
- continue
457
- }
458
-
459
- const agentId = typeof webhook.agentId === 'string' ? webhook.agentId : ''
460
- const agent = agentId ? (agents[agentId] as unknown as Record<string, unknown> | undefined) : null
461
- if (!agent) {
462
- entry.deadLettered = true
463
- upsertWebhookRetry(entry.id, entry)
464
- console.warn(`[webhook-retry] Dead-lettered ${entry.id}: agent not found for webhook ${entry.webhookId}`)
465
- continue
466
- }
467
- if (isAgentDisabled(agent)) {
468
- entry.deadLettered = true
469
- upsertWebhookRetry(entry.id, entry)
470
- console.warn(`[webhook-retry] Dead-lettered ${entry.id}: agent disabled for webhook ${entry.webhookId}`)
471
- continue
472
- }
473
-
474
- // Find or create a webhook session (same logic as the POST handler)
475
- const sessionName = `webhook:${entry.webhookId}`
476
- let session = Object.values(sessions).find(
477
- (s: unknown) => {
478
- const rec = s as Record<string, unknown>
479
- return rec.name === sessionName && rec.agentId === agent.id
480
- },
481
- ) as unknown as Record<string, unknown> | undefined
482
-
483
- if (!session) {
484
- const sessionId = genId()
485
- const ts = Date.now()
486
- session = {
487
- id: sessionId,
488
- name: sessionName,
489
- cwd: WORKSPACE_DIR,
490
- user: 'system',
491
- provider: agent.provider || 'claude-cli',
492
- model: agent.model || '',
493
- credentialId: agent.credentialId || null,
494
- apiEndpoint: agent.apiEndpoint || null,
495
- claudeSessionId: null,
496
- codexThreadId: null,
497
- opencodeSessionId: null,
498
- delegateResumeIds: { claudeCode: null, codex: null, opencode: null, gemini: null },
499
- messages: [],
500
- createdAt: ts,
501
- lastActiveAt: ts,
502
- sessionType: 'human',
503
- agentId: agent.id,
504
- parentSessionId: null,
505
- ...getEnabledCapabilitySelection(agent),
506
- heartbeatEnabled: (agent.heartbeatEnabled as boolean | undefined) ?? false,
507
- heartbeatIntervalSec: (agent.heartbeatIntervalSec as number | null | undefined) ?? null,
508
- }
509
- const { upsertSession: upsert } = await import('@/lib/server/storage')
510
- upsert(session.id as string, session)
511
- }
512
-
513
- const payloadPreview = (entry.payload || '').slice(0, 12_000)
514
- const prompt = [
515
- 'Webhook event received (retry).',
516
- `Webhook ID: ${entry.webhookId}`,
517
- `Webhook Name: ${(webhook.name as string) || entry.webhookId}`,
518
- `Source: ${(webhook.source as string) || 'custom'}`,
519
- `Event: ${entry.event}`,
520
- `Retry attempt: ${entry.attempts}`,
521
- `Original received at: ${new Date(entry.createdAt).toISOString()}`,
522
- '',
523
- 'Payload:',
524
- payloadPreview || '(empty payload)',
525
- '',
526
- 'Handle this event now. If this requires notifying the user, use configured connector tools.',
527
- ].join('\n')
528
-
529
- try {
530
- const run = enqueueSessionRun({
531
- sessionId: session.id as string,
532
- message: prompt,
533
- source: 'webhook',
534
- internal: false,
535
- mode: 'followup',
536
- })
537
-
538
- appendWebhookLog(genId(8), {
539
- id: genId(8),
540
- webhookId: entry.webhookId,
541
- event: entry.event,
542
- payload: (entry.payload || '').slice(0, 2000),
543
- status: 'success',
544
- sessionId: session.id,
545
- runId: run.runId,
546
- timestamp: Date.now(),
547
- })
548
-
549
- deleteWebhookRetry(entry.id)
550
- console.log(`[webhook-retry] Successfully retried ${entry.id} for webhook ${entry.webhookId} (attempt ${entry.attempts})`)
551
- } catch (err: unknown) {
552
- const errorMsg = errorMessage(err)
553
- entry.attempts += 1
554
-
555
- if (entry.attempts >= entry.maxAttempts) {
556
- entry.deadLettered = true
557
- upsertWebhookRetry(entry.id, entry)
558
- console.warn(`[webhook-retry] Dead-lettered ${entry.id} after ${entry.attempts} attempts: ${errorMsg}`)
559
- const failure = classifyRuntimeFailure({ source: 'webhook', message: errorMsg })
560
- if (session?.id) {
561
- recordSupervisorIncident({
562
- runId: entry.id,
563
- sessionId: session.id as string,
564
- taskId: null,
565
- agentId: agentId || null,
566
- source: 'webhook',
567
- kind: 'runtime_failure',
568
- severity: failure.severity,
569
- summary: `Webhook delivery dead-lettered: ${errorMsg}`.slice(0, 320),
570
- details: errorMsg,
571
- failureFamily: failure.family,
572
- remediation: failure.remediation,
573
- repairPrompt: failure.repairPrompt,
574
- autoAction: null,
575
- })
576
- }
577
-
578
- appendWebhookLog(genId(8), {
579
- id: genId(8),
580
- webhookId: entry.webhookId,
581
- event: entry.event,
582
- payload: (entry.payload || '').slice(0, 2000),
583
- status: 'error',
584
- error: `Dead-lettered after ${entry.attempts} attempts: ${errorMsg}`,
585
- timestamp: Date.now(),
586
- })
587
- } else {
588
- // Exponential backoff: 30s * 2^attempt + random jitter (0-5000ms)
589
- const jitter = Math.floor(Math.random() * 5000)
590
- entry.nextRetryAt = Date.now() + (30_000 * Math.pow(2, entry.attempts)) + jitter
591
- upsertWebhookRetry(entry.id, entry)
592
- console.warn(`[webhook-retry] Retry ${entry.id} failed (attempt ${entry.attempts}/${entry.maxAttempts}), next at ${new Date(entry.nextRetryAt).toISOString()}: ${errorMsg}`)
593
- }
594
- }
595
- }
596
- }
597
-
598
- async function runProviderHealthChecks() {
599
- const agents = loadAgents()
600
- const credentials = loadCredentials()
601
-
602
- // Build deduplicated set of { provider, credentialId, apiEndpoint } tuples
603
- const seen = new Set<string>()
604
- const tuples: { provider: string; credentialId: string; apiEndpoint: string; agentId: string; credentialName: string }[] = []
605
-
606
- for (const agent of Object.values(agents) as unknown as Record<string, unknown>[]) {
607
- if (!agent?.id || typeof agent.id !== 'string') continue
608
- if (shouldSuppressSyntheticAgentHealthAlert(agent.id)) continue
609
- const provider = typeof agent.provider === 'string' ? agent.provider : ''
610
- if (!provider || ['claude-cli', 'codex-cli', 'opencode-cli'].includes(provider)) continue
611
-
612
- const credentialId = typeof agent.credentialId === 'string' ? agent.credentialId : ''
613
- const apiEndpoint = typeof agent.apiEndpoint === 'string' ? agent.apiEndpoint : ''
614
-
615
- // For OpenClaw, scope per agent (each may have a different gateway)
616
- const key = provider === 'openclaw'
617
- ? `openclaw:${agent.id}`
618
- : `${provider}:${credentialId || 'no-cred'}:${apiEndpoint}`
619
- if (seen.has(key)) continue
620
- seen.add(key)
621
-
622
- const cred = credentialId ? (credentials[credentialId] as unknown as Record<string, unknown> | undefined) : undefined
623
- const credName = typeof cred?.name === 'string' ? cred.name : provider
624
-
625
- tuples.push({
626
- provider,
627
- credentialId,
628
- apiEndpoint,
629
- agentId: agent.id,
630
- credentialName: credName,
631
- })
632
- }
633
-
634
- for (const tuple of tuples) {
635
- // Circuit breaker: skip providers that have failed repeatedly
636
- const cbKey = `${tuple.provider}:${tuple.credentialId || 'no-cred'}:${tuple.apiEndpoint}`
637
- const cb = ds.providerPingCircuitBreaker.get(cbKey)
638
- const now = Date.now()
639
- if (cb && cb.skipUntil > now) continue
640
-
641
- let apiKey: string | undefined
642
- if (tuple.credentialId) {
643
- const cred = credentials[tuple.credentialId] as unknown as Record<string, unknown> | undefined
644
- if (cred?.encryptedKey && typeof cred.encryptedKey === 'string') {
645
- try { apiKey = decryptKey(cred.encryptedKey) } catch { /* skip undecryptable */ continue }
646
- }
647
- }
648
-
649
- const endpoint = tuple.apiEndpoint || OPENAI_COMPATIBLE_DEFAULTS[tuple.provider]?.defaultEndpoint || undefined
650
- const result = await pingProvider(tuple.provider, apiKey, endpoint)
651
-
652
- if (!result.ok) {
653
- // Update circuit breaker state
654
- const existing = ds.providerPingCircuitBreaker.get(cbKey) || { consecutiveFailures: 0, skipUntil: 0 }
655
- existing.consecutiveFailures += 1
656
- if (existing.consecutiveFailures >= PROVIDER_PING_CB_THRESHOLD) {
657
- const cooldown = Math.min(
658
- PROVIDER_PING_CB_BASE_MS * Math.pow(2, existing.consecutiveFailures - PROVIDER_PING_CB_THRESHOLD),
659
- PROVIDER_PING_CB_MAX_MS,
660
- )
661
- existing.skipUntil = now + cooldown
662
- console.log(`[health] Circuit breaker tripped for ${tuple.credentialName} — skipping pings for ${Math.round(cooldown / 60_000)}m`)
663
- }
664
- ds.providerPingCircuitBreaker.set(cbKey, existing)
665
-
666
- if (!shouldNotifyProviderReachabilityIssue(tuple.provider)) {
667
- continue
668
- }
669
-
670
- const dedupKey = `provider-down:${tuple.credentialId || tuple.provider}`
671
-
672
- const entityType = tuple.credentialId ? 'credential' : undefined
673
- const entityId = tuple.credentialId || undefined
674
-
675
- createNotification({
676
- type: 'warning',
677
- title: `Provider unreachable: ${tuple.credentialName}`,
678
- message: result.message,
679
- dedupKey,
680
- entityType,
681
- entityId,
682
- })
683
- } else {
684
- // Success — clear circuit breaker
685
- ds.providerPingCircuitBreaker.delete(cbKey)
686
- }
687
- }
688
- }
689
-
690
- const OPENCLAW_REPAIR_MAX_ATTEMPTS = 3
691
- const OPENCLAW_REPAIR_COOLDOWN_MS = 300_000 // 5 minutes
692
-
693
- async function runOpenClawGatewayHealthChecks() {
694
- const agents = loadAgents()
695
- const credentials = loadCredentials()
696
-
697
- // Build deduplicated OpenClaw agent tuples
698
- const seen = new Set<string>()
699
- const tuples: { agentId: string; endpoint: string; credentialId: string; credentialName: string }[] = []
700
-
701
- for (const agent of Object.values(agents) as unknown as Record<string, unknown>[]) {
702
- if (!agent?.id || typeof agent.id !== 'string') continue
703
- if (shouldSuppressSyntheticAgentHealthAlert(agent.id)) continue
704
- if (agent.provider !== 'openclaw') continue
705
-
706
- const key = `openclaw:${agent.id}`
707
- if (seen.has(key)) continue
708
- seen.add(key)
709
-
710
- const credentialId = typeof agent.credentialId === 'string' ? agent.credentialId : ''
711
- const endpoint = typeof agent.apiEndpoint === 'string' ? agent.apiEndpoint : ''
712
- const cred = credentialId ? (credentials[credentialId] as unknown as Record<string, unknown> | undefined) : undefined
713
- const credName = typeof cred?.name === 'string' ? cred.name : 'openclaw'
714
-
715
- tuples.push({ agentId: agent.id, endpoint, credentialId, credentialName: credName })
716
- }
717
-
718
- if (!tuples.length) return
719
-
720
- const { probeOpenClawHealth } = await import('@/lib/server/openclaw/health')
721
-
722
- for (const tuple of tuples) {
723
- let token: string | undefined
724
- if (tuple.credentialId) {
725
- const cred = credentials[tuple.credentialId] as unknown as Record<string, unknown> | undefined
726
- if (cred?.encryptedKey && typeof cred.encryptedKey === 'string') {
727
- try { token = decryptKey(cred.encryptedKey) } catch { continue }
728
- }
729
- }
730
-
731
- const result = await probeOpenClawHealth({
732
- endpoint: tuple.endpoint || undefined,
733
- token,
734
- timeoutMs: 10_000,
735
- })
736
-
737
- const now = Date.now()
738
-
739
- if (result.ok) {
740
- // Recovered
741
- if (ds.openclawDownAgentIds.has(tuple.agentId)) {
742
- ds.openclawDownAgentIds.delete(tuple.agentId)
743
- ds.openclawRepairState.delete(tuple.agentId)
744
- createNotification({
745
- type: 'success',
746
- title: 'OpenClaw gateway recovered',
747
- message: `Gateway for ${tuple.credentialName} is reachable again.`,
748
- dedupKey: `openclaw-gw-down:${tuple.agentId}`,
749
- })
750
- }
751
- continue
752
- }
753
-
754
- // Unhealthy
755
- const repair = ds.openclawRepairState.get(tuple.agentId) || { attempts: 0, lastAttemptAt: 0, cooldownUntil: 0 }
756
-
757
- // In cooldown — skip
758
- if (repair.cooldownUntil > now) continue
759
-
760
- // Cooldown expired — reset
761
- if (repair.cooldownUntil > 0 && repair.cooldownUntil <= now) {
762
- repair.attempts = 0
763
- repair.cooldownUntil = 0
764
- }
765
-
766
- ds.openclawDownAgentIds.add(tuple.agentId)
767
-
768
- if (repair.attempts < OPENCLAW_REPAIR_MAX_ATTEMPTS) {
769
- try {
770
- const { runOpenClawDoctor } = await import('@/lib/server/openclaw/doctor')
771
- await runOpenClawDoctor({ fix: true })
772
- } catch (err: unknown) {
773
- console.warn('[daemon] openclaw doctor --fix failed:', errorMessage(err))
774
- }
775
- repair.attempts += 1
776
- repair.lastAttemptAt = now
777
- } else {
778
- repair.cooldownUntil = now + OPENCLAW_REPAIR_COOLDOWN_MS
779
- }
780
-
781
- ds.openclawRepairState.set(tuple.agentId, repair)
782
-
783
- createNotification({
784
- type: 'error',
785
- title: `OpenClaw gateway unreachable: ${tuple.credentialName}`,
786
- message: result.error || 'Health check failed',
787
- dedupKey: `openclaw-gw-down:${tuple.agentId}`,
788
- })
789
- }
790
- }
791
-
792
- /**
793
- * Prune orphaned entries from module-level Maps/Sets that reference
794
- * sessions, connectors, or agents that no longer exist in storage.
795
- * Runs every health-check cycle (2 minutes).
796
- */
797
- function pruneOrphanedState(sessions: Record<string, unknown>): void {
798
- const liveSessionIds = new Set(Object.keys(sessions))
799
-
800
- // Main-loop state map (per-session autonomous state)
801
- pruneMainLoopState(liveSessionIds)
802
-
803
- // Heartbeat service tracking maps
804
- pruneHeartbeatState(liveSessionIds)
805
-
806
- // System event queues for dead sessions
807
- pruneSystemEventQueues(liveSessionIds)
808
-
809
- // Process manager — sweep completed processes older than TTL
810
- sweepManagedProcesses()
811
-
812
- // Reap orphaned sandbox containers from prior crashes
813
- reapOrphanedSandboxContainers().catch((err) => {
814
- console.warn('[daemon] Orphaned sandbox reap failed:', typeof err === 'object' && err !== null && 'message' in err ? (err as Error).message : String(err))
815
- })
816
-
817
- // Daemon-local: prune openclawRepairState for agents that no longer exist
818
- const agents = loadAgents()
819
- for (const agentId of ds.openclawRepairState.keys()) {
820
- if (!agents[agentId]) ds.openclawRepairState.delete(agentId)
821
- }
822
- for (const agentId of ds.openclawDownAgentIds) {
823
- if (!agents[agentId]) ds.openclawDownAgentIds.delete(agentId)
824
- }
825
-
826
- // Orchestrator event queues for dead agents
827
- pruneOrchestratorEventQueues(new Set(Object.keys(agents)))
828
-
829
- // Prune circuit breaker entries for providers that no longer have any agent referencing them
830
- const liveProviderKeys = new Set<string>()
831
- for (const agent of Object.values(agents) as unknown as Record<string, unknown>[]) {
832
- if (!agent?.id) continue
833
- const p = typeof agent.provider === 'string' ? agent.provider : ''
834
- const c = typeof agent.credentialId === 'string' ? agent.credentialId : ''
835
- const e = typeof agent.apiEndpoint === 'string' ? agent.apiEndpoint : ''
836
- if (p) liveProviderKeys.add(`${p}:${c || 'no-cred'}:${e}`)
837
- }
838
- for (const key of ds.providerPingCircuitBreaker.keys()) {
839
- if (!liveProviderKeys.has(key)) ds.providerPingCircuitBreaker.delete(key)
840
- }
841
- }
842
-
843
- async function runMemoryMaintenanceTick(): Promise<void> {
844
- try {
845
- const memDb = getMemoryDb()
846
- const result = memDb.maintain({ dedupe: true, pruneWorking: true, ttlHours: 24 })
847
- if (result.deduped > 0 || result.pruned > 0) {
848
- console.log(`[daemon] Memory maintenance: deduped=${result.deduped}, pruned=${result.pruned}`)
849
- }
850
- } catch (err: unknown) {
851
- console.warn('[daemon] Memory maintenance tick failed:', err instanceof Error ? err.message : String(err))
852
- }
853
- }
854
-
855
- async function runHealthChecks() {
856
- // Continuously keep the completed queue honest.
857
- validateCompletedTasksQueue()
858
- recoverStalledRunningTasks()
859
-
860
- // Watchdog: abort runs stuck in running state beyond their timeout threshold.
861
- try {
862
- const stuck = sweepStuckRuns()
863
- if (stuck.aborted > 0) {
864
- console.log(`[daemon] Watchdog: aborted ${stuck.aborted} stuck run(s)`)
865
- }
866
- } catch (err: unknown) {
867
- console.error('[daemon] Stuck-run watchdog failed:', err instanceof Error ? err.message : String(err))
868
- }
869
-
870
- // Keep heartbeat state in sync with task terminal states even without daemon restarts.
871
- cleanupFinishedTaskSessions()
872
-
873
- // Re-queue deferred tasks whose agents have become available again.
874
- try { promoteDeferred() } catch {}
875
-
876
- const sessions = loadSessions()
877
- const now = Date.now()
878
- const currentlyStale = new Set<string>()
879
- const dirtySessionIds: string[] = []
880
-
881
- for (const session of Object.values(sessions) as unknown as Record<string, unknown>[]) {
882
- if (!session?.id || typeof session.id !== 'string') continue
883
- if (session.heartbeatEnabled !== true) continue
884
-
885
- const sessionId = session.id
886
- if (shouldSuppressSessionHeartbeatHealthAlert(session as Pick<Session, 'id' | 'name' | 'user' | 'shortcutForAgentId'>)) {
887
- ds.staleSessionIds.delete(sessionId)
888
- continue
889
- }
890
-
891
- const sessionLabel = String(session.name || sessionId)
892
- const intervalSec = parseHeartbeatIntervalSec(session.heartbeatIntervalSec, DEFAULT_HEARTBEAT_INTERVAL_SEC)
893
- if (intervalSec <= 0) continue
894
- const staleAfter = Math.max(intervalSec * STALE_MULTIPLIER * 1000, STALE_MIN_MS)
895
- const lastActive = typeof session.lastActiveAt === 'number' ? session.lastActiveAt : 0
896
- if (lastActive <= 0) continue
897
-
898
- const staleForMs = now - lastActive
899
- if (staleForMs > staleAfter) {
900
- const autoDisableAfter = Math.max(intervalSec * STALE_AUTO_DISABLE_MULTIPLIER * 1000, STALE_AUTO_DISABLE_MIN_MS)
901
- if (staleForMs > autoDisableAfter) {
902
- session.heartbeatEnabled = false
903
- session.lastActiveAt = now
904
- dirtySessionIds.push(sessionId)
905
- ds.staleSessionIds.delete(sessionId)
906
- await sendHealthAlert({
907
- text: `Auto-disabled heartbeat for stale session "${sessionLabel}" after ${Math.round(staleForMs / 60_000)}m of inactivity.`,
908
- dedupKey: buildSessionHeartbeatHealthDedupKey(sessionId, 'auto-disabled'),
909
- entityType: 'session',
910
- entityId: sessionId,
911
- })
912
- continue
913
- }
914
-
915
- currentlyStale.add(sessionId)
916
- // Only alert on transition from healthy → stale (once per stale episode)
917
- if (!ds.staleSessionIds.has(sessionId)) {
918
- ds.staleSessionIds.add(sessionId)
919
- await sendHealthAlert({
920
- text: `Session "${sessionLabel}" heartbeat appears stale (last active ${(Math.round(staleForMs / 1000))}s ago, interval ${intervalSec}s).`,
921
- dedupKey: buildSessionHeartbeatHealthDedupKey(sessionId, 'stale'),
922
- entityType: 'session',
923
- entityId: sessionId,
924
- })
925
- }
926
- }
927
- }
928
-
929
- // Clear recovered sessions so they can re-alert if they go stale again later
930
- for (const id of ds.staleSessionIds) {
931
- if (!currentlyStale.has(id)) {
932
- ds.staleSessionIds.delete(id)
933
- }
934
- }
935
-
936
- for (const sid of dirtySessionIds) {
937
- const s = sessions[sid]
938
- if (s) {
939
- const { upsertSession: upsert } = await import('@/lib/server/storage')
940
- upsert(sid, s)
941
- }
942
- }
943
-
944
- // Provider reachability checks
945
- try {
946
- await runProviderHealthChecks()
947
- } catch (err: unknown) {
948
- console.error('[daemon] Provider health check failed:', errorMessage(err))
949
- }
950
-
951
- // OpenClaw gateway health checks + auto-repair
952
- try {
953
- await runOpenClawGatewayHealthChecks()
954
- } catch (err: unknown) {
955
- console.error('[daemon] OpenClaw gateway health check failed:', errorMessage(err))
956
- }
957
-
958
- // Integrity drift monitoring for identity/config/extension files.
959
- try {
960
- const integrity = runIntegrityMonitor(loadSettings())
961
- ds.lastIntegrityCheckAt = integrity.checkedAt
962
- ds.lastIntegrityDriftCount = integrity.drifts.length
963
- if (integrity.drifts.length > 0) {
964
- for (const drift of integrity.drifts) {
965
- const rel = path.relative(process.cwd(), drift.filePath)
966
- const shortPath = rel && !rel.startsWith('..') ? rel : drift.filePath
967
- const action = drift.type === 'created'
968
- ? 'created'
969
- : drift.type === 'deleted'
970
- ? 'deleted'
971
- : 'modified'
972
- createNotification({
973
- type: drift.type === 'deleted' ? 'error' : 'warning',
974
- title: `Integrity drift detected (${drift.kind})`,
975
- message: `${shortPath} was ${action}.`,
976
- dedupKey: `integrity:${drift.id}:${drift.nextHash || 'missing'}`,
977
- entityType: 'session',
978
- entityId: drift.id,
979
- })
980
- }
981
- await sendHealthAlert(`Integrity monitor detected ${integrity.drifts.length} file drift event(s).`)
982
- }
983
- } catch (err: unknown) {
984
- console.error('[daemon] Integrity monitor check failed:', errorMessage(err))
985
- }
986
-
987
- // Process webhook retry queue
988
- try {
989
- await processWebhookRetries()
990
- } catch (err: unknown) {
991
- console.error('[daemon] Webhook retry processing failed:', errorMessage(err))
992
- }
993
-
994
- // Periodic memory hygiene: prune orphaned state for deleted sessions/connectors
995
- try {
996
- pruneOrphanedState(sessions)
997
- } catch (err: unknown) {
998
- console.error('[daemon] Memory hygiene sweep failed:', errorMessage(err))
999
- }
1000
-
1001
- // Prune old terminal runs and their events to prevent unbounded growth
1002
- try {
1003
- const pruned = pruneOldRuns()
1004
- if (pruned.prunedRuns > 0 || pruned.prunedEvents > 0) {
1005
- console.log(`[daemon] Pruned ${pruned.prunedRuns} old run(s) and ${pruned.prunedEvents} run event(s)`)
1006
- }
1007
- } catch (err: unknown) {
1008
- console.error('[daemon] Run pruning failed:', err instanceof Error ? err.message : String(err))
1009
- }
1010
-
1011
- // Prune expired runtime locks
1012
- try {
1013
- const locksRemoved = pruneExpiredLocks()
1014
- if (locksRemoved > 0) {
1015
- console.log(`[daemon] Pruned ${locksRemoved} expired lock(s)`)
1016
- }
1017
- } catch (err: unknown) {
1018
- console.error('[daemon] Lock pruning failed:', err instanceof Error ? err.message : String(err))
1019
- }
1020
-
1021
- // Periodic memory database maintenance (dedup + TTL pruning)
1022
- try {
1023
- await runMemoryMaintenanceTick()
1024
- } catch (err: unknown) {
1025
- console.error('[daemon] Memory maintenance failed:', err instanceof Error ? err.message : String(err))
1026
- }
1027
-
1028
- // Drain idle-window callbacks when the system is quiet
1029
- try {
1030
- await drainIdleWindowCallbacks()
1031
- } catch (err: unknown) {
1032
- console.error('[daemon] Idle-window drain failed:', err instanceof Error ? err.message : String(err))
1033
- }
1034
- }
1035
-
1036
- function startHealthMonitor() {
1037
- if (ds.healthIntervalId) return
1038
- ds.healthIntervalId = setInterval(() => {
1039
- if (ds.healthCheckRunning || ds.shuttingDown) return
1040
- ds.healthCheckRunning = true
1041
- runHealthChecks()
1042
- .catch((err) => {
1043
- console.error('[daemon] Health monitor tick failed:', err?.message || String(err))
1044
- })
1045
- .finally(() => { ds.healthCheckRunning = false })
1046
- }, HEALTH_CHECK_INTERVAL)
1047
- }
1048
-
1049
- function stopHealthMonitor() {
1050
- if (ds.healthIntervalId) {
1051
- clearInterval(ds.healthIntervalId)
1052
- ds.healthIntervalId = null
1053
- }
1054
- }
1055
-
1056
- function syncDaemonBackgroundServices(options?: { runConnectorHealthCheckImmediately?: boolean }) {
1057
- if (isDaemonBackgroundServicesEnabled()) {
1058
- startHealthMonitor()
1059
- startConnectorHealthMonitor({
1060
- runImmediately: options?.runConnectorHealthCheckImmediately !== false,
1061
- })
1062
- startConnectorOutboxWorker()
1063
- startEvalScheduler()
1064
- return
1065
- }
1066
- stopHealthMonitor()
1067
- stopConnectorHealthMonitor()
1068
- stopConnectorOutboxWorker()
1069
- stopEvalScheduler()
1070
- }
1071
-
1072
- function startConnectorHealthMonitor(options?: { runImmediately?: boolean }) {
1073
- if (ds.connectorHealthIntervalId) return
1074
-
1075
- const tick = () => {
1076
- if (ds.connectorHealthCheckRunning || ds.shuttingDown) return
1077
- ds.connectorHealthCheckRunning = true
1078
- runConnectorHealthChecks(Date.now())
1079
- .catch((err) => {
1080
- console.error('[daemon] Connector health tick failed:', errorMessage(err))
1081
- })
1082
- .finally(() => { ds.connectorHealthCheckRunning = false })
1083
- }
1084
-
1085
- if (options?.runImmediately !== false) tick()
1086
- ds.connectorHealthIntervalId = setInterval(tick, CONNECTOR_HEALTH_CHECK_INTERVAL)
1087
- }
1088
-
1089
- function stopConnectorHealthMonitor() {
1090
- if (ds.connectorHealthIntervalId) {
1091
- clearInterval(ds.connectorHealthIntervalId)
1092
- ds.connectorHealthIntervalId = null
1093
- }
1094
- }
1095
-
1096
- function runConsolidationTick() {
1097
- import('@/lib/server/memory/memory-consolidation').then(({ runDailyConsolidation, registerConsolidationIdleCallback, registerCompactionIdleCallback }) => {
1098
- // Wire idle-window callbacks so consolidation and compaction run during quiet periods
1099
- registerConsolidationIdleCallback()
1100
- registerCompactionIdleCallback()
1101
-
1102
- return runDailyConsolidation().then((stats) => {
1103
- if (stats.digests > 0 || stats.pruned > 0 || stats.deduped > 0) {
1104
- console.log(`[daemon] Memory consolidation: ${stats.digests} digest(s), ${stats.pruned} pruned, ${stats.deduped} deduped`)
1105
- }
1106
- if (stats.errors.length > 0) {
1107
- console.warn(`[daemon] Memory consolidation errors: ${stats.errors.join('; ')}`)
1108
- }
1109
- })
1110
- }).catch((err: unknown) => {
1111
- console.error('[daemon] Memory consolidation failed:', errorMessage(err))
1112
- })
1113
- }
1114
-
1115
- function startMemoryConsolidation() {
1116
- if (ds.memoryConsolidationTimeoutId || ds.memoryConsolidationIntervalId) return
1117
- // Deferred first run, then repeat on interval
1118
- ds.memoryConsolidationTimeoutId = setTimeout(() => {
1119
- ds.memoryConsolidationTimeoutId = null
1120
- runConsolidationTick()
1121
- ds.memoryConsolidationIntervalId = setInterval(runConsolidationTick, MEMORY_CONSOLIDATION_INTERVAL)
1122
- }, MEMORY_CONSOLIDATION_INITIAL_DELAY)
1123
- }
1124
-
1125
- function stopMemoryConsolidation() {
1126
- if (ds.memoryConsolidationTimeoutId) {
1127
- clearTimeout(ds.memoryConsolidationTimeoutId)
1128
- ds.memoryConsolidationTimeoutId = null
1129
- }
1130
- if (ds.memoryConsolidationIntervalId) {
1131
- clearInterval(ds.memoryConsolidationIntervalId)
1132
- ds.memoryConsolidationIntervalId = null
1133
- }
1134
- }
1135
-
1136
- // --- Eval scheduler ---
1137
-
1138
- const EVAL_DEFAULT_INTERVAL_MS = 24 * 3600_000 // 24 hours
1139
-
1140
- async function runEvalSchedulerTick() {
1141
- try {
1142
- const settings = loadSettings()
1143
- if (!settings.autonomyEvalEnabled) return
1144
-
1145
- const { runEvalSuite } = await import('@/lib/server/eval/runner')
1146
- const agents = loadAgents()
1147
- const heartbeatAgentIds = Object.keys(agents).filter(
1148
- (id) => agents[id].heartbeatEnabled === true,
1149
- )
1150
-
1151
- for (const agentId of heartbeatAgentIds) {
1152
- try {
1153
- const result = await runEvalSuite(agentId)
1154
- console.log(
1155
- `[daemon:eval] Agent ${agents[agentId].name}: ${result.percentage}% (${result.totalScore}/${result.maxScore})`,
1156
- )
1157
- createNotification({
1158
- title: `Eval: ${agents[agentId].name} scored ${result.percentage}%`,
1159
- message: `${result.runs.length} scenarios, ${result.totalScore}/${result.maxScore} points`,
1160
- type: result.percentage >= 60 ? 'info' : 'warning',
1161
- })
1162
- } catch (err: unknown) {
1163
- console.error(`[daemon:eval] Failed for agent ${agentId}:`, errorMessage(err))
1164
- }
1165
- }
1166
- } catch (err: unknown) {
1167
- console.error('[daemon:eval] Scheduler tick error:', errorMessage(err))
1168
- }
1169
- }
1170
-
1171
- function startEvalScheduler() {
1172
- if (ds.evalSchedulerIntervalId) return
1173
- try {
1174
- const settings = loadSettings()
1175
- if (!settings.autonomyEvalEnabled) return
1176
- const intervalMs = parseCronToMs(settings.autonomyEvalCron, EVAL_DEFAULT_INTERVAL_MS) || EVAL_DEFAULT_INTERVAL_MS
1177
- ds.evalSchedulerIntervalId = setInterval(runEvalSchedulerTick, intervalMs)
1178
- console.log(`[daemon:eval] Eval scheduler started (interval=${Math.round(intervalMs / 3600_000)}h)`)
1179
- } catch {
1180
- // Eval scheduling is optional — don't block daemon start
1181
- }
1182
- }
1183
-
1184
- function stopEvalScheduler() {
1185
- if (ds.evalSchedulerIntervalId) {
1186
- clearInterval(ds.evalSchedulerIntervalId)
1187
- ds.evalSchedulerIntervalId = null
1188
- }
1189
- }
1190
-
1191
- const SWARM_TIMEOUT_CHECK_INTERVAL = 30_000
1192
-
1193
- function startSwarmTimeoutChecker() {
1194
- if (ds.swarmTimeoutIntervalId) return
1195
- ds.swarmTimeoutIntervalId = setInterval(() => {
1196
- if (!ds.running || ds.shuttingDown) return
1197
- try {
1198
- checkSwarmTimeouts()
1199
- } catch (err: unknown) {
1200
- console.error(`[daemon] Swarm timeout check error: ${errorMessage(err)}`)
1201
- }
1202
- }, SWARM_TIMEOUT_CHECK_INTERVAL)
1203
- }
1204
-
1205
- function stopSwarmTimeoutChecker() {
1206
- if (ds.swarmTimeoutIntervalId) {
1207
- clearInterval(ds.swarmTimeoutIntervalId)
1208
- ds.swarmTimeoutIntervalId = null
1209
- }
1210
- }
1211
-
1212
- function refreshDaemonTimersForHotReload() {
1213
- if (!ds.running) return
1214
-
1215
- if (ds.queueIntervalId) {
1216
- clearInterval(ds.queueIntervalId)
1217
- ds.queueIntervalId = null
1218
- startQueueProcessor()
1219
- }
1220
-
1221
- if (ds.browserSweepId) {
1222
- clearInterval(ds.browserSweepId)
1223
- ds.browserSweepId = null
1224
- startBrowserSweep()
1225
- }
1226
-
1227
- if (ds.healthIntervalId) {
1228
- clearInterval(ds.healthIntervalId)
1229
- ds.healthIntervalId = null
1230
- }
1231
-
1232
- if (ds.connectorHealthIntervalId) {
1233
- clearInterval(ds.connectorHealthIntervalId)
1234
- ds.connectorHealthIntervalId = null
1235
- }
1236
-
1237
- if (ds.memoryConsolidationTimeoutId || ds.memoryConsolidationIntervalId) {
1238
- stopMemoryConsolidation()
1239
- startMemoryConsolidation()
1240
- }
1241
-
1242
- if (ds.evalSchedulerIntervalId) {
1243
- stopEvalScheduler()
1244
- }
1245
-
1246
- if (ds.swarmTimeoutIntervalId) {
1247
- stopSwarmTimeoutChecker()
1248
- startSwarmTimeoutChecker()
1249
- }
1250
-
1251
- syncDaemonBackgroundServices()
1252
- }
1253
-
1254
- // In dev/HMR, the daemon state survives on globalThis while interval callbacks keep
1255
- // the old module closure alive. Refresh long-lived timers so they always run the
1256
- // current module's logic instead of stale health-alert code paths.
1257
- refreshDaemonTimersForHotReload()
1258
-
1259
- export async function runDaemonHealthCheckNow() {
1260
- // Bypass circuit breaker for manual/forced checks
1261
- ds.providerPingCircuitBreaker.clear()
1262
- await Promise.all([
1263
- runHealthChecks(),
1264
- runConnectorHealthChecks(Date.now()),
1265
- ])
1266
- }
1267
-
1268
- export async function runConnectorHealthCheckNowForTest(now = Date.now()) {
1269
- await runConnectorHealthChecks(now)
1270
- }
1271
-
1272
- export function getDaemonStatus() {
1273
- const estop = loadEstopState()
1274
- const queue = loadQueue()
1275
- const schedules = loadSchedules()
1276
- const reconnectStates = Object.values(getAllReconnectStates())
1277
-
1278
- // Find next scheduled task
1279
- let nextScheduled: number | null = null
1280
- for (const s of Object.values(schedules) as unknown as Record<string, unknown>[]) {
1281
- if (s.status === 'active' && s.nextRunAt) {
1282
- if (!nextScheduled || (s.nextRunAt as number) < nextScheduled) {
1283
- nextScheduled = s.nextRunAt as number
1284
- }
1285
- }
1286
- }
1287
-
1288
- // Webhook retry queue stats
1289
- const retryQueue = loadWebhookRetryQueue()
1290
- const retryEntries = Object.values(retryQueue) as WebhookRetryEntry[]
1291
- const pendingRetries = retryEntries.filter(e => !e.deadLettered).length
1292
- const deadLettered = retryEntries.filter(e => e.deadLettered).length
1293
-
1294
- return {
1295
- running: ds.running,
1296
- schedulerActive: ds.running,
1297
- autostartEnabled: daemonAutostartEnvEnabled(),
1298
- backgroundServicesEnabled: isDaemonBackgroundServicesEnabled(),
1299
- reducedMode: !isDaemonBackgroundServicesEnabled(),
1300
- manualStopRequested: ds.manualStopRequested,
1301
- estop,
1302
- queueLength: queue.length,
1303
- lastProcessed: ds.lastProcessedAt,
1304
- nextScheduled,
1305
- heartbeat: getHeartbeatServiceStatus(),
1306
- health: {
1307
- monitorActive: !!ds.healthIntervalId,
1308
- connectorMonitorActive: !!ds.connectorHealthIntervalId,
1309
- staleSessions: ds.staleSessionIds.size,
1310
- connectorsInBackoff: reconnectStates.filter((state) => !state.exhausted).length,
1311
- connectorsExhausted: reconnectStates.filter((state) => state.exhausted).length,
1312
- checkIntervalSec: Math.trunc(HEALTH_CHECK_INTERVAL / 1000),
1313
- connectorCheckIntervalSec: Math.trunc(CONNECTOR_HEALTH_CHECK_INTERVAL / 1000),
1314
- integrity: {
1315
- enabled: loadSettings().integrityMonitorEnabled !== false,
1316
- lastCheckedAt: ds.lastIntegrityCheckAt,
1317
- lastDriftCount: ds.lastIntegrityDriftCount,
1318
- },
1319
- },
1320
- webhookRetry: {
1321
- pendingRetries,
1322
- deadLettered,
1323
- },
1324
- guards: {
1325
- healthCheckRunning: ds.healthCheckRunning,
1326
- connectorHealthCheckRunning: ds.connectorHealthCheckRunning,
1327
- shuttingDown: ds.shuttingDown,
1328
- providerCircuitBreakers: ds.providerPingCircuitBreaker.size,
1329
- },
1330
- }
1331
- }
1
+ export * from './daemon-state/policy'
2
+ export * from './daemon-state/supervisor'
3
+ export * from './daemon-state/health'