@swarmclawai/swarmclaw 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. package/README.md +19 -0
  2. package/package.json +5 -2
  3. package/skills/coding-agent/SKILL.md +111 -0
  4. package/skills/github/SKILL.md +140 -0
  5. package/skills/nano-banana-pro/SKILL.md +62 -0
  6. package/skills/nano-banana-pro/scripts/generate_image.py +235 -0
  7. package/skills/nano-pdf/SKILL.md +53 -0
  8. package/skills/openai-image-gen/SKILL.md +78 -0
  9. package/skills/openai-image-gen/scripts/gen.py +328 -0
  10. package/skills/resourceful-problem-solving/SKILL.md +49 -0
  11. package/skills/skill-creator/SKILL.md +147 -0
  12. package/skills/skill-creator/scripts/init_skill.py +378 -0
  13. package/skills/skill-creator/scripts/quick_validate.py +159 -0
  14. package/skills/summarize/SKILL.md +77 -0
  15. package/src/app/api/auth/route.ts +20 -5
  16. package/src/app/api/chats/[id]/deploy/route.ts +11 -6
  17. package/src/app/api/chats/[id]/devserver/route.ts +17 -20
  18. package/src/app/api/chats/[id]/messages/route.ts +15 -11
  19. package/src/app/api/chats/[id]/route.ts +9 -10
  20. package/src/app/api/chats/[id]/stop/route.ts +5 -7
  21. package/src/app/api/chats/messages-route.test.ts +8 -6
  22. package/src/app/api/chats/route.ts +9 -10
  23. package/src/app/api/credentials/[id]/route.ts +4 -1
  24. package/src/app/api/extensions/marketplace/route.ts +5 -2
  25. package/src/app/api/ip/route.ts +2 -2
  26. package/src/app/api/memory/maintenance/route.ts +5 -2
  27. package/src/app/api/preview-server/route.ts +15 -12
  28. package/src/app/api/projects/[id]/route.ts +7 -46
  29. package/src/app/api/system/status/route.ts +11 -0
  30. package/src/app/api/upload/route.ts +4 -1
  31. package/src/cli/index.js +7 -0
  32. package/src/cli/spec.js +1 -0
  33. package/src/components/agents/agent-files-editor.tsx +44 -32
  34. package/src/components/agents/personality-builder.tsx +13 -7
  35. package/src/components/agents/trash-list.tsx +1 -1
  36. package/src/components/chat/chat-area.tsx +45 -23
  37. package/src/components/chat/message-bubble.test.ts +35 -0
  38. package/src/components/chat/message-bubble.tsx +20 -9
  39. package/src/components/chat/message-list.tsx +62 -42
  40. package/src/components/chat/swarm-status-card.tsx +10 -3
  41. package/src/components/input/chat-input.tsx +34 -14
  42. package/src/components/layout/daemon-indicator.tsx +7 -8
  43. package/src/components/layout/update-banner.tsx +8 -13
  44. package/src/components/logs/log-list.tsx +1 -1
  45. package/src/components/memory/memory-card.tsx +3 -1
  46. package/src/components/org-chart/org-chart-view.tsx +4 -0
  47. package/src/components/projects/project-list.tsx +4 -2
  48. package/src/components/projects/tabs/overview-tab.tsx +3 -2
  49. package/src/components/secrets/secret-sheet.tsx +1 -1
  50. package/src/components/secrets/secrets-list.tsx +1 -1
  51. package/src/components/shared/agent-switch-dialog.tsx +12 -6
  52. package/src/components/shared/dir-browser.tsx +22 -18
  53. package/src/components/skills/skill-sheet.tsx +2 -3
  54. package/src/components/tasks/task-list.tsx +1 -1
  55. package/src/components/tasks/task-sheet.tsx +1 -1
  56. package/src/hooks/use-openclaw-gateway.ts +46 -27
  57. package/src/instrumentation.ts +10 -7
  58. package/src/lib/chat/assistant-render-id.ts +3 -0
  59. package/src/lib/chat/chat-streaming-state.test.ts +42 -3
  60. package/src/lib/chat/chat-streaming-state.ts +20 -8
  61. package/src/lib/chat/chat.ts +18 -2
  62. package/src/lib/chat/queued-message-queue.test.ts +23 -1
  63. package/src/lib/chat/queued-message-queue.ts +11 -2
  64. package/src/lib/providers/anthropic.ts +6 -3
  65. package/src/lib/providers/claude-cli.ts +9 -3
  66. package/src/lib/providers/cli-utils.test.ts +124 -0
  67. package/src/lib/providers/cli-utils.ts +15 -0
  68. package/src/lib/providers/codex-cli.ts +9 -3
  69. package/src/lib/providers/gemini-cli.ts +6 -2
  70. package/src/lib/providers/index.ts +4 -1
  71. package/src/lib/providers/ollama.ts +5 -2
  72. package/src/lib/providers/openai.ts +8 -5
  73. package/src/lib/providers/opencode-cli.ts +6 -2
  74. package/src/lib/server/activity/activity-log.ts +21 -0
  75. package/src/lib/server/agents/agent-availability.test.ts +10 -5
  76. package/src/lib/server/agents/agent-cascade.ts +79 -59
  77. package/src/lib/server/agents/agent-registry.ts +23 -4
  78. package/src/lib/server/agents/agent-repository.ts +90 -0
  79. package/src/lib/server/agents/delegation-job-repository.ts +53 -0
  80. package/src/lib/server/agents/delegation-jobs.ts +11 -4
  81. package/src/lib/server/agents/guardian-checkpoint-repository.ts +35 -0
  82. package/src/lib/server/agents/guardian.ts +2 -2
  83. package/src/lib/server/agents/main-agent-loop.ts +14 -6
  84. package/src/lib/server/agents/main-loop-state-repository.ts +38 -0
  85. package/src/lib/server/agents/subagent-runtime.ts +9 -6
  86. package/src/lib/server/agents/subagent-swarm.ts +3 -2
  87. package/src/lib/server/agents/task-session.ts +3 -4
  88. package/src/lib/server/approvals/approval-repository.ts +30 -0
  89. package/src/lib/server/autonomy/supervisor-incident-repository.ts +42 -0
  90. package/src/lib/server/autonomy/supervisor-reflection.ts +14 -1
  91. package/src/lib/server/chat-execution/chat-execution-types.ts +38 -0
  92. package/src/lib/server/chat-execution/chat-execution-utils.ts +1 -1
  93. package/src/lib/server/chat-execution/chat-execution.ts +84 -1914
  94. package/src/lib/server/chat-execution/chat-turn-finalization.ts +620 -0
  95. package/src/lib/server/chat-execution/chat-turn-partial-persistence.ts +221 -0
  96. package/src/lib/server/chat-execution/chat-turn-preflight.ts +133 -0
  97. package/src/lib/server/chat-execution/chat-turn-preparation.ts +817 -0
  98. package/src/lib/server/chat-execution/chat-turn-stream-execution.ts +296 -0
  99. package/src/lib/server/chat-execution/chat-turn-tool-routing.ts +5 -5
  100. package/src/lib/server/chat-execution/continuation-evaluator.ts +4 -3
  101. package/src/lib/server/chat-execution/continuation-limits.ts +6 -3
  102. package/src/lib/server/chat-execution/message-classifier.test.ts +329 -0
  103. package/src/lib/server/chat-execution/message-classifier.ts +5 -2
  104. package/src/lib/server/chat-execution/post-stream-finalization.ts +5 -2
  105. package/src/lib/server/chat-execution/prompt-builder.ts +22 -1
  106. package/src/lib/server/chat-execution/prompt-sections.ts +55 -13
  107. package/src/lib/server/chat-execution/response-completeness.ts +5 -2
  108. package/src/lib/server/chat-execution/situational-awareness.ts +12 -7
  109. package/src/lib/server/chat-execution/stream-agent-chat.ts +58 -25
  110. package/src/lib/server/chatrooms/chatroom-memory-bridge.ts +6 -3
  111. package/src/lib/server/chatrooms/chatroom-repository.ts +32 -0
  112. package/src/lib/server/connectors/bluebubbles.ts +7 -4
  113. package/src/lib/server/connectors/connector-inbound.ts +16 -13
  114. package/src/lib/server/connectors/connector-lifecycle.ts +11 -8
  115. package/src/lib/server/connectors/connector-outbound.ts +6 -3
  116. package/src/lib/server/connectors/connector-repository.ts +58 -0
  117. package/src/lib/server/connectors/discord.ts +10 -7
  118. package/src/lib/server/connectors/email.ts +17 -14
  119. package/src/lib/server/connectors/googlechat.ts +7 -4
  120. package/src/lib/server/connectors/inbound-audio-transcription.ts +5 -2
  121. package/src/lib/server/connectors/matrix.ts +6 -3
  122. package/src/lib/server/connectors/openclaw.ts +20 -17
  123. package/src/lib/server/connectors/outbox.ts +4 -1
  124. package/src/lib/server/connectors/runtime-state.test.ts +117 -0
  125. package/src/lib/server/connectors/runtime-state.ts +19 -0
  126. package/src/lib/server/connectors/session-consolidation.ts +5 -2
  127. package/src/lib/server/connectors/signal.ts +9 -6
  128. package/src/lib/server/connectors/slack.ts +13 -10
  129. package/src/lib/server/connectors/teams.ts +8 -5
  130. package/src/lib/server/connectors/telegram.ts +15 -12
  131. package/src/lib/server/connectors/whatsapp.ts +32 -29
  132. package/src/lib/server/credentials/credential-repository.ts +7 -0
  133. package/src/lib/server/embeddings.ts +4 -1
  134. package/src/lib/server/gateways/gateway-profile-repository.ts +4 -0
  135. package/src/lib/server/link-understanding.ts +4 -1
  136. package/src/lib/server/memory/memory-abstract.test.ts +59 -0
  137. package/src/lib/server/memory/memory-abstract.ts +59 -0
  138. package/src/lib/server/memory/memory-db.ts +40 -14
  139. package/src/lib/server/missions/mission-repository.ts +74 -0
  140. package/src/lib/server/missions/mission-service/actions.ts +6 -0
  141. package/src/lib/server/missions/mission-service/bindings.ts +9 -0
  142. package/src/lib/server/missions/mission-service/context.ts +4 -0
  143. package/src/lib/server/missions/mission-service/core.ts +2269 -0
  144. package/src/lib/server/missions/mission-service/queries.ts +12 -0
  145. package/src/lib/server/missions/mission-service/recovery.ts +5 -0
  146. package/src/lib/server/missions/mission-service/ticks.ts +9 -0
  147. package/src/lib/server/missions/mission-service.test.ts +9 -2
  148. package/src/lib/server/missions/mission-service.ts +6 -2263
  149. package/src/lib/server/openclaw/gateway.ts +8 -5
  150. package/src/lib/server/persistence/repository-utils.ts +154 -0
  151. package/src/lib/server/persistence/storage-context.ts +51 -0
  152. package/src/lib/server/persistence/transaction.ts +1 -0
  153. package/src/lib/server/project-utils.ts +13 -0
  154. package/src/lib/server/projects/project-repository.ts +36 -0
  155. package/src/lib/server/projects/project-service.ts +79 -0
  156. package/src/lib/server/protocols/protocol-agent-turn.ts +5 -2
  157. package/src/lib/server/protocols/protocol-normalization.test.ts +6 -4
  158. package/src/lib/server/protocols/protocol-run-lifecycle.ts +5 -2
  159. package/src/lib/server/protocols/protocol-step-helpers.ts +4 -1
  160. package/src/lib/server/provider-health.ts +18 -0
  161. package/src/lib/server/query-expansion.ts +4 -1
  162. package/src/lib/server/runtime/alert-dispatch.ts +8 -7
  163. package/src/lib/server/runtime/daemon-policy.ts +1 -1
  164. package/src/lib/server/runtime/daemon-state/core.ts +1570 -0
  165. package/src/lib/server/runtime/daemon-state/health.ts +6 -0
  166. package/src/lib/server/runtime/daemon-state/policy.ts +7 -0
  167. package/src/lib/server/runtime/daemon-state/supervisor.ts +6 -0
  168. package/src/lib/server/runtime/daemon-state.test.ts +48 -0
  169. package/src/lib/server/runtime/daemon-state.ts +3 -1331
  170. package/src/lib/server/runtime/estop-repository.ts +4 -0
  171. package/src/lib/server/runtime/estop.ts +3 -1
  172. package/src/lib/server/runtime/heartbeat-service.test.ts +2 -2
  173. package/src/lib/server/runtime/heartbeat-service.ts +78 -34
  174. package/src/lib/server/runtime/heartbeat-wake.ts +6 -4
  175. package/src/lib/server/runtime/idle-window.ts +6 -3
  176. package/src/lib/server/runtime/network.ts +11 -0
  177. package/src/lib/server/runtime/orchestrator-events.ts +2 -2
  178. package/src/lib/server/runtime/perf.ts +4 -1
  179. package/src/lib/server/runtime/process-manager.ts +7 -4
  180. package/src/lib/server/runtime/queue/claims.ts +4 -0
  181. package/src/lib/server/runtime/queue/core.ts +2079 -0
  182. package/src/lib/server/runtime/queue/execution.ts +7 -0
  183. package/src/lib/server/runtime/queue/followups.ts +4 -0
  184. package/src/lib/server/runtime/queue/queries.ts +12 -0
  185. package/src/lib/server/runtime/queue/recovery.ts +7 -0
  186. package/src/lib/server/runtime/queue-recovery.test.ts +48 -13
  187. package/src/lib/server/runtime/queue-repository.ts +17 -0
  188. package/src/lib/server/runtime/queue.ts +5 -2058
  189. package/src/lib/server/runtime/run-ledger.ts +6 -5
  190. package/src/lib/server/runtime/run-repository.ts +73 -0
  191. package/src/lib/server/runtime/runtime-lock-repository.ts +8 -0
  192. package/src/lib/server/runtime/runtime-settings.ts +1 -1
  193. package/src/lib/server/runtime/runtime-state.ts +99 -0
  194. package/src/lib/server/runtime/scheduler.ts +13 -8
  195. package/src/lib/server/runtime/session-run-manager/cancellation.ts +157 -0
  196. package/src/lib/server/runtime/session-run-manager/drain.ts +246 -0
  197. package/src/lib/server/runtime/session-run-manager/enqueue.ts +287 -0
  198. package/src/lib/server/runtime/session-run-manager/queries.ts +117 -0
  199. package/src/lib/server/runtime/session-run-manager/recovery.ts +238 -0
  200. package/src/lib/server/runtime/session-run-manager/state.ts +441 -0
  201. package/src/lib/server/runtime/session-run-manager/types.ts +74 -0
  202. package/src/lib/server/runtime/session-run-manager.ts +72 -1374
  203. package/src/lib/server/runtime/watch-job-repository.ts +35 -0
  204. package/src/lib/server/runtime/watch-jobs.ts +3 -1
  205. package/src/lib/server/sandbox/bridge-auth-registry.ts +6 -0
  206. package/src/lib/server/sandbox/novnc-auth.ts +10 -0
  207. package/src/lib/server/schedules/schedule-repository.ts +42 -0
  208. package/src/lib/server/session-tools/context.ts +14 -0
  209. package/src/lib/server/session-tools/discovery.ts +9 -6
  210. package/src/lib/server/session-tools/index.ts +3 -1
  211. package/src/lib/server/session-tools/platform.ts +1 -1
  212. package/src/lib/server/session-tools/subagent.ts +23 -2
  213. package/src/lib/server/session-tools/wallet.ts +4 -1
  214. package/src/lib/server/sessions/session-repository.ts +85 -0
  215. package/src/lib/server/settings/settings-repository.ts +25 -0
  216. package/src/lib/server/skills/clawhub-client.ts +4 -1
  217. package/src/lib/server/skills/runtime-skill-resolver.ts +8 -2
  218. package/src/lib/server/skills/skill-discovery.test.ts +2 -2
  219. package/src/lib/server/skills/skill-discovery.ts +2 -2
  220. package/src/lib/server/skills/skill-eligibility.ts +6 -0
  221. package/src/lib/server/skills/skill-repository.ts +14 -0
  222. package/src/lib/server/solana.ts +6 -0
  223. package/src/lib/server/storage-auth.ts +5 -5
  224. package/src/lib/server/storage-normalization.ts +4 -0
  225. package/src/lib/server/storage.ts +32 -32
  226. package/src/lib/server/tasks/task-followups.ts +4 -1
  227. package/src/lib/server/tasks/task-repository.ts +54 -0
  228. package/src/lib/server/tool-loop-detection.ts +8 -3
  229. package/src/lib/server/tool-planning.ts +226 -0
  230. package/src/lib/server/tool-retry.ts +4 -3
  231. package/src/lib/server/usage/usage-repository.ts +30 -0
  232. package/src/lib/server/wallet/wallet-portfolio.ts +29 -0
  233. package/src/lib/server/webhooks/webhook-repository.ts +10 -0
  234. package/src/lib/server/ws-hub.ts +5 -2
  235. package/src/lib/strip-internal-metadata.test.ts +78 -37
  236. package/src/lib/strip-internal-metadata.ts +20 -6
  237. package/src/stores/use-approval-store.ts +7 -1
  238. package/src/stores/use-chat-store.test.ts +54 -0
  239. package/src/stores/use-chat-store.ts +26 -6
  240. package/src/types/index.ts +6 -0
  241. /package/{bundled-skills → skills}/google-workspace/SKILL.md +0 -0
@@ -0,0 +1,238 @@
1
+ import { log } from '@/lib/server/logger'
2
+ import { errorMessage } from '@/lib/shared-utils'
3
+ import { isAllEstopEngaged, isAutonomyEstopEngaged } from '@/lib/server/runtime/estop'
4
+ import {
5
+ isRestartRecoverableSource,
6
+ listPersistedRuns,
7
+ loadRecoverableStaleRuns,
8
+ } from '@/lib/server/runtime/run-ledger'
9
+
10
+ import {
11
+ abortSessionRuntime,
12
+ clearDeferredDrain,
13
+ decrementNonHeartbeatWork,
14
+ deleteQueueEntry,
15
+ executionKeyForSession,
16
+ markPersistedRunInterrupted,
17
+ normalizeMode,
18
+ now,
19
+ reconcileSessionActivityLease,
20
+ recoveryState,
21
+ STALE_QUEUED_RUN_MS,
22
+ state,
23
+ syncRunRecord,
24
+ STUCK_RUN_THRESHOLD_MS,
25
+ } from './state'
26
+ import type { EnqueueSessionRunInput, SessionRunQueueEntry } from './types'
27
+
28
/**
 * Callback that submits a rebuilt run request to the session run queue.
 * Callers in this module ignore whatever it returns.
 */
type EnqueueSessionRunFn = (input: EnqueueSessionRunInput) => unknown

/**
 * Async callback invoked fire-and-forget with an execution key once a queue
 * repair has left work behind for that key.
 */
type DrainExecutionFn = (executionKey: string) => Promise<void>
30
+
31
/**
 * Settles a queue entry that recovery has already removed from its queue.
 *
 * Steps, in order: release the entry's non-heartbeat work count, force the run
 * into a terminal state (runs that are not already completed/failed/cancelled
 * become `failed`), record `reason` as the run error, persist the record, push
 * an `err` event to every subscriber, and resolve the entry's promise with an
 * empty, non-persisted result that carries the same error.
 *
 * @param entry  Queue entry being abandoned.
 * @param reason Human-readable explanation stored on the run and sent to subscribers.
 */
function resolveRecoveredQueuedEntry(entry: SessionRunQueueEntry, reason: string): void {
  decrementNonHeartbeatWork(entry)

  const run = entry.run
  switch (run.status) {
    case 'completed':
    case 'failed':
    case 'cancelled':
      // Already terminal: keep the status and only backfill a missing end time.
      run.endedAt = run.endedAt || now()
      break
    default:
      run.status = 'failed'
      run.endedAt = now()
  }
  run.error = reason
  syncRunRecord(run)

  const errorEvent = { t: 'err', text: reason } as const
  for (const send of entry.onEvents) {
    try {
      send({ ...errorEvent })
    } catch {
      // Subscriber stream can be closed by the client.
    }
  }

  const missionPart = run.missionId ? { missionId: run.missionId } : {}
  entry.resolve({
    runId: run.id,
    sessionId: run.sessionId,
    ...missionPart,
    text: '',
    persisted: false,
    toolEvents: [],
    error: reason,
  })
}
58
+
59
/**
 * One-shot recovery of runs that were still open in the ledger.
 *
 * The `recoveryState.completed` flag is flipped before any work happens, so the
 * body runs at most once per `recoveryState` singleton. Every stale run is
 * marked interrupted. A run is additionally re-enqueued only when no e-stop is
 * engaged, it was not itself a restart recovery, it carries a recovery payload,
 * and its source is restart-recoverable.
 *
 * @param enqueueSessionRun Callback used to re-submit recoverable runs. Errors
 *   it throws are logged and do not stop the remaining runs from being handled.
 */
export function ensureRecoveredPersistedRuns(enqueueSessionRun: EnqueueSessionRunFn): void {
  if (recoveryState.completed) return
  recoveryState.completed = true

  const staleRuns = loadRecoverableStaleRuns()
  if (staleRuns.length === 0) return

  const recoveryBlocked = isAutonomyEstopEngaged() || isAllEstopEngaged()

  for (const staleRun of staleRuns) {
    // Always record the interruption, even when the run will not be retried.
    const interrupted = markPersistedRunInterrupted(
      staleRun,
      'Interrupted by server restart before the run completed.',
    )

    if (recoveryBlocked || interrupted.recoveredFromRestart) continue
    const payload = interrupted.recoveryPayload
    if (!payload) continue
    if (!isRestartRecoverableSource(interrupted.source)) continue

    try {
      enqueueSessionRun({
        sessionId: interrupted.sessionId,
        message: payload.message,
        imagePath: payload.imagePath,
        imageUrl: payload.imageUrl,
        attachedFiles: payload.attachedFiles,
        internal: payload.internal,
        source: payload.source,
        mode: normalizeMode(payload.mode, payload.internal),
        dedupeKey: interrupted.dedupeKey,
        maxRuntimeMs: payload.maxRuntimeMs,
        modelOverride: payload.modelOverride,
        heartbeatConfig: payload.heartbeatConfig,
        replyToId: payload.replyToId,
        executionGroupKey: payload.executionGroupKey,
        recoveredFromRestart: true,
        recoveredFromRunId: interrupted.id,
      })
    } catch (err: unknown) {
      const details = { sessionId: interrupted.sessionId, error: errorMessage(err) }
      log.warn('session-run', `Failed to requeue interrupted run ${interrupted.id}`, details)
    }
  }
}
105
+
106
/**
 * Repairs the queued (not running) work of one session.
 *
 * For every execution key that has no running entry (optionally restricted to
 * `opts.executionKey`), queued entries belonging to `sessionId` are treated as
 * stale when their status is not `queued`, their promise is missing from
 * `state.promises`, or they have been queued for at least `maxQueuedAgeMs`.
 * Stale entries are removed and settled with `reason`. Empty queues are
 * dropped. Queues that still hold entries for the session get their deferred
 * drain timer cleared and a drain kicked off.
 *
 * @param sessionId      Session whose queued runs are inspected.
 * @param drainExecution Callback that drains a queue; invoked without awaiting.
 * @param opts.executionKey   Restrict the repair to one execution key (trimmed; blank means all).
 * @param opts.maxQueuedAgeMs Age after which a queued run is stale; floored at 1000 ms,
 *                            defaults to `STALE_QUEUED_RUN_MS`.
 * @param opts.reason         Error text for recovered runs.
 * @returns Counts of drains kicked and of queued runs recovered.
 */
export function repairSessionRunQueue(
  sessionId: string,
  drainExecution: DrainExecutionFn,
  opts?: {
    executionKey?: string
    maxQueuedAgeMs?: number
    reason?: string
  },
): {
  kickedExecutionKeys: number
  recoveredQueuedRuns: number
} {
  const staleAfterMs = Math.max(1_000, opts?.maxQueuedAgeMs ?? STALE_QUEUED_RUN_MS)
  const recoveryReason = opts?.reason || 'Recovered stale queued run'
  const requestedKey = typeof opts?.executionKey === 'string' ? opts.executionKey.trim() : ''
  const onlyExecutionKey = requestedKey || null
  const checkedAt = now()
  const outcome = { kickedExecutionKeys: 0, recoveredQueuedRuns: 0 }

  const belongsToSession = (candidate: SessionRunQueueEntry): boolean => candidate.run.sessionId === sessionId
  const dropExecutionQueue = (key: string): void => {
    clearDeferredDrain(key)
    state.queueByExecution.delete(key)
  }

  for (const [executionKey, queue] of state.queueByExecution.entries()) {
    if (onlyExecutionKey && executionKey !== onlyExecutionKey) continue
    if (queue.length === 0) {
      dropExecutionQueue(executionKey)
      continue
    }
    // A running entry will drain this queue on its own; leave it alone.
    if (state.runningByExecution.has(executionKey)) continue

    const sessionEntries = queue.filter(belongsToSession)
    if (sessionEntries.length === 0) continue

    // `sessionEntries` is a snapshot, so removing from `queue` below is safe.
    for (const entry of sessionEntries) {
      const previousStatus = entry.run.status
      const missingPromise = !state.promises.has(entry.run.id)
      const ageMs = Math.max(0, checkedAt - (entry.run.queuedAt || 0))
      const isStale = previousStatus !== 'queued' || missingPromise || ageMs >= staleAfterMs
      if (!isStale) continue
      if (!deleteQueueEntry(queue, entry)) continue

      clearDeferredDrain(executionKey)
      resolveRecoveredQueuedEntry(entry, recoveryReason)
      outcome.recoveredQueuedRuns += 1
      log.warn('session-run', `Recovered stale queued run ${entry.run.id}`, {
        sessionId: entry.run.sessionId,
        executionKey,
        source: entry.run.source,
        ageMs,
        missingPromise,
        previousStatus,
      })
    }

    if (queue.length === 0) {
      dropExecutionQueue(executionKey)
      continue
    }

    if (queue.some(belongsToSession)) {
      clearDeferredDrain(executionKey)
      outcome.kickedExecutionKeys += 1
      void drainExecution(executionKey)
    }
  }

  if (outcome.recoveredQueuedRuns > 0) reconcileSessionActivityLease(sessionId)
  return outcome
}
176
+
177
/**
 * Watchdog sweep over running work.
 *
 * Pass 1 — in-memory running entries: an entry is aborted once its age
 * (measured from `startedAt`, falling back to `queuedAt`) reaches
 * `STUCK_RUN_THRESHOLD_MS` and, when the entry has a `maxRuntimeMs`, also
 * reaches 1.5x that budget. Aborted entries are removed from
 * `state.runningByExecution` and their work count / activity lease released.
 *
 * Pass 2 — persisted runs with status `running` that are not the in-memory
 * running entry for their execution key and are older than
 * `STUCK_RUN_THRESHOLD_MS`: they are marked interrupted. A recoverable run is
 * re-enqueued only when nothing for its session is already running or queued
 * on that execution key and no e-stop is engaged. The e-stop gate mirrors
 * `ensureRecoveredPersistedRuns`, so the watchdog cannot restart work that
 * restart recovery would have refused to restart.
 *
 * NOTE(review): restart recovery also skips runs flagged `recoveredFromRestart`;
 * this sweep does not. Confirm whether repeated watchdog re-enqueues are intended.
 *
 * @param enqueueSessionRun Callback used to re-submit orphaned runs. Errors it
 *   throws are logged and do not interrupt the sweep.
 * @returns Number of runs aborted or marked interrupted by this sweep.
 */
export function sweepStuckRuns(enqueueSessionRun: EnqueueSessionRunFn): { aborted: number } {
  const deadline = now()
  let aborted = 0

  for (const [execKey, entry] of state.runningByExecution.entries()) {
    const age = deadline - (entry.run.startedAt || entry.run.queuedAt)
    // Runs with an explicit budget get 50% grace before the watchdog steps in.
    if (entry.maxRuntimeMs && age < entry.maxRuntimeMs * 1.5) continue
    if (age < STUCK_RUN_THRESHOLD_MS) continue

    abortSessionRuntime(entry, 'Watchdog: run exceeded maximum allowed duration')
    state.runningByExecution.delete(execKey)
    decrementNonHeartbeatWork(entry)
    reconcileSessionActivityLease(entry.run.sessionId)
    aborted++
  }

  const persistedRunning = listPersistedRuns({ status: 'running' })
  // Evaluated once per sweep, and only when there is something to inspect.
  const recoveryBlocked = persistedRunning.length > 0
    && (isAutonomyEstopEngaged() || isAllEstopEngaged())

  for (const run of persistedRunning) {
    const execKey = run.recoveryPayload?.executionGroupKey || executionKeyForSession(run.sessionId)
    const inMemory = state.runningByExecution.get(execKey)
    if (inMemory && inMemory.run.id === run.id) continue

    const age = deadline - (run.startedAt || run.queuedAt)
    if (age < STUCK_RUN_THRESHOLD_MS) continue

    markPersistedRunInterrupted(run, 'Watchdog: orphaned run detected after server restart or HMR')
    aborted++

    // An engaged e-stop means the run stays interrupted and is not restarted.
    if (recoveryBlocked) continue

    const alreadyRunning = state.runningByExecution.has(execKey)
    const alreadyQueued = (state.queueByExecution.get(execKey) || []).some((entry) => entry.run.sessionId === run.sessionId)
    if (run.recoveryPayload && isRestartRecoverableSource(run.source) && !alreadyRunning && !alreadyQueued) {
      try {
        const payload = run.recoveryPayload
        enqueueSessionRun({
          sessionId: run.sessionId,
          message: payload.message,
          imagePath: payload.imagePath,
          imageUrl: payload.imageUrl,
          attachedFiles: payload.attachedFiles,
          internal: payload.internal,
          source: payload.source,
          mode: normalizeMode(payload.mode, payload.internal),
          dedupeKey: run.dedupeKey,
          maxRuntimeMs: payload.maxRuntimeMs,
          modelOverride: payload.modelOverride,
          heartbeatConfig: payload.heartbeatConfig,
          replyToId: payload.replyToId,
          executionGroupKey: payload.executionGroupKey,
          recoveredFromRestart: true,
          recoveredFromRunId: run.id,
        })
      } catch (err: unknown) {
        log.warn('session-run', `Watchdog: failed to re-enqueue orphaned run ${run.id}`, {
          sessionId: run.sessionId,
          error: errorMessage(err),
        })
      }
    }
  }

  return { aborted }
}
@@ -0,0 +1,441 @@
1
+ import { genId } from '@/lib/id'
2
+ import type { RunEventRecord, SessionRunRecord, SessionRunStatus, SSEEvent } from '@/types'
3
+ import {
4
+ isRuntimeLockActive,
5
+ releaseRuntimeLock,
6
+ tryAcquireRuntimeLock,
7
+ } from '@/lib/server/runtime/runtime-lock-repository'
8
+ import { getSession } from '@/lib/server/sessions/session-repository'
9
+ import { log } from '@/lib/server/logger'
10
+ import { isInternalHeartbeatRun } from '@/lib/server/runtime/heartbeat-source'
11
+ import { cleanupSessionBrowser } from '@/lib/server/session-tools/web'
12
+ import { cancelDelegationJobsForParentSession } from '@/lib/server/agents/delegation-jobs'
13
+ import { getMainLoopStateForSession } from '@/lib/server/agents/main-agent-loop'
14
+ import { observeAutonomyRunOutcome } from '@/lib/server/autonomy/supervisor-reflection'
15
+ import { observeLearnedSkillRunOutcome } from '@/lib/server/skills/learned-skills'
16
+ import { errorMessage, hmrSingleton } from '@/lib/shared-utils'
17
+ import {
18
+ appendPersistedRunEvent,
19
+ patchPersistedRun,
20
+ persistRun,
21
+ } from '@/lib/server/runtime/run-ledger'
22
+ import { getActiveSessionProcess, stopActiveSessionProcess } from '@/lib/server/runtime/runtime-state'
23
+ import { notify } from '@/lib/server/ws-hub'
24
+ import type { SessionRunManagerState, SessionRunQueueEntry, SessionQueueMode } from './types'
25
+
26
/** Upper bound on `state.recentRunIds`; older ids are evicted together with their run and promise. */
export const MAX_RECENT_RUNS = 500

/** NOTE(review): presumably the window for coalescing collect-mode messages — confirm at the usage site. */
export const COLLECT_COALESCE_WINDOW_MS = 1_500

/** NOTE(review): presumably the TTL of the shared activity lease — confirm at the usage site. */
export const SHARED_ACTIVITY_LEASE_TTL_MS = 15 * 1_000

/** NOTE(review): presumably the renewal interval of the shared activity lease — confirm at the usage site. */
export const SHARED_ACTIVITY_LEASE_RENEW_MS = 5 * 1_000

/** NOTE(review): presumably the TTL of an external execution hold — confirm at the usage site. */
export const EXTERNAL_HOLD_TTL_MS = 60 * 1_000

/** NOTE(review): presumably caps nested drain depth per execution key — confirm at the usage site. */
export const MAX_DRAIN_DEPTH = 25

/** NOTE(review): presumably the retry delay for heartbeats that find the session busy — confirm at the usage site. */
export const HEARTBEAT_BUSY_RETRY_MS = 1_000

/** Default age after which a queued run is considered stale by queue repair. */
export const STALE_QUEUED_RUN_MS = 15 * 1_000

/** Minimum age (20 minutes) before the watchdog treats a running run as stuck. */
export const STUCK_RUN_THRESHOLD_MS = 20 * 60 * 1_000

/** Lease owner id built once at module load from the process id and a generated 6-character id. */
export const SHARED_ACTIVITY_LEASE_OWNER = `session-run:${process.pid}:${genId(6)}`
36
+
37
/**
 * Session run manager state, obtained through `hmrSingleton` under the key
 * `__swarmclaw_session_run_manager__`. The factory builds a fresh, empty state.
 */
export const state: SessionRunManagerState = hmrSingleton<SessionRunManagerState>(
  '__swarmclaw_session_run_manager__',
  () => ({
    runningByExecution: new Map(),
    queueByExecution: new Map(),
    runs: new Map(),
    recentRunIds: [],
    promises: new Map(),
    deferredDrainTimers: new Map(),
    activityLeaseRenewTimers: new Map(),
    externalSessionHolds: new Map(),
    externalHoldTimers: new Map(),
    drainDepth: new Map(),
    lastQueuedAt: 0,
    nonHeartbeatWorkCount: new Map(),
  }),
)

/** Flag object recording whether persisted-run recovery has already been performed. */
export const recoveryState = hmrSingleton('__swarmclaw_session_run_recovery__', () => ({ completed: false }))

// Backfill any field the singleton handed back without (for example a state
// object created before that field existed), so later code can rely on all of them.
state.runningByExecution ||= new Map<string, SessionRunQueueEntry>()
state.queueByExecution ||= new Map<string, SessionRunQueueEntry[]>()
state.runs ||= new Map<string, SessionRunRecord>()
state.recentRunIds ||= []
state.promises ||= new Map<string, Promise<import('@/lib/server/chat-execution/chat-execution').ExecuteChatTurnResult>>()
state.deferredDrainTimers ||= new Map<string, ReturnType<typeof setTimeout>>()
state.activityLeaseRenewTimers ||= new Map<string, ReturnType<typeof setInterval>>()
state.externalSessionHolds ||= new Map<string, number>()
state.externalHoldTimers ||= new Map<string, ReturnType<typeof setTimeout>>()
state.drainDepth ||= new Map<string, number>()
if (typeof state.lastQueuedAt !== 'number') state.lastQueuedAt = 0
state.nonHeartbeatWorkCount ||= new Map<string, number>()
71
+
72
/**
 * Current time in epoch milliseconds.
 *
 * @returns The value of `Date.now()` at the moment of the call.
 */
export function now(): number {
  const epochMs = Date.now()
  return epochMs
}
75
+
76
/**
 * Produces a strictly increasing queue timestamp.
 *
 * Uses the current time unless it is not later than the last issued value, in
 * which case it returns the last value plus one. The result is stored in
 * `state.lastQueuedAt`.
 *
 * @returns The timestamp to assign to the next queued run.
 */
export function nextQueuedAt() {
  const wallClock = now()
  let stamp = wallClock
  if (wallClock <= state.lastQueuedAt) {
    stamp = state.lastQueuedAt + 1
  }
  state.lastQueuedAt = stamp
  return stamp
}
82
+
83
/**
 * Builds a single-line preview of a message.
 *
 * Whitespace runs (including newlines) collapse to one space, the result is
 * trimmed, and anything beyond 140 UTF-16 code units is cut off. If the cut
 * lands in the middle of a surrogate pair, the dangling high surrogate is
 * dropped so the preview never ends in half a character.
 *
 * @param text Raw message text; falsy input is treated as an empty string.
 * @returns The preview, at most 140 code units long.
 */
export function messagePreview(text: string): string {
  const maxLength = 140
  const collapsed = (text || '').replace(/\s+/g, ' ').trim()
  if (collapsed.length <= maxLength) return collapsed

  const preview = collapsed.slice(0, maxLength)
  const lastUnit = preview.charCodeAt(preview.length - 1)
  // 0xD800-0xDBFF is the high-surrogate range: its partner was just cut off.
  const endsWithHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff
  return endsWithHighSurrogate ? preview.slice(0, -1) : preview
}
86
+
87
/**
 * Evicts the oldest run ids once `state.recentRunIds` exceeds
 * `MAX_RECENT_RUNS`, dropping each evicted id's record from `state.runs` and
 * its promise from `state.promises`.
 */
function trimRecentRuns() {
  const overflow = state.recentRunIds.length - MAX_RECENT_RUNS
  if (overflow <= 0) return

  // Removes the oldest ids from the front of the list in place.
  const evictedIds = state.recentRunIds.splice(0, overflow)
  for (const evictedId of evictedIds) {
    if (!evictedId) continue
    state.runs.delete(evictedId)
    state.promises.delete(evictedId)
  }
}
95
+
96
/**
 * Stores a run record in `state.runs` under its id and hands it to `persistRun`.
 *
 * @param run Record to store.
 * @returns The same record, for chaining.
 */
export function syncRunRecord(run: SessionRunRecord): SessionRunRecord {
  const { id } = run
  state.runs.set(id, run)
  persistRun(run)
  return run
}
101
+
102
/**
 * Registers a run: syncs its record, appends its id to the recent-run list,
 * then trims that list back to its maximum size.
 *
 * @param run Run record to register.
 */
export function registerRun(run: SessionRunRecord) {
  const synced = syncRunRecord(run)
  state.recentRunIds.push(synced.id)
  trimRecentRuns()
}
107
+
108
/**
 * Decides whether an event is written to the persisted run event log.
 *
 * @param event Event to classify.
 * @returns `false` for events of type `d`, `thinking`, or `reset`; `true` otherwise.
 */
function shouldPersistRunEvent(event: SSEEvent): boolean {
  switch (event.t) {
    case 'd':
    case 'thinking':
    case 'reset':
      return false
    default:
      return true
  }
}
111
+
112
/**
 * Appends an event to the persisted event log of the entry's run, unless
 * `shouldPersistRunEvent` rejects the event type.
 *
 * @param entry Queue entry whose run owns the event.
 * @param event Event to persist.
 * @param opts.phase   Phase recorded with the event; a falsy value becomes `'event'`.
 * @param opts.status  Optional run status recorded with the event.
 * @param opts.summary Optional summary recorded with the event.
 */
export function persistEventForRun(entry: SessionRunQueueEntry, event: SSEEvent, opts?: {
  phase?: RunEventRecord['phase']
  status?: SessionRunStatus
  summary?: string
}): void {
  if (!shouldPersistRunEvent(event)) return

  const { id: runId, sessionId } = entry.run
  const phase = opts?.phase || 'event'
  appendPersistedRunEvent({
    runId,
    sessionId,
    phase,
    status: opts?.status,
    summary: opts?.summary,
    event,
  })
}
127
+
128
/**
 * Makes `controller` abort whenever `callerSignal` aborts.
 *
 * If the caller's signal has already fired, the controller is aborted
 * immediately; otherwise a one-shot `abort` listener forwards the abort later.
 *
 * @param callerSignal Signal supplied by the caller.
 * @param controller   Controller to abort in response.
 */
export function chainCallerSignal(callerSignal: AbortSignal, controller: AbortController): void {
  const forwardAbort = (): void => controller.abort()

  if (!callerSignal.aborted) {
    callerSignal.addEventListener('abort', forwardAbort, { once: true })
    return
  }
  forwardAbort()
}
136
+
137
/**
 * Persists an event for the entry's run (subject to the persistence filter)
 * and then delivers it to every subscriber of the entry.
 *
 * @param entry Queue entry whose subscribers receive the event.
 * @param event Event to deliver.
 */
export function emitToSubscribers(entry: SessionRunQueueEntry, event: SSEEvent) {
  persistEventForRun(entry, event)

  const deliver = (send: (typeof entry.onEvents)[number]): void => {
    try {
      send(event)
    } catch {
      // Subscriber stream can be closed by the client.
    }
  }
  for (const subscriber of entry.onEvents) deliver(subscriber)
}
147
+
148
/**
 * Emits a run-status metadata event.
 *
 * Builds an `md` event whose text is the JSON of `{ run: { id, sessionId,
 * status, source, internal, ...extra } }`, persists it with phase `status`,
 * sends it to every subscriber, and finally broadcasts a session run-state
 * notification.
 *
 * @param entry  Queue entry the status belongs to.
 * @param status Status to announce.
 * @param extra  Additional fields merged into the `run` object (they win on key clashes).
 */
export function emitRunMeta(entry: SessionRunQueueEntry, status: SessionRunStatus, extra?: Record<string, unknown>) {
  const { run } = entry
  const runMeta = {
    id: run.id,
    sessionId: run.sessionId,
    status,
    source: run.source,
    internal: run.internal,
    ...extra,
  }
  const event: SSEEvent = { t: 'md', text: JSON.stringify({ run: runMeta }) }

  persistEventForRun(entry, event, { phase: 'status', status })

  for (const subscriber of entry.onEvents) {
    try {
      subscriber(event)
    } catch {
      // Subscriber stream can be closed by the client.
    }
  }

  notifySessionRunState(run.sessionId)
}
172
+
173
/**
 * Broadcasts a change notification on the `runs` topic, the `sessions` topic,
 * and the per-session `session:<id>` topic, in that order.
 *
 * @param sessionId Session whose run state changed.
 */
export function notifySessionRunState(sessionId: string): void {
  const topics = ['runs', 'sessions', `session:${sessionId}`]
  for (const topic of topics) {
    notify(topic)
  }
}
178
+
179
/**
 * Fire-and-forget observation of a finished run.
 *
 * Looks up the session to resolve its agent id, reports the outcome to
 * `observeAutonomyRunOutcome`, and passes the resulting `reflection` on to
 * `observeLearnedSkillRunOutcome`. A rejection from either step is logged as a
 * warning rather than propagated. The function itself returns immediately.
 *
 * @param input Run outcome details; a falsy `error` is forwarded as `undefined`.
 */
export function queueAutonomyObservation(input: {
  runId: string
  sessionId: string
  source: string
  status: SessionRunStatus
  resultText?: string | null
  error?: string | null
  toolEvents?: import('@/lib/server/chat-execution/chat-execution').ExecuteChatTurnResult['toolEvents']
  sourceMessage?: string | null
}) {
  const session = getSession(input.sessionId)

  // Fields that both observers receive unchanged.
  const outcome = {
    runId: input.runId,
    sessionId: input.sessionId,
    agentId: session?.agentId || null,
    source: input.source,
    status: input.status,
    resultText: input.resultText,
    error: input.error || undefined,
    toolEvents: input.toolEvents,
  }

  const observation = observeAutonomyRunOutcome({
    ...outcome,
    mainLoopState: getMainLoopStateForSession(input.sessionId),
    sourceMessage: input.sourceMessage,
  })

  void observation
    .then(({ reflection }) => observeLearnedSkillRunOutcome({ ...outcome, reflection }))
    .catch((err: unknown) => {
      const details = { sessionId: input.sessionId, error: errorMessage(err) }
      log.warn('session-run', `Autonomy observation failed for ${input.runId}`, details)
    })
}
218
+
219
/**
 * Transitions the entry's run to `cancelled`, stamps its end time and error,
 * syncs the run record and emits a `cancelled` run-meta event carrying the
 * reason. Does nothing when the run is already cancelled.
 *
 * @param entry Queue entry whose run is being cancelled.
 * @param reason Cancellation reason; stored as the run's `error`.
 */
export function markRunningEntryCancelled(entry: SessionRunQueueEntry, reason: string) {
  const { run } = entry
  if (run.status !== 'cancelled') {
    run.status = 'cancelled'
    run.endedAt = now()
    run.error = reason
    syncRunRecord(run)
    emitRunMeta(entry, 'cancelled', { reason })
  }
}
227
+
228
/**
 * Cancels a run and tears down the runtime resources attached to its session.
 *
 * The entry is marked cancelled and its abort controller fired; afterwards the
 * active session process is killed and stopped, the session browser is cleaned
 * up, and delegation jobs for the parent session are cancelled. The kill,
 * browser cleanup and delegation cancellation are best-effort: anything they
 * throw is discarded.
 *
 * @param entry Queue entry whose run is aborted.
 * @param reason Reason recorded on the run and passed to delegation cancellation.
 */
export function abortSessionRuntime(entry: SessionRunQueueEntry, reason: string) {
  // Runs a teardown step and discards anything it throws.
  const bestEffort = (step: () => unknown): void => {
    try {
      step()
    } catch {
      /* noop */
    }
  }

  markRunningEntryCancelled(entry, reason)
  entry.signalController.abort()
  bestEffort(() => getActiveSessionProcess(entry.run.sessionId)?.kill?.())
  stopActiveSessionProcess(entry.run.sessionId)
  bestEffort(() => cleanupSessionBrowser(entry.run.sessionId))
  bestEffort(() => cancelDelegationJobsForParentSession(entry.run.sessionId, reason))
}
236
+
237
/**
 * Builds the execution key for a session.
 *
 * @param sessionId Session identifier.
 * @returns The id prefixed with `session:`.
 */
export function executionKeyForSession(sessionId: string): string {
  const prefix = 'session:'
  return prefix + sessionId
}
240
+
241
/**
 * Builds the runtime-lock name used for a session's non-heartbeat activity lease.
 *
 * @param sessionId Session identifier.
 * @returns The id prefixed with `session-non-heartbeat:`.
 */
export function nonHeartbeatActivityLeaseName(sessionId: string): string {
  const prefix = 'session-non-heartbeat:'
  return prefix + sessionId
}
244
+
245
/**
 * Reports whether the runtime lock backing the session's non-heartbeat
 * activity lease is currently active.
 *
 * @param sessionId Session identifier.
 */
export function hasActiveNonHeartbeatSessionLease(sessionId: string): boolean {
  const leaseName = nonHeartbeatActivityLeaseName(sessionId)
  return isRuntimeLockActive(leaseName)
}
248
+
249
/**
 * Reports whether at least one external execution hold is recorded for the session.
 *
 * @param sessionId Session identifier.
 */
export function hasExternalSessionExecutionHold(sessionId: string): boolean {
  const holdCount = state.externalSessionHolds.get(sessionId)
  // A missing entry counts as zero holds.
  return holdCount !== undefined && holdCount > 0
}
252
+
253
/**
 * Registers an external execution hold on a session and returns the function
 * that releases it.
 *
 * Each hold increments the session's hold counter and arms a TTL timer that
 * releases the hold automatically (with a warning) after `EXTERNAL_HOLD_TTL_MS`
 * if the caller never does.
 *
 * @param sessionId Session to place the hold on.
 * @param onRelease Invoked with the session's execution key each time a hold
 *   is released, whether explicitly or by TTL expiry.
 * @returns An idempotent release function; calls after the first are no-ops.
 */
export function acquireExternalSessionExecutionHold(
  sessionId: string,
  onRelease: (executionKey: string) => void,
): () => void {
  const current = state.externalSessionHolds.get(sessionId) || 0
  state.externalSessionHolds.set(sessionId, current + 1)
  let released = false

  // The counter alone is not a unique id: after an out-of-order release
  // (acquire A, acquire B, release A, acquire C) `current + 1` repeats a key
  // still owned by a live hold. Advance until the key is unused so one hold's
  // timer entry is never overwritten by another.
  let sequence = current + 1
  let holdKey = `${sessionId}:${sequence}`
  while (state.externalHoldTimers.has(holdKey)) {
    sequence += 1
    holdKey = `${sessionId}:${sequence}`
  }

  const ttlTimer = setTimeout(() => {
    if (released) return
    log.warn('session-run', 'External hold auto-released after TTL', { sessionId, holdKey, ttlMs: EXTERNAL_HOLD_TTL_MS })
    release()
  }, EXTERNAL_HOLD_TTL_MS)
  state.externalHoldTimers.set(holdKey, ttlTimer)

  const release = () => {
    if (released) return
    released = true
    // Always clear this hold's own timer, and only drop the map entry when it
    // still refers to that timer, so another hold's TTL is never cancelled.
    clearTimeout(ttlTimer)
    if (state.externalHoldTimers.get(holdKey) === ttlTimer) {
      state.externalHoldTimers.delete(holdKey)
    }
    const next = (state.externalSessionHolds.get(sessionId) || 1) - 1
    if (next > 0) state.externalSessionHolds.set(sessionId, next)
    else state.externalSessionHolds.delete(sessionId)
    onRelease(executionKeyForSession(sessionId))
  }
  return release
}
282
+
283
/**
 * Returns the queue registered for an execution key, creating and registering
 * an empty one on first use.
 *
 * @param executionKey Key identifying the execution lane.
 * @returns The live queue array stored in state (not a copy).
 */
export function queueForExecution(executionKey: string): SessionRunQueueEntry[] {
  let queue = state.queueByExecution.get(executionKey)
  if (!queue) {
    queue = []
    state.queueByExecution.set(executionKey, queue)
  }
  return queue
}
290
+
291
/**
 * Resolves a requested queue mode to a valid one.
 *
 * @param mode Requested mode; `steer`, `collect` and `followup` are accepted as-is.
 * @param internal Whether the run is internal; selects the fallback mode.
 * @returns The requested mode when recognised, otherwise `collect` for
 *   internal runs and `followup` for all others.
 */
export function normalizeMode(mode: string | undefined, internal: boolean): SessionQueueMode {
  switch (mode) {
    case 'steer':
    case 'collect':
    case 'followup':
      return mode
    default:
      if (internal) return 'collect'
      return 'followup'
  }
}
295
+
296
/**
 * Marks a persisted run as cancelled due to an interruption.
 *
 * The persisted record is patched (falling back to a locally built record when
 * the patch yields nothing), mirrored into the in-memory run map and recent-run
 * list, and a `cancelled` status event flagged `interrupted: true` is appended
 * to the run's persisted events.
 *
 * @param run Run record to interrupt.
 * @param reason Interruption reason; also used as the error when none is set.
 * @returns The resulting cancelled run record.
 */
export function markPersistedRunInterrupted(run: SessionRunRecord, reason: string): SessionRunRecord {
  const interruptedAt = now()

  // Applies the interruption fields on top of `base`, keeping an existing
  // end time and error when present.
  const asInterrupted = (base: SessionRunRecord): SessionRunRecord => ({
    ...base,
    status: 'cancelled',
    endedAt: base.endedAt || interruptedAt,
    interruptedAt,
    interruptedReason: reason,
    error: base.error || reason,
  })

  const patched = patchPersistedRun(run.id, (current) => asInterrupted(current || run))
  const next = patched || asInterrupted(run)

  state.runs.set(next.id, next)
  const alreadyTracked = state.recentRunIds.includes(next.id)
  if (!alreadyTracked) {
    state.recentRunIds.push(next.id)
    trimRecentRuns()
  }

  const payload = {
    run: {
      id: next.id,
      sessionId: next.sessionId,
      status: 'cancelled',
      interrupted: true,
      reason,
    },
  }
  appendPersistedRunEvent({
    runId: next.id,
    sessionId: next.sessionId,
    phase: 'status',
    status: 'cancelled',
    summary: reason,
    event: { t: 'md', text: JSON.stringify(payload) },
  })

  return next
}
342
+
343
/**
 * Reports whether the entry's run is anything other than an internal heartbeat
 * run, as decided by `isInternalHeartbeatRun`.
 */
function isNonHeartbeatEntry(entry: SessionRunQueueEntry): boolean {
  const { internal, source } = entry.run
  const isHeartbeat = isInternalHeartbeatRun(internal, source)
  return !isHeartbeat
}
346
+
347
/**
 * Counts a non-heartbeat entry towards its session's outstanding-work counter.
 *
 * Heartbeat entries are ignored. The entry is flagged `nonHeartbeatCounted` so
 * that `decrementNonHeartbeatWork` can undo exactly this increment.
 *
 * @param entry Queue entry to count.
 */
export function incrementNonHeartbeatWork(entry: SessionRunQueueEntry): void {
  if (!isNonHeartbeatEntry(entry)) return
  // The flag is a boolean, so the matching decrement can only undo one
  // increment per entry. Counting an entry twice would leak a unit of work
  // for the session, so an already-counted entry is left untouched.
  if (entry.nonHeartbeatCounted) return
  entry.nonHeartbeatCounted = true
  const sessionId = entry.run.sessionId
  state.nonHeartbeatWorkCount.set(sessionId, (state.nonHeartbeatWorkCount.get(sessionId) || 0) + 1)
}
352
+
353
/**
 * Reverses a previous count of this entry in its session's outstanding-work
 * counter. Entries not flagged `nonHeartbeatCounted` are ignored; the counter
 * entry is removed once it reaches zero or below.
 *
 * @param entry Queue entry to stop counting.
 */
export function decrementNonHeartbeatWork(entry: SessionRunQueueEntry): void {
  if (entry.nonHeartbeatCounted) {
    entry.nonHeartbeatCounted = false
    const { sessionId } = entry.run
    const remaining = (state.nonHeartbeatWorkCount.get(sessionId) || 0) - 1
    if (remaining > 0) {
      state.nonHeartbeatWorkCount.set(sessionId, remaining)
    } else {
      state.nonHeartbeatWorkCount.delete(sessionId)
    }
  }
}
361
+
362
/**
 * Reports whether the session's outstanding non-heartbeat work counter is
 * above zero.
 *
 * @param sessionId Session identifier.
 */
export function hasLocalNonHeartbeatWork(sessionId: string): boolean {
  const pending = state.nonHeartbeatWorkCount.get(sessionId)
  // A missing entry counts as no outstanding work.
  return pending !== undefined && pending > 0
}
365
+
366
/**
 * Cancels the pending deferred-drain timer for an execution key, if any, and
 * removes it from state.
 *
 * @param executionKey Key whose deferred drain is cancelled.
 */
export function clearDeferredDrain(executionKey: string): void {
  const pending = state.deferredDrainTimers.get(executionKey)
  if (pending) {
    clearTimeout(pending)
    state.deferredDrainTimers.delete(executionKey)
  }
}
372
+
373
/**
 * Removes the first occurrence of `target` (matched by identity) from `queue`,
 * mutating the array in place.
 *
 * @param queue Queue to remove from.
 * @param target Entry to remove.
 * @returns `true` when an entry was removed, `false` when it was not present.
 */
export function deleteQueueEntry(queue: SessionRunQueueEntry[], target: SessionRunQueueEntry): boolean {
  const position = queue.indexOf(target)
  const present = position !== -1
  if (present) queue.splice(position, 1)
  return present
}
379
+
380
/**
 * Schedules a one-shot deferred drain for an execution key. If a drain is
 * already pending for that key, the call is a no-op.
 *
 * @param executionKey Key to drain later.
 * @param onDrain Callback invoked with the key when the delay elapses.
 * @param delayMs Delay before draining; defaults to `HEARTBEAT_BUSY_RETRY_MS`.
 */
export function scheduleDeferredDrain(
  executionKey: string,
  onDrain: (executionKey: string) => void,
  delayMs = HEARTBEAT_BUSY_RETRY_MS,
): void {
  const alreadyScheduled = state.deferredDrainTimers.has(executionKey)
  if (alreadyScheduled) return

  const fire = () => {
    // Unregister before draining so the callback may schedule a new drain.
    state.deferredDrainTimers.delete(executionKey)
    onDrain(executionKey)
  }
  state.deferredDrainTimers.set(executionKey, setTimeout(fire, delayMs))
}
392
+
393
/**
 * Stops renewing the session's non-heartbeat activity lease and releases the
 * underlying runtime lock. The lock release is attempted even when no renewal
 * timer was registered.
 */
function stopSessionActivityLease(sessionId: string): void {
  const renewTimer = state.activityLeaseRenewTimers.get(sessionId)
  if (renewTimer) {
    clearInterval(renewTimer)
    state.activityLeaseRenewTimers.delete(sessionId)
  }

  const leaseName = nonHeartbeatActivityLeaseName(sessionId)
  releaseRuntimeLock(leaseName, SHARED_ACTIVITY_LEASE_OWNER)
}
401
+
402
/**
 * Acquires the session's non-heartbeat activity lease and starts an interval
 * that re-acquires it every `SHARED_ACTIVITY_LEASE_RENEW_MS`. The interval
 * stops the lease by itself once the session has no local non-heartbeat work.
 * Does nothing when a renewal timer already exists for the session.
 */
function startSessionActivityLease(sessionId: string): void {
  if (state.activityLeaseRenewTimers.has(sessionId)) return

  const leaseName = nonHeartbeatActivityLeaseName(sessionId)
  const acquire = () => {
    tryAcquireRuntimeLock(leaseName, SHARED_ACTIVITY_LEASE_OWNER, SHARED_ACTIVITY_LEASE_TTL_MS)
  }

  acquire()
  const renewTimer = setInterval(() => {
    if (hasLocalNonHeartbeatWork(sessionId)) {
      acquire()
    } else {
      stopSessionActivityLease(sessionId)
    }
  }, SHARED_ACTIVITY_LEASE_RENEW_MS)
  state.activityLeaseRenewTimers.set(sessionId, renewTimer)
}
415
+
416
/**
 * Brings the session's activity lease in line with its local work: the lease
 * is started when non-heartbeat work is outstanding and stopped otherwise.
 *
 * @param sessionId Session identifier.
 */
export function reconcileSessionActivityLease(sessionId: string): void {
  const apply = hasLocalNonHeartbeatWork(sessionId) ? startSessionActivityLease : stopSessionActivityLease
  apply(sessionId)
}
420
+
421
/**
 * Test helper that returns the run manager's module state to an empty
 * baseline: pending timers are cancelled, activity leases released, and every
 * tracking collection and counter cleared.
 */
export function resetSessionRunManagerStateForTests(): void {
  recoveryState.completed = false

  // Deferred drains.
  state.deferredDrainTimers.forEach((timer) => clearTimeout(timer))
  state.deferredDrainTimers.clear()

  // Activity leases: stop each renewal interval and release its runtime lock.
  state.activityLeaseRenewTimers.forEach((timer, sessionId) => {
    clearInterval(timer)
    releaseRuntimeLock(nonHeartbeatActivityLeaseName(sessionId), SHARED_ACTIVITY_LEASE_OWNER)
  })
  state.activityLeaseRenewTimers.clear()

  // Run and queue bookkeeping.
  state.runningByExecution.clear()
  state.queueByExecution.clear()
  state.runs.clear()
  state.recentRunIds.length = 0
  state.promises.clear()

  // External holds and their TTL timers.
  state.externalSessionHolds.clear()
  state.externalHoldTimers.forEach((timer) => clearTimeout(timer))
  state.externalHoldTimers.clear()

  state.nonHeartbeatWorkCount.clear()
  state.drainDepth.clear()
  state.lastQueuedAt = 0
}