@swarmclawai/swarmclaw 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. package/README.md +16 -85
  2. package/bin/server-cmd.js +64 -1
  3. package/package.json +2 -2
  4. package/skills/coding-agent/SKILL.md +111 -0
  5. package/skills/github/SKILL.md +140 -0
  6. package/skills/nano-banana-pro/SKILL.md +62 -0
  7. package/skills/nano-banana-pro/scripts/generate_image.py +235 -0
  8. package/skills/nano-pdf/SKILL.md +53 -0
  9. package/skills/openai-image-gen/SKILL.md +78 -0
  10. package/skills/openai-image-gen/scripts/gen.py +328 -0
  11. package/skills/resourceful-problem-solving/SKILL.md +49 -0
  12. package/skills/skill-creator/SKILL.md +147 -0
  13. package/skills/skill-creator/scripts/init_skill.py +378 -0
  14. package/skills/skill-creator/scripts/quick_validate.py +159 -0
  15. package/skills/summarize/SKILL.md +77 -0
  16. package/src/app/api/auth/route.ts +20 -5
  17. package/src/app/api/chats/[id]/devserver/route.ts +13 -19
  18. package/src/app/api/chats/[id]/messages/route.ts +13 -15
  19. package/src/app/api/chats/[id]/route.ts +9 -10
  20. package/src/app/api/chats/[id]/stop/route.ts +5 -7
  21. package/src/app/api/chats/messages-route.test.ts +8 -6
  22. package/src/app/api/chats/route.ts +9 -10
  23. package/src/app/api/ip/route.ts +2 -2
  24. package/src/app/api/preview-server/route.ts +1 -1
  25. package/src/app/api/projects/[id]/route.ts +7 -46
  26. package/src/cli/server-cmd.test.js +74 -0
  27. package/src/components/chat/chat-area.tsx +45 -23
  28. package/src/components/chat/message-bubble.test.ts +35 -0
  29. package/src/components/chat/message-bubble.tsx +19 -9
  30. package/src/components/chat/message-list.tsx +37 -3
  31. package/src/components/input/chat-input.tsx +34 -14
  32. package/src/components/openclaw/openclaw-deploy-panel.tsx +4 -0
  33. package/src/instrumentation.ts +1 -1
  34. package/src/lib/chat/assistant-render-id.ts +3 -0
  35. package/src/lib/chat/chat-streaming-state.test.ts +42 -3
  36. package/src/lib/chat/chat-streaming-state.ts +20 -8
  37. package/src/lib/chat/queued-message-queue.test.ts +23 -1
  38. package/src/lib/chat/queued-message-queue.ts +11 -2
  39. package/src/lib/providers/cli-utils.test.ts +124 -0
  40. package/src/lib/server/activity/activity-log.ts +21 -0
  41. package/src/lib/server/agents/agent-availability.test.ts +10 -5
  42. package/src/lib/server/agents/agent-cascade.ts +79 -59
  43. package/src/lib/server/agents/agent-registry.ts +3 -1
  44. package/src/lib/server/agents/agent-repository.ts +90 -0
  45. package/src/lib/server/agents/delegation-job-repository.ts +53 -0
  46. package/src/lib/server/agents/delegation-jobs.ts +11 -4
  47. package/src/lib/server/agents/guardian-checkpoint-repository.ts +35 -0
  48. package/src/lib/server/agents/guardian.ts +2 -2
  49. package/src/lib/server/agents/main-agent-loop.ts +10 -3
  50. package/src/lib/server/agents/main-loop-state-repository.ts +38 -0
  51. package/src/lib/server/agents/subagent-runtime.ts +9 -6
  52. package/src/lib/server/agents/subagent-swarm.ts +3 -2
  53. package/src/lib/server/agents/task-session.ts +3 -4
  54. package/src/lib/server/approvals/approval-repository.ts +30 -0
  55. package/src/lib/server/autonomy/supervisor-incident-repository.ts +42 -0
  56. package/src/lib/server/chat-execution/chat-execution-types.ts +38 -0
  57. package/src/lib/server/chat-execution/chat-execution-utils.ts +1 -1
  58. package/src/lib/server/chat-execution/chat-execution.ts +84 -1926
  59. package/src/lib/server/chat-execution/chat-turn-finalization.ts +620 -0
  60. package/src/lib/server/chat-execution/chat-turn-partial-persistence.ts +221 -0
  61. package/src/lib/server/chat-execution/chat-turn-preflight.ts +133 -0
  62. package/src/lib/server/chat-execution/chat-turn-preparation.ts +817 -0
  63. package/src/lib/server/chat-execution/chat-turn-stream-execution.ts +296 -0
  64. package/src/lib/server/chat-execution/chat-turn-tool-routing.ts +5 -5
  65. package/src/lib/server/chat-execution/message-classifier.test.ts +329 -0
  66. package/src/lib/server/chat-execution/post-stream-finalization.ts +1 -1
  67. package/src/lib/server/chat-execution/prompt-builder.ts +11 -0
  68. package/src/lib/server/chat-execution/prompt-sections.ts +5 -6
  69. package/src/lib/server/chat-execution/situational-awareness.ts +12 -7
  70. package/src/lib/server/chat-execution/stream-agent-chat.ts +16 -13
  71. package/src/lib/server/chatrooms/chatroom-repository.ts +32 -0
  72. package/src/lib/server/connectors/connector-repository.ts +58 -0
  73. package/src/lib/server/connectors/runtime-state.test.ts +117 -0
  74. package/src/lib/server/credentials/credential-repository.ts +7 -0
  75. package/src/lib/server/gateways/gateway-profile-repository.ts +4 -0
  76. package/src/lib/server/memory/memory-abstract.test.ts +59 -0
  77. package/src/lib/server/missions/mission-repository.ts +74 -0
  78. package/src/lib/server/missions/mission-service/actions.ts +6 -0
  79. package/src/lib/server/missions/mission-service/bindings.ts +9 -0
  80. package/src/lib/server/missions/mission-service/context.ts +4 -0
  81. package/src/lib/server/missions/mission-service/core.ts +2269 -0
  82. package/src/lib/server/missions/mission-service/queries.ts +12 -0
  83. package/src/lib/server/missions/mission-service/recovery.ts +5 -0
  84. package/src/lib/server/missions/mission-service/ticks.ts +9 -0
  85. package/src/lib/server/missions/mission-service.test.ts +9 -2
  86. package/src/lib/server/missions/mission-service.ts +6 -2266
  87. package/src/lib/server/openclaw/deploy.test.ts +42 -3
  88. package/src/lib/server/openclaw/deploy.ts +26 -12
  89. package/src/lib/server/persistence/repository-utils.ts +154 -0
  90. package/src/lib/server/persistence/storage-context.ts +51 -0
  91. package/src/lib/server/persistence/transaction.ts +1 -0
  92. package/src/lib/server/projects/project-repository.ts +36 -0
  93. package/src/lib/server/projects/project-service.ts +79 -0
  94. package/src/lib/server/protocols/protocol-normalization.test.ts +6 -4
  95. package/src/lib/server/runtime/alert-dispatch.ts +1 -1
  96. package/src/lib/server/runtime/daemon-policy.ts +1 -1
  97. package/src/lib/server/runtime/daemon-state/core.ts +1570 -0
  98. package/src/lib/server/runtime/daemon-state/health.ts +6 -0
  99. package/src/lib/server/runtime/daemon-state/policy.ts +7 -0
  100. package/src/lib/server/runtime/daemon-state/supervisor.ts +6 -0
  101. package/src/lib/server/runtime/daemon-state.test.ts +48 -0
  102. package/src/lib/server/runtime/daemon-state.ts +3 -1470
  103. package/src/lib/server/runtime/estop-repository.ts +4 -0
  104. package/src/lib/server/runtime/estop.ts +3 -1
  105. package/src/lib/server/runtime/heartbeat-service.test.ts +2 -2
  106. package/src/lib/server/runtime/heartbeat-service.ts +55 -34
  107. package/src/lib/server/runtime/heartbeat-wake.ts +6 -4
  108. package/src/lib/server/runtime/idle-window.ts +2 -2
  109. package/src/lib/server/runtime/network.ts +11 -0
  110. package/src/lib/server/runtime/orchestrator-events.ts +2 -2
  111. package/src/lib/server/runtime/queue/claims.ts +4 -0
  112. package/src/lib/server/runtime/queue/core.ts +2079 -0
  113. package/src/lib/server/runtime/queue/execution.ts +7 -0
  114. package/src/lib/server/runtime/queue/followups.ts +4 -0
  115. package/src/lib/server/runtime/queue/queries.ts +12 -0
  116. package/src/lib/server/runtime/queue/recovery.ts +7 -0
  117. package/src/lib/server/runtime/queue-recovery.test.ts +48 -13
  118. package/src/lib/server/runtime/queue-repository.ts +17 -0
  119. package/src/lib/server/runtime/queue.ts +5 -2061
  120. package/src/lib/server/runtime/run-ledger.ts +6 -5
  121. package/src/lib/server/runtime/run-repository.ts +73 -0
  122. package/src/lib/server/runtime/runtime-lock-repository.ts +8 -0
  123. package/src/lib/server/runtime/runtime-settings.ts +1 -1
  124. package/src/lib/server/runtime/runtime-state.ts +99 -0
  125. package/src/lib/server/runtime/scheduler.ts +4 -2
  126. package/src/lib/server/runtime/session-run-manager/cancellation.ts +157 -0
  127. package/src/lib/server/runtime/session-run-manager/drain.ts +246 -0
  128. package/src/lib/server/runtime/session-run-manager/enqueue.ts +287 -0
  129. package/src/lib/server/runtime/session-run-manager/queries.ts +117 -0
  130. package/src/lib/server/runtime/session-run-manager/recovery.ts +238 -0
  131. package/src/lib/server/runtime/session-run-manager/state.ts +441 -0
  132. package/src/lib/server/runtime/session-run-manager/types.ts +74 -0
  133. package/src/lib/server/runtime/session-run-manager.ts +72 -1377
  134. package/src/lib/server/runtime/watch-job-repository.ts +35 -0
  135. package/src/lib/server/runtime/watch-jobs.ts +3 -1
  136. package/src/lib/server/schedules/schedule-repository.ts +42 -0
  137. package/src/lib/server/sessions/session-repository.ts +85 -0
  138. package/src/lib/server/settings/settings-repository.ts +25 -0
  139. package/src/lib/server/skills/skill-discovery.test.ts +2 -2
  140. package/src/lib/server/skills/skill-discovery.ts +2 -2
  141. package/src/lib/server/skills/skill-repository.ts +14 -0
  142. package/src/lib/server/storage.ts +13 -24
  143. package/src/lib/server/tasks/task-repository.ts +54 -0
  144. package/src/lib/server/usage/usage-repository.ts +30 -0
  145. package/src/lib/server/webhooks/webhook-repository.ts +10 -0
  146. package/src/lib/strip-internal-metadata.test.ts +42 -41
  147. package/src/stores/use-chat-store.test.ts +54 -0
  148. package/src/stores/use-chat-store.ts +21 -5
  149. /package/{bundled-skills → skills}/google-workspace/SKILL.md +0 -0
@@ -0,0 +1,287 @@
1
+ import { genId } from '@/lib/id'
2
+ import type { SessionRunRecord } from '@/types'
3
+ import { getSession } from '@/lib/server/sessions/session-repository'
4
+ import { loadRuntimeSettings } from '@/lib/server/runtime/runtime-settings'
5
+ import { log } from '@/lib/server/logger'
6
+ import { isInternalHeartbeatRun } from '@/lib/server/runtime/heartbeat-source'
7
+ import { getEnabledToolIds } from '@/lib/capability-selection'
8
+ import { isAllEstopEngaged, isAutonomyEstopEngaged } from '@/lib/server/runtime/estop'
9
+ import { getActiveSessionProcess } from '@/lib/server/runtime/runtime-state'
10
+
11
+ import { cancelPendingForSession } from './cancellation'
12
+ import {
13
+ abortSessionRuntime,
14
+ chainCallerSignal,
15
+ COLLECT_COALESCE_WINDOW_MS,
16
+ emitRunMeta,
17
+ executionKeyForSession,
18
+ incrementNonHeartbeatWork,
19
+ messagePreview,
20
+ nextQueuedAt,
21
+ normalizeMode,
22
+ queueForExecution,
23
+ reconcileSessionActivityLease,
24
+ registerRun,
25
+ state,
26
+ syncRunRecord,
27
+ } from './state'
28
+ import type {
29
+ EnqueueSessionRunInput,
30
+ EnqueueSessionRunResult,
31
+ SessionQueueMode,
32
+ SessionRunQueueEntry,
33
+ } from './types'
34
+
35
/** Optional tuning knobs forwarded to the queue-repair dependency. */
type RepairSessionRunQueueOptions = {
  executionKey?: string
  maxQueuedAgeMs?: number
  reason?: string
}

/**
 * Shape of the queue-repair dependency injected into `enqueueSessionRun`.
 * It is invoked with a session id plus optional options and reports how many
 * execution keys were kicked and how many queued runs were recovered.
 */
type RepairSessionRunQueueFn = (
  sessionId: string,
  opts?: RepairSessionRunQueueOptions,
) => { kickedExecutionKeys: number; recoveredQueuedRuns: number }

/** Shape of the drain dependency injected into `enqueueSessionRun`; called with an execution key. */
type DrainExecutionFn = (executionKey: string) => Promise<void>
46
/** Tool ids that cause a run to receive the extended timeout computed below. */
const LONG_TOOL_NAMES: ReadonlySet<string> = new Set(['claude_code', 'codex_cli', 'opencode_cli'])

/** Minimal view of a session record used when resolving its enabled tool ids. */
type SessionToolConfig = {
  tools?: string[] | null
  extensions?: string[] | null
}

/**
 * Picks the runtime ceiling for a run.
 *
 * @param baseTimeoutMs - Timeout used when none of the session tools is in `LONG_TOOL_NAMES`.
 * @param sessionTools - Tool ids enabled for the session.
 * @param runtime - Runtime settings; only `claudeCodeTimeoutMs` is read.
 * @returns `baseTimeoutMs`, or the larger of it and `claudeCodeTimeoutMs + 120_000`
 *          as soon as one long-running tool is present.
 */
function computeEffectiveRunTimeoutMs(
  baseTimeoutMs: number,
  sessionTools: string[],
  runtime: { claudeCodeTimeoutMs: number },
): number {
  for (const toolId of sessionTools) {
    if (LONG_TOOL_NAMES.has(toolId)) {
      // Long tools get their own timeout plus a fixed 120s margin, never less than the base.
      return Math.max(baseTimeoutMs, runtime.claudeCodeTimeoutMs + 120_000)
    }
  }
  return baseTimeoutMs
}
63
+
64
/**
 * Tells whether an enqueue counts as autonomy-managed.
 * Only a non-internal enqueue whose source is `'chat'` is exempt; everything else
 * (any other source, or any internal enqueue) is treated as autonomy-managed.
 */
function isAutonomyManagedEnqueue(source: string, internal: boolean): boolean {
  return internal || source !== 'chat'
}
67
+
68
/**
 * Captures the enqueue arguments that are stored on the run record as `recoveryPayload`.
 *
 * @param input - Original enqueue input; message, media, overrides and reply target are copied from it.
 * @param source - Resolved run source.
 * @param mode - Normalized queue mode.
 * @param maxRuntimeMs - Runtime ceiling to record, or `undefined` for none.
 * @param executionKey - Execution key the run is queued under. Keys starting with
 *        `session:` are not recorded (`executionGroupKey` stays `undefined`).
 */
function buildRecoveryPayload(
  input: EnqueueSessionRunInput,
  source: string,
  mode: SessionQueueMode,
  maxRuntimeMs: number | undefined,
  executionKey: string,
) {
  const { message, imagePath, imageUrl, attachedFiles, modelOverride, heartbeatConfig, replyToId } = input
  const isSessionScopedKey = executionKey.startsWith('session:')
  return {
    message,
    imagePath,
    imageUrl,
    attachedFiles,
    internal: input.internal === true,
    source,
    mode,
    maxRuntimeMs,
    modelOverride,
    heartbeatConfig,
    replyToId,
    executionGroupKey: isSessionScopedKey ? undefined : executionKey,
  }
}
90
+
91
/**
 * Finds an in-flight run (running first, then queued) for `sessionId` that was
 * enqueued with the same `dedupeKey`.
 *
 * Every execution lane is searched, not only the session's default lane: a run
 * enqueued with an explicit `executionGroupKey` is stored under that key, so a
 * lookup limited to `executionKeyForSession(sessionId)` would never see it and
 * the duplicate would be enqueued again.
 *
 * @param sessionId - Session the new run targets.
 * @param dedupeKey - Caller-supplied dedupe key; when absent no lookup is performed.
 * @returns The matching running or queued entry, or `null` when there is none.
 */
function findDedupeMatch(sessionId: string, dedupeKey?: string) {
  if (!dedupeKey) return null
  const isDuplicate = (entry: { run: Pick<SessionRunRecord, 'sessionId' | 'dedupeKey'> }): boolean =>
    entry.run.sessionId === sessionId && entry.run.dedupeKey === dedupeKey

  // Running entries take precedence over queued ones, as before.
  for (const running of state.runningByExecution.values()) {
    if (isDuplicate(running)) return running
  }
  for (const queue of state.queueByExecution.values()) {
    const queued = queue.find(isDuplicate)
    if (queued) return queued
  }
  return null
}
99
+
100
/**
 * Queues a chat turn for a session and kicks the drain for its execution lane.
 *
 * Steps, in order:
 *  1. Reject when the "all" estop is engaged, or when the autonomy estop is engaged
 *     and the enqueue is autonomy-managed.
 *  2. Resolve the execution key (trimmed `executionGroupKey`, else the session key)
 *     and run the injected queue repair for it.
 *  3. Resolve the runtime ceiling (explicit `maxRuntimeMs`, else the tool-adjusted default).
 *  4. Return early, attaching to an existing run, when a dedupe match exists.
 *  5. In `steer` mode abort the session's running entry and cancel pending work.
 *  6. For a user chat, preempt a running internal heartbeat on the same lane.
 *  7. In `collect` mode without media, merge the message into the last queued
 *     entry when it is compatible and inside the coalesce window.
 *  8. Otherwise register a new run, push it on the queue and start draining.
 *
 * @param input - Enqueue request.
 * @param deps - Injected `repairSessionRunQueue` and `drainExecution` functions.
 * @returns Handle with the run id, queue position, result promise, and `abort` / `unsubscribe` closures.
 * @throws Error when an estop blocks the enqueue.
 */
export function enqueueSessionRun(
  input: EnqueueSessionRunInput,
  deps: {
    repairSessionRunQueue: RepairSessionRunQueueFn
    drainExecution: DrainExecutionFn
  },
): EnqueueSessionRunResult {
  const { sessionId } = input
  const internal = input.internal === true
  const mode = normalizeMode(input.mode, internal)
  const source = input.source || 'chat'

  if (isAllEstopEngaged()) {
    throw new Error('Execution is blocked because all estop is engaged.')
  }
  if (isAutonomyEstopEngaged() && isAutonomyManagedEnqueue(source, internal)) {
    throw new Error(`Autonomy estop is engaged. New ${source} runs are paused.`)
  }

  // A non-blank group key selects a shared lane; otherwise the session gets its own lane.
  const requestedGroupKey = typeof input.executionGroupKey === 'string' ? input.executionGroupKey.trim() : ''
  const executionKey = requestedGroupKey || executionKeyForSession(sessionId)
  deps.repairSessionRunQueue(sessionId, {
    executionKey,
    reason: 'Recovered stale queued run before enqueue',
  })

  const runtime = loadRuntimeSettings()
  const defaultMaxRuntimeMs = runtime.ongoingLoopMaxRuntimeMs ?? (10 * 60_000)
  const sessionData = getSession(sessionId) as SessionToolConfig | null
  const sessionTools = getEnabledToolIds(sessionData)
  const adjustedDefaultMs = computeEffectiveRunTimeoutMs(defaultMaxRuntimeMs, sessionTools, runtime)
  let effectiveMaxRuntimeMs = adjustedDefaultMs
  if (typeof input.maxRuntimeMs === 'number') effectiveMaxRuntimeMs = input.maxRuntimeMs
  // Non-positive ceilings are recorded as "no limit" (undefined).
  const recordedMaxRuntimeMs = effectiveMaxRuntimeMs > 0 ? effectiveMaxRuntimeMs : undefined

  const duplicate = findDedupeMatch(sessionId, input.dedupeKey)
  if (duplicate) {
    const listener = input.onEvent
    if (listener) duplicate.onEvents.push(listener)
    if (input.callerSignal) chainCallerSignal(input.callerSignal, duplicate.signalController)
    return {
      runId: duplicate.run.id,
      position: 0,
      deduped: true,
      promise: duplicate.promise,
      abort: () => duplicate.signalController.abort(),
      unsubscribe: () => {
        if (!listener) return
        const at = duplicate.onEvents.indexOf(listener)
        if (at >= 0) duplicate.onEvents.splice(at, 1)
      },
    }
  }

  if (mode === 'steer') {
    const active = state.runningByExecution.get(executionKey)
    if (active && active.run.sessionId === sessionId) {
      active.signalController.abort()
      try { getActiveSessionProcess(sessionId)?.kill?.() } catch { /* noop */ }
    }
    cancelPendingForSession(sessionId, 'Cancelled by steer mode')
  }

  if (source === 'chat' && !internal) {
    const active = state.runningByExecution.get(executionKey)
    if (active && isInternalHeartbeatRun(active.run.internal, active.run.source)) {
      log.info('session-run', `Preempting heartbeat ${active.run.id} for user chat on ${sessionId}`)
      abortSessionRuntime(active, 'Preempted by user chat')
      state.runningByExecution.delete(executionKey)
    }
  }

  // Re-read after the steer / preempt steps, which may have changed the lane.
  const running = state.runningByExecution.get(executionKey)
  const queue = queueForExecution(executionKey)

  const carriesMedia = Boolean(input.imagePath || input.imageUrl || input.attachedFiles?.length)
  if (mode === 'collect' && !carriesMedia) {
    const nowMs = nextQueuedAt()
    const tail = queue.at(-1)
    if (
      tail
      && tail.run.mode === 'collect'
      && tail.run.internal === internal
      && tail.run.source === source
      && !tail.imagePath
      && !tail.imageUrl
      && !tail.attachedFiles?.length
      && (nowMs - tail.run.queuedAt) <= COLLECT_COALESCE_WINDOW_MS
    ) {
      const addition = input.message.trim()
      if (addition) {
        const existing = tail.message.trim()
        if (existing) {
          tail.message = `${existing}\n\n[Collected follow-up]\n${addition}`
        } else {
          tail.message = addition
        }
        tail.run.messagePreview = messagePreview(tail.message)
        tail.run.queuedAt = nowMs
        syncRunRecord(tail.run)
      }
      const listener = input.onEvent
      if (listener) tail.onEvents.push(listener)
      if (input.callerSignal) chainCallerSignal(input.callerSignal, tail.signalController)
      emitRunMeta(tail, 'queued', { position: 0, coalesced: true, mergedIntoRunId: tail.run.id })
      return {
        runId: tail.run.id,
        position: 0,
        coalesced: true,
        promise: tail.promise,
        abort: () => tail.signalController.abort(),
        unsubscribe: () => {
          if (!listener) return
          const at = tail.onEvents.indexOf(listener)
          if (at >= 0) tail.onEvents.splice(at, 1)
        },
      }
    }
  }

  const runId = genId(8)
  const missionId = input.missionId ?? getSession(sessionId)?.missionId ?? null
  const run: SessionRunRecord = {
    id: runId,
    sessionId,
    missionId,
    source,
    internal,
    mode,
    status: 'queued',
    messagePreview: messagePreview(input.message),
    dedupeKey: input.dedupeKey,
    queuedAt: nextQueuedAt(),
    recoveredFromRestart: input.recoveredFromRestart === true,
    recoveredFromRunId: input.recoveredFromRunId,
    recoveryPayload: buildRecoveryPayload(input, source, mode, recordedMaxRuntimeMs, executionKey),
  }
  registerRun(run)

  // Deferred: the settle callbacks are stored on the queue entry for later use.
  let resolve!: EnqueueSessionRunResult['promise'] extends Promise<infer T> ? (value: T) => void : never
  let reject!: (error: Error) => void
  const promise = new Promise<import('@/lib/server/chat-execution/chat-execution-types').ExecuteChatTurnResult>((res, rej) => {
    resolve = res
    reject = rej
  })
  // Attach a no-op handler so a rejection nobody awaits is not reported as unhandled.
  promise.catch(() => {})
  state.promises.set(runId, promise)

  const listener = input.onEvent
  const entry: SessionRunQueueEntry = {
    executionKey,
    run,
    message: input.message,
    imagePath: input.imagePath,
    imageUrl: input.imageUrl,
    attachedFiles: input.attachedFiles,
    onEvents: listener ? [listener] : [],
    signalController: new AbortController(),
    maxRuntimeMs: recordedMaxRuntimeMs,
    modelOverride: input.modelOverride,
    heartbeatConfig: input.heartbeatConfig,
    replyToId: input.replyToId,
    resolve,
    reject,
    promise,
  }

  if (input.callerSignal) chainCallerSignal(input.callerSignal, entry.signalController)

  queue.push(entry)
  incrementNonHeartbeatWork(entry)
  if (entry.nonHeartbeatCounted) {
    reconcileSessionActivityLease(sessionId)
  }
  // Position counts the running entry (if any) plus everything queued ahead of this one.
  const position = (running ? 1 : 0) + queue.length - 1
  emitRunMeta(entry, 'queued', { position })
  void deps.drainExecution(executionKey)

  return {
    runId,
    position,
    promise,
    abort: () => entry.signalController.abort(),
    unsubscribe: () => {
      if (!listener) return
      const at = entry.onEvents.indexOf(listener)
      if (at >= 0) entry.onEvents.splice(at, 1)
    },
  }
}
@@ -0,0 +1,117 @@
1
+ import type {
2
+ RunEventRecord,
3
+ SessionQueueSnapshot,
4
+ SessionQueuedTurn,
5
+ SessionRunRecord,
6
+ SessionRunStatus,
7
+ } from '@/types'
8
+ import {
9
+ listPersistedRunEvents,
10
+ listPersistedRuns,
11
+ loadPersistedRun,
12
+ } from '@/lib/server/runtime/run-ledger'
13
+ import { isInternalHeartbeatRun } from '@/lib/server/runtime/heartbeat-source'
14
+
15
+ import { state } from './state'
16
+ import type { SessionRunQueueEntry } from './types'
17
+
18
/**
 * Compact view of a session's execution state: the id of the run currently
 * running (if any) and the number of queued entries, both taken from
 * `getSessionExecutionState`.
 */
export function getSessionRunState(sessionId: string): {
  runningRunId?: string
  queueLength: number
} {
  const { runningRunId, queueLength } = getSessionExecutionState(sessionId)
  return { runningRunId, queueLength }
}
28
+
29
/**
 * Collects the queued entries of a session across every execution lane,
 * leaving out entries whose run is flagged `internal`, ordered by `queuedAt` ascending.
 */
function visibleQueuedEntriesForSession(sessionId: string): SessionRunQueueEntry[] {
  const visible: SessionRunQueueEntry[] = []
  for (const laneQueue of state.queueByExecution.values()) {
    for (const queued of laneQueue) {
      if (queued.run.sessionId === sessionId && queued.run.internal !== true) {
        visible.push(queued)
      }
    }
  }
  visible.sort((a, b) => a.run.queuedAt - b.run.queuedAt)
  return visible
}
35
+
36
/**
 * Maps a queue entry to its `SessionQueuedTurn` view.
 *
 * @param entry - Queue entry to convert.
 * @param index - Zero-based index of the entry in the visible list; exposed as a one-based `position`.
 */
function toQueuedTurn(entry: SessionRunQueueEntry, index: number): SessionQueuedTurn {
  const { run, message, imagePath, imageUrl, attachedFiles, replyToId } = entry
  return {
    runId: run.id,
    sessionId: run.sessionId,
    missionId: run.missionId || null,
    text: message,
    queuedAt: run.queuedAt,
    position: index + 1,
    imagePath,
    imageUrl,
    attachedFiles,
    replyToId,
    source: run.source,
  }
}
51
+
52
/**
 * Builds the queue snapshot for a session: the active run id (or `null`) and the
 * visible (non-internal) queued turns together with their count.
 */
export function getSessionQueueSnapshot(sessionId: string): SessionQueueSnapshot {
  const { runningRunId } = getSessionExecutionState(sessionId)
  const items = visibleQueuedEntriesForSession(sessionId)
    .map((entry, index) => toQueuedTurn(entry, index))
  return {
    sessionId,
    activeRunId: runningRunId || null,
    queueLength: items.length,
    items,
  }
}
62
+
63
/**
 * Summarizes what is running and queued for a session across every execution lane.
 *
 * Running state is aggregated over all running entries that belong to the session,
 * in the same way queued entries are. A session can have entries in more than one
 * lane (its own lane plus any `executionGroupKey` lane), so inspecting only the
 * first running entry found could report a heartbeat while user work is also
 * running, or the reverse.
 *
 * @param sessionId - Session to inspect.
 * @returns
 *  - `runningRunId`: id of the first running entry whose run status is `'running'`, if any.
 *  - `queueLength` / `hasQueued`: count and presence of queued entries, internal ones included.
 *  - `hasRunning`: whether any lane is currently running an entry for the session.
 *  - `has*Heartbeat` / `has*NonHeartbeat`: the same facts split by `isInternalHeartbeatRun`.
 */
export function getSessionExecutionState(sessionId: string): {
  runningRunId?: string
  queueLength: number
  hasRunning: boolean
  hasQueued: boolean
  hasRunningHeartbeat: boolean
  hasQueuedHeartbeat: boolean
  hasRunningNonHeartbeat: boolean
  hasQueuedNonHeartbeat: boolean
} {
  const runningEntries = Array.from(state.runningByExecution.values())
    .filter((entry) => entry.run.sessionId === sessionId)
  const runningHeartbeat = runningEntries.filter((entry) =>
    isInternalHeartbeatRun(entry.run.internal, entry.run.source),
  ).length
  const runningNonHeartbeat = runningEntries.length - runningHeartbeat
  // Only an entry whose record has actually reached 'running' is reported as the active run.
  const activeEntry = runningEntries.find((entry) => entry.run.status === 'running')

  const queuedEntries = Array.from(state.queueByExecution.values())
    .flatMap((queue) => queue)
    .filter((entry) => entry.run.sessionId === sessionId)
  const queuedHeartbeat = queuedEntries.filter((entry) =>
    isInternalHeartbeatRun(entry.run.internal, entry.run.source),
  ).length
  const queuedNonHeartbeat = queuedEntries.length - queuedHeartbeat

  return {
    runningRunId: activeEntry?.run.id,
    queueLength: queuedEntries.length,
    hasRunning: runningEntries.length > 0,
    hasQueued: queuedEntries.length > 0,
    hasRunningHeartbeat: runningHeartbeat > 0,
    hasQueuedHeartbeat: queuedHeartbeat > 0,
    hasRunningNonHeartbeat: runningNonHeartbeat > 0,
    hasQueuedNonHeartbeat: queuedNonHeartbeat > 0,
  }
}
102
+
103
/**
 * Looks a run up by id, preferring the in-memory `state.runs` map and falling
 * back to the persisted run ledger when it is not held in memory.
 */
export function getRunById(runId: string): SessionRunRecord | null {
  const inMemory = state.runs.get(runId)
  if (inMemory) return inMemory
  return loadPersistedRun(runId)
}
106
+
107
/**
 * Lists persisted runs, passing the optional filter straight through to the run ledger.
 *
 * @param params - Optional `sessionId`, `status` and `limit` filter.
 */
export function listRuns(params?: {
  sessionId?: string
  status?: SessionRunStatus
  limit?: number
}): SessionRunRecord[] {
  const persistedRuns = listPersistedRuns(params)
  return persistedRuns
}
114
+
115
/**
 * Lists the persisted events of a run by delegating to the run ledger.
 *
 * @param runId - Run whose events are requested.
 * @param limit - Optional limit, forwarded unchanged.
 */
export function listRunEvents(runId: string, limit?: number): RunEventRecord[] {
  const persistedEvents = listPersistedRunEvents(runId, limit)
  return persistedEvents
}
@@ -0,0 +1,238 @@
1
+ import { log } from '@/lib/server/logger'
2
+ import { errorMessage } from '@/lib/shared-utils'
3
+ import { isAllEstopEngaged, isAutonomyEstopEngaged } from '@/lib/server/runtime/estop'
4
+ import {
5
+ isRestartRecoverableSource,
6
+ listPersistedRuns,
7
+ loadRecoverableStaleRuns,
8
+ } from '@/lib/server/runtime/run-ledger'
9
+
10
+ import {
11
+ abortSessionRuntime,
12
+ clearDeferredDrain,
13
+ decrementNonHeartbeatWork,
14
+ deleteQueueEntry,
15
+ executionKeyForSession,
16
+ markPersistedRunInterrupted,
17
+ normalizeMode,
18
+ now,
19
+ reconcileSessionActivityLease,
20
+ recoveryState,
21
+ STALE_QUEUED_RUN_MS,
22
+ state,
23
+ syncRunRecord,
24
+ STUCK_RUN_THRESHOLD_MS,
25
+ } from './state'
26
+ import type { EnqueueSessionRunInput, SessionRunQueueEntry } from './types'
27
+
28
/** Enqueue callback supplied by the caller of the recovery helpers; its return value is not used here. */
type EnqueueSessionRunFn = (input: EnqueueSessionRunInput) => unknown

/** Drain callback supplied by the caller; invoked with the execution key whose queue should be drained. */
type DrainExecutionFn = (executionKey: string) => Promise<void>
30
+
31
/**
 * Settles a queue entry that was removed from its queue during repair.
 *
 * The run is marked `failed` unless it already carries a terminal status, in which
 * case only a missing `endedAt` is filled in. `reason` is stored as the run error,
 * pushed to every subscriber as an `err` event, and returned through the entry's
 * promise, which is resolved (not rejected) with an empty result.
 */
function resolveRecoveredQueuedEntry(entry: SessionRunQueueEntry, reason: string): void {
  decrementNonHeartbeatWork(entry)

  const { run } = entry
  const alreadyTerminal = run.status === 'completed' || run.status === 'failed' || run.status === 'cancelled'
  if (!alreadyTerminal) {
    run.status = 'failed'
    run.endedAt = now()
  } else {
    run.endedAt = run.endedAt || now()
  }
  run.error = reason
  syncRunRecord(run)

  for (const send of entry.onEvents) {
    try {
      send({ t: 'err', text: reason })
    } catch {
      // Subscriber stream can be closed by the client.
    }
  }

  entry.resolve({
    runId: run.id,
    sessionId: run.sessionId,
    ...(run.missionId ? { missionId: run.missionId } : {}),
    text: '',
    persisted: false,
    toolEvents: [],
    error: reason,
  })
}
58
+
59
/**
 * One-shot restart recovery. Guarded by `recoveryState.completed`, so only the first call does work.
 *
 * Every recoverable stale run from the ledger is marked interrupted. A run is then
 * re-enqueued from its `recoveryPayload` only when no estop is engaged, it is not
 * itself a recovered run, it has a payload, and its source is restart-recoverable.
 * Enqueue failures are logged and do not stop the loop.
 *
 * @param enqueueSessionRun - Enqueue function used to requeue interrupted runs.
 */
export function ensureRecoveredPersistedRuns(enqueueSessionRun: EnqueueSessionRunFn): void {
  if (recoveryState.completed) return
  recoveryState.completed = true

  const staleRuns = loadRecoverableStaleRuns()
  if (staleRuns.length === 0) return
  const recoveryBlocked = isAutonomyEstopEngaged() || isAllEstopEngaged()

  for (const staleRun of staleRuns) {
    // Stale runs are always marked interrupted, even when requeueing is skipped below.
    const interrupted = markPersistedRunInterrupted(staleRun, 'Interrupted by server restart before the run completed.')
    const payload = interrupted.recoveryPayload

    if (recoveryBlocked || interrupted.recoveredFromRestart) continue
    if (!payload) continue
    if (!isRestartRecoverableSource(interrupted.source)) continue

    try {
      enqueueSessionRun({
        sessionId: interrupted.sessionId,
        message: payload.message,
        imagePath: payload.imagePath,
        imageUrl: payload.imageUrl,
        attachedFiles: payload.attachedFiles,
        internal: payload.internal,
        source: payload.source,
        mode: normalizeMode(payload.mode, payload.internal),
        dedupeKey: interrupted.dedupeKey,
        maxRuntimeMs: payload.maxRuntimeMs,
        modelOverride: payload.modelOverride,
        heartbeatConfig: payload.heartbeatConfig,
        replyToId: payload.replyToId,
        executionGroupKey: payload.executionGroupKey,
        recoveredFromRestart: true,
        recoveredFromRunId: interrupted.id,
      })
    } catch (err: unknown) {
      log.warn('session-run', `Failed to requeue interrupted run ${interrupted.id}`, {
        sessionId: interrupted.sessionId,
        error: errorMessage(err),
      })
    }
  }
}
105
+
106
/**
 * Repairs the queued runs of a session.
 *
 * For every execution lane (or only `opts.executionKey` when given):
 *  - an empty queue is dropped from the state map;
 *  - a lane that is currently running something is left untouched;
 *  - the session's entries that are stale are removed and settled as failed. An entry
 *    is stale when its status is no longer `queued`, it has no tracked promise, or it
 *    has been queued for at least `maxQueuedAgeMs`;
 *  - if the session still has entries in the lane afterwards, the lane is drained again.
 *
 * @param sessionId - Session whose queued entries are inspected.
 * @param drainExecution - Drain function used to kick lanes that still hold work.
 * @param opts - Optional lane filter, staleness age (floored at 1000 ms) and failure reason.
 * @returns Number of lanes kicked and number of queued runs recovered.
 */
export function repairSessionRunQueue(
  sessionId: string,
  drainExecution: DrainExecutionFn,
  opts?: {
    executionKey?: string
    maxQueuedAgeMs?: number
    reason?: string
  },
): {
  kickedExecutionKeys: number
  recoveredQueuedRuns: number
} {
  const maxQueuedAgeMs = Math.max(1_000, opts?.maxQueuedAgeMs ?? STALE_QUEUED_RUN_MS)
  const reason = opts?.reason || 'Recovered stale queued run'
  const requestedKey = typeof opts?.executionKey === 'string' ? opts.executionKey.trim() : ''
  const targetExecutionKey = requestedKey || null
  const queuedNow = now()
  const ownedBySession = (entry: SessionRunQueueEntry): boolean => entry.run.sessionId === sessionId

  let kickedExecutionKeys = 0
  let recoveredQueuedRuns = 0

  for (const [executionKey, queue] of state.queueByExecution.entries()) {
    if (targetExecutionKey && executionKey !== targetExecutionKey) continue

    if (queue.length === 0) {
      clearDeferredDrain(executionKey)
      state.queueByExecution.delete(executionKey)
      continue
    }
    if (state.runningByExecution.has(executionKey)) continue

    // `filter` yields a fresh array, so removing entries from `queue` below is safe.
    const sessionEntries = queue.filter(ownedBySession)
    if (sessionEntries.length === 0) continue

    for (const entry of sessionEntries) {
      const missingPromise = !state.promises.has(entry.run.id)
      const previousStatus = entry.run.status
      const ageMs = Math.max(0, queuedNow - (entry.run.queuedAt || 0))
      const isStale = previousStatus !== 'queued' || missingPromise || ageMs >= maxQueuedAgeMs
      if (!isStale) continue
      if (!deleteQueueEntry(queue, entry)) continue

      clearDeferredDrain(executionKey)
      resolveRecoveredQueuedEntry(entry, reason)
      recoveredQueuedRuns += 1
      log.warn('session-run', `Recovered stale queued run ${entry.run.id}`, {
        sessionId: entry.run.sessionId,
        executionKey,
        source: entry.run.source,
        ageMs,
        missingPromise,
        previousStatus,
      })
    }

    if (queue.length === 0) {
      clearDeferredDrain(executionKey)
      state.queueByExecution.delete(executionKey)
      continue
    }

    if (queue.some(ownedBySession)) {
      clearDeferredDrain(executionKey)
      kickedExecutionKeys += 1
      void drainExecution(executionKey)
    }
  }

  if (recoveredQueuedRuns > 0) reconcileSessionActivityLease(sessionId)
  return { kickedExecutionKeys, recoveredQueuedRuns }
}
176
+
177
/**
 * Watchdog sweep over running runs.
 *
 * Pass 1 (in memory): a running entry is aborted and removed when its age is at least
 * `STUCK_RUN_THRESHOLD_MS` and, if it has a `maxRuntimeMs`, also at least 1.5x that value.
 *
 * Pass 2 (persisted): a ledger run still marked `running` that is not the entry held in
 * memory for its lane, and is older than the threshold, is marked interrupted. It is
 * re-enqueued from its recovery payload when the source is restart-recoverable and the
 * lane has nothing running and nothing queued for the same session.
 *
 * @param enqueueSessionRun - Enqueue function used to requeue orphaned runs.
 * @returns Total number of runs aborted or marked interrupted.
 */
export function sweepStuckRuns(enqueueSessionRun: EnqueueSessionRunFn): { aborted: number } {
  const sweepTime = now()
  let aborted = 0

  for (const [laneKey, active] of state.runningByExecution.entries()) {
    const ageMs = sweepTime - (active.run.startedAt || active.run.queuedAt)
    const withinRuntimeGrace = active.maxRuntimeMs && ageMs < active.maxRuntimeMs * 1.5
    if (withinRuntimeGrace || ageMs < STUCK_RUN_THRESHOLD_MS) continue

    abortSessionRuntime(active, 'Watchdog: run exceeded maximum allowed duration')
    state.runningByExecution.delete(laneKey)
    decrementNonHeartbeatWork(active)
    reconcileSessionActivityLease(active.run.sessionId)
    aborted += 1
  }

  for (const run of listPersistedRuns({ status: 'running' })) {
    const laneKey = run.recoveryPayload?.executionGroupKey || executionKeyForSession(run.sessionId)
    // A persisted run that matches the in-memory entry of its lane is genuinely running.
    const inMemory = state.runningByExecution.get(laneKey)
    if (inMemory && inMemory.run.id === run.id) continue

    const ageMs = sweepTime - (run.startedAt || run.queuedAt)
    if (ageMs < STUCK_RUN_THRESHOLD_MS) continue

    markPersistedRunInterrupted(run, 'Watchdog: orphaned run detected after server restart or HMR')
    aborted += 1

    const payload = run.recoveryPayload
    if (!payload || !isRestartRecoverableSource(run.source)) continue
    if (state.runningByExecution.has(laneKey)) continue
    const sessionAlreadyQueued = state.queueByExecution.get(laneKey)
      ?.some((queued) => queued.run.sessionId === run.sessionId) ?? false
    if (sessionAlreadyQueued) continue

    try {
      enqueueSessionRun({
        sessionId: run.sessionId,
        message: payload.message,
        imagePath: payload.imagePath,
        imageUrl: payload.imageUrl,
        attachedFiles: payload.attachedFiles,
        internal: payload.internal,
        source: payload.source,
        mode: normalizeMode(payload.mode, payload.internal),
        dedupeKey: run.dedupeKey,
        maxRuntimeMs: payload.maxRuntimeMs,
        modelOverride: payload.modelOverride,
        heartbeatConfig: payload.heartbeatConfig,
        replyToId: payload.replyToId,
        executionGroupKey: payload.executionGroupKey,
        recoveredFromRestart: true,
        recoveredFromRunId: run.id,
      })
    } catch (err: unknown) {
      log.warn('session-run', `Watchdog: failed to re-enqueue orphaned run ${run.id}`, {
        sessionId: run.sessionId,
        error: errorMessage(err),
      })
    }
  }

  return { aborted }
}