@swarmclawai/swarmclaw 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149):
  1. package/README.md +16 -85
  2. package/bin/server-cmd.js +64 -1
  3. package/package.json +2 -2
  4. package/skills/coding-agent/SKILL.md +111 -0
  5. package/skills/github/SKILL.md +140 -0
  6. package/skills/nano-banana-pro/SKILL.md +62 -0
  7. package/skills/nano-banana-pro/scripts/generate_image.py +235 -0
  8. package/skills/nano-pdf/SKILL.md +53 -0
  9. package/skills/openai-image-gen/SKILL.md +78 -0
  10. package/skills/openai-image-gen/scripts/gen.py +328 -0
  11. package/skills/resourceful-problem-solving/SKILL.md +49 -0
  12. package/skills/skill-creator/SKILL.md +147 -0
  13. package/skills/skill-creator/scripts/init_skill.py +378 -0
  14. package/skills/skill-creator/scripts/quick_validate.py +159 -0
  15. package/skills/summarize/SKILL.md +77 -0
  16. package/src/app/api/auth/route.ts +20 -5
  17. package/src/app/api/chats/[id]/devserver/route.ts +13 -19
  18. package/src/app/api/chats/[id]/messages/route.ts +13 -15
  19. package/src/app/api/chats/[id]/route.ts +9 -10
  20. package/src/app/api/chats/[id]/stop/route.ts +5 -7
  21. package/src/app/api/chats/messages-route.test.ts +8 -6
  22. package/src/app/api/chats/route.ts +9 -10
  23. package/src/app/api/ip/route.ts +2 -2
  24. package/src/app/api/preview-server/route.ts +1 -1
  25. package/src/app/api/projects/[id]/route.ts +7 -46
  26. package/src/cli/server-cmd.test.js +74 -0
  27. package/src/components/chat/chat-area.tsx +45 -23
  28. package/src/components/chat/message-bubble.test.ts +35 -0
  29. package/src/components/chat/message-bubble.tsx +19 -9
  30. package/src/components/chat/message-list.tsx +37 -3
  31. package/src/components/input/chat-input.tsx +34 -14
  32. package/src/components/openclaw/openclaw-deploy-panel.tsx +4 -0
  33. package/src/instrumentation.ts +1 -1
  34. package/src/lib/chat/assistant-render-id.ts +3 -0
  35. package/src/lib/chat/chat-streaming-state.test.ts +42 -3
  36. package/src/lib/chat/chat-streaming-state.ts +20 -8
  37. package/src/lib/chat/queued-message-queue.test.ts +23 -1
  38. package/src/lib/chat/queued-message-queue.ts +11 -2
  39. package/src/lib/providers/cli-utils.test.ts +124 -0
  40. package/src/lib/server/activity/activity-log.ts +21 -0
  41. package/src/lib/server/agents/agent-availability.test.ts +10 -5
  42. package/src/lib/server/agents/agent-cascade.ts +79 -59
  43. package/src/lib/server/agents/agent-registry.ts +3 -1
  44. package/src/lib/server/agents/agent-repository.ts +90 -0
  45. package/src/lib/server/agents/delegation-job-repository.ts +53 -0
  46. package/src/lib/server/agents/delegation-jobs.ts +11 -4
  47. package/src/lib/server/agents/guardian-checkpoint-repository.ts +35 -0
  48. package/src/lib/server/agents/guardian.ts +2 -2
  49. package/src/lib/server/agents/main-agent-loop.ts +10 -3
  50. package/src/lib/server/agents/main-loop-state-repository.ts +38 -0
  51. package/src/lib/server/agents/subagent-runtime.ts +9 -6
  52. package/src/lib/server/agents/subagent-swarm.ts +3 -2
  53. package/src/lib/server/agents/task-session.ts +3 -4
  54. package/src/lib/server/approvals/approval-repository.ts +30 -0
  55. package/src/lib/server/autonomy/supervisor-incident-repository.ts +42 -0
  56. package/src/lib/server/chat-execution/chat-execution-types.ts +38 -0
  57. package/src/lib/server/chat-execution/chat-execution-utils.ts +1 -1
  58. package/src/lib/server/chat-execution/chat-execution.ts +84 -1926
  59. package/src/lib/server/chat-execution/chat-turn-finalization.ts +620 -0
  60. package/src/lib/server/chat-execution/chat-turn-partial-persistence.ts +221 -0
  61. package/src/lib/server/chat-execution/chat-turn-preflight.ts +133 -0
  62. package/src/lib/server/chat-execution/chat-turn-preparation.ts +817 -0
  63. package/src/lib/server/chat-execution/chat-turn-stream-execution.ts +296 -0
  64. package/src/lib/server/chat-execution/chat-turn-tool-routing.ts +5 -5
  65. package/src/lib/server/chat-execution/message-classifier.test.ts +329 -0
  66. package/src/lib/server/chat-execution/post-stream-finalization.ts +1 -1
  67. package/src/lib/server/chat-execution/prompt-builder.ts +11 -0
  68. package/src/lib/server/chat-execution/prompt-sections.ts +5 -6
  69. package/src/lib/server/chat-execution/situational-awareness.ts +12 -7
  70. package/src/lib/server/chat-execution/stream-agent-chat.ts +16 -13
  71. package/src/lib/server/chatrooms/chatroom-repository.ts +32 -0
  72. package/src/lib/server/connectors/connector-repository.ts +58 -0
  73. package/src/lib/server/connectors/runtime-state.test.ts +117 -0
  74. package/src/lib/server/credentials/credential-repository.ts +7 -0
  75. package/src/lib/server/gateways/gateway-profile-repository.ts +4 -0
  76. package/src/lib/server/memory/memory-abstract.test.ts +59 -0
  77. package/src/lib/server/missions/mission-repository.ts +74 -0
  78. package/src/lib/server/missions/mission-service/actions.ts +6 -0
  79. package/src/lib/server/missions/mission-service/bindings.ts +9 -0
  80. package/src/lib/server/missions/mission-service/context.ts +4 -0
  81. package/src/lib/server/missions/mission-service/core.ts +2269 -0
  82. package/src/lib/server/missions/mission-service/queries.ts +12 -0
  83. package/src/lib/server/missions/mission-service/recovery.ts +5 -0
  84. package/src/lib/server/missions/mission-service/ticks.ts +9 -0
  85. package/src/lib/server/missions/mission-service.test.ts +9 -2
  86. package/src/lib/server/missions/mission-service.ts +6 -2266
  87. package/src/lib/server/openclaw/deploy.test.ts +42 -3
  88. package/src/lib/server/openclaw/deploy.ts +26 -12
  89. package/src/lib/server/persistence/repository-utils.ts +154 -0
  90. package/src/lib/server/persistence/storage-context.ts +51 -0
  91. package/src/lib/server/persistence/transaction.ts +1 -0
  92. package/src/lib/server/projects/project-repository.ts +36 -0
  93. package/src/lib/server/projects/project-service.ts +79 -0
  94. package/src/lib/server/protocols/protocol-normalization.test.ts +6 -4
  95. package/src/lib/server/runtime/alert-dispatch.ts +1 -1
  96. package/src/lib/server/runtime/daemon-policy.ts +1 -1
  97. package/src/lib/server/runtime/daemon-state/core.ts +1570 -0
  98. package/src/lib/server/runtime/daemon-state/health.ts +6 -0
  99. package/src/lib/server/runtime/daemon-state/policy.ts +7 -0
  100. package/src/lib/server/runtime/daemon-state/supervisor.ts +6 -0
  101. package/src/lib/server/runtime/daemon-state.test.ts +48 -0
  102. package/src/lib/server/runtime/daemon-state.ts +3 -1470
  103. package/src/lib/server/runtime/estop-repository.ts +4 -0
  104. package/src/lib/server/runtime/estop.ts +3 -1
  105. package/src/lib/server/runtime/heartbeat-service.test.ts +2 -2
  106. package/src/lib/server/runtime/heartbeat-service.ts +55 -34
  107. package/src/lib/server/runtime/heartbeat-wake.ts +6 -4
  108. package/src/lib/server/runtime/idle-window.ts +2 -2
  109. package/src/lib/server/runtime/network.ts +11 -0
  110. package/src/lib/server/runtime/orchestrator-events.ts +2 -2
  111. package/src/lib/server/runtime/queue/claims.ts +4 -0
  112. package/src/lib/server/runtime/queue/core.ts +2079 -0
  113. package/src/lib/server/runtime/queue/execution.ts +7 -0
  114. package/src/lib/server/runtime/queue/followups.ts +4 -0
  115. package/src/lib/server/runtime/queue/queries.ts +12 -0
  116. package/src/lib/server/runtime/queue/recovery.ts +7 -0
  117. package/src/lib/server/runtime/queue-recovery.test.ts +48 -13
  118. package/src/lib/server/runtime/queue-repository.ts +17 -0
  119. package/src/lib/server/runtime/queue.ts +5 -2061
  120. package/src/lib/server/runtime/run-ledger.ts +6 -5
  121. package/src/lib/server/runtime/run-repository.ts +73 -0
  122. package/src/lib/server/runtime/runtime-lock-repository.ts +8 -0
  123. package/src/lib/server/runtime/runtime-settings.ts +1 -1
  124. package/src/lib/server/runtime/runtime-state.ts +99 -0
  125. package/src/lib/server/runtime/scheduler.ts +4 -2
  126. package/src/lib/server/runtime/session-run-manager/cancellation.ts +157 -0
  127. package/src/lib/server/runtime/session-run-manager/drain.ts +246 -0
  128. package/src/lib/server/runtime/session-run-manager/enqueue.ts +287 -0
  129. package/src/lib/server/runtime/session-run-manager/queries.ts +117 -0
  130. package/src/lib/server/runtime/session-run-manager/recovery.ts +238 -0
  131. package/src/lib/server/runtime/session-run-manager/state.ts +441 -0
  132. package/src/lib/server/runtime/session-run-manager/types.ts +74 -0
  133. package/src/lib/server/runtime/session-run-manager.ts +72 -1377
  134. package/src/lib/server/runtime/watch-job-repository.ts +35 -0
  135. package/src/lib/server/runtime/watch-jobs.ts +3 -1
  136. package/src/lib/server/schedules/schedule-repository.ts +42 -0
  137. package/src/lib/server/sessions/session-repository.ts +85 -0
  138. package/src/lib/server/settings/settings-repository.ts +25 -0
  139. package/src/lib/server/skills/skill-discovery.test.ts +2 -2
  140. package/src/lib/server/skills/skill-discovery.ts +2 -2
  141. package/src/lib/server/skills/skill-repository.ts +14 -0
  142. package/src/lib/server/storage.ts +13 -24
  143. package/src/lib/server/tasks/task-repository.ts +54 -0
  144. package/src/lib/server/usage/usage-repository.ts +30 -0
  145. package/src/lib/server/webhooks/webhook-repository.ts +10 -0
  146. package/src/lib/strip-internal-metadata.test.ts +42 -41
  147. package/src/stores/use-chat-store.test.ts +54 -0
  148. package/src/stores/use-chat-store.ts +21 -5
  149. /package/{bundled-skills → skills}/google-workspace/SKILL.md +0 -0
@@ -0,0 +1,2079 @@
1
+ import { log } from '@/lib/server/logger'
2
+ import { matchesCapabilities, filterAgentsByCapabilities, capabilityMatchScore } from '@/lib/server/agents/capability-match'
3
+ import { genId } from '@/lib/id'
4
+ import { dedup, hmrSingleton, jitteredBackoff } from '@/lib/shared-utils'
5
+ import fs from 'node:fs'
6
+ import path from 'node:path'
7
+ import { logActivity } from '@/lib/server/activity/activity-log'
8
+ import { loadAgents } from '@/lib/server/agents/agent-repository'
9
+ import { withTransaction } from '@/lib/server/persistence/transaction'
10
+ import { loadQueue, saveQueue } from '@/lib/server/runtime/queue-repository'
11
+ import { loadSchedules, saveSchedules } from '@/lib/server/schedules/schedule-repository'
12
+ import { loadSessions, saveSessions } from '@/lib/server/sessions/session-repository'
13
+ import { loadSettings } from '@/lib/server/settings/settings-repository'
14
+ import { loadTasks, saveTasks } from '@/lib/server/tasks/task-repository'
15
+ import { notify } from '@/lib/server/ws-hub'
16
+ import { perf } from '@/lib/server/runtime/perf'
17
+ import { WORKSPACE_DIR } from '@/lib/server/data-dir'
18
+ import { createAgentTaskSession } from '@/lib/server/agents/task-session'
19
+ import { formatValidationFailure } from '@/lib/server/tasks/task-validation'
20
+ import { pushMainLoopEventToMainSessions } from '@/lib/server/agents/main-agent-loop'
21
+ import { executeSessionChatTurn, type ExecuteChatTurnResult } from '@/lib/server/chat-execution/chat-execution'
22
+ import { checkAgentBudgetLimits } from '@/lib/server/cost'
23
+ import { extractTaskResult, formatResultBody } from '@/lib/server/tasks/task-result'
24
+ import {
25
+ assessAutonomyRun,
26
+ classifyRuntimeFailure,
27
+ observeAutonomyRunOutcome,
28
+ recordSupervisorIncident,
29
+ } from '@/lib/server/autonomy/supervisor-reflection'
30
+ import {
31
+ collectTaskConnectorFollowupTargets as collectTaskConnectorFollowupTargetsImpl,
32
+ extractLikelyOutputFiles,
33
+ isSendableAttachment,
34
+ maybeResolveUploadMediaPathFromUrl,
35
+ notifyConnectorTaskFollowups,
36
+ resolveExistingOutputFilePath,
37
+ resolveTaskOriginConnectorFollowupTarget as resolveTaskOriginConnectorFollowupTargetImpl,
38
+ type ScheduleTaskMeta,
39
+ type SessionLike,
40
+ } from '@/lib/server/tasks/task-followups'
41
+ import { getCheckpointSaver } from '@/lib/server/langgraph-checkpoint'
42
+ import { cascadeUnblock } from '@/lib/server/dag-validation'
43
+ import { captureGuardianCheckpoint, prepareGuardianRecovery } from '@/lib/server/agents/guardian'
44
+ import { notifyOrchestrators } from '@/lib/server/runtime/orchestrator-events'
45
+ import type { Agent, BoardTask, Message, Session } from '@/types'
46
+ import { buildAgentDisabledMessage, isAgentDisabled } from '@/lib/server/agents/agent-availability'
47
+ import {
48
+ didTaskValidationChange,
49
+ markInvalidCompletedTaskFailed,
50
+ markValidatedTaskCompleted,
51
+ refreshTaskCompletionValidation,
52
+ } from '@/lib/server/tasks/task-lifecycle'
53
+ import { noteMissionTaskFinished, noteMissionTaskStarted } from '@/lib/server/missions/mission-service'
54
+
55
// Short module identifier; presumably passed to the logger as the entry tag (usage is outside this view).
const TAG = 'queue'

// Expose the task-followups helpers under their public names.
export const collectTaskConnectorFollowupTargets = collectTaskConnectorFollowupTargetsImpl
export const resolveTaskOriginConnectorFollowupTarget = resolveTaskOriginConnectorFollowupTargetImpl

// HMR-safe: the processing state is obtained through hmrSingleton so that a hot
// reload keeps the existing counters instead of resetting them.
const _queueState = hmrSingleton('__swarmclaw_queue__', () => {
  return {
    activeCount: 0,
    maxConcurrent: 3,
    pendingKick: false,
  }
})
66
+
67
/**
 * Coerces a loosely typed value into an integer clamped to `[min, max]`.
 *
 * Numbers are used as-is and strings are parsed base-10; the result is truncated
 * toward zero before clamping.
 *
 * @param value - Raw input of any type.
 * @param fallback - Returned unchanged (not clamped) when `value` is neither a
 *   number nor a string, or does not yield a finite number.
 * @param min - Inclusive lower bound.
 * @param max - Inclusive upper bound.
 * @returns The clamped integer, or `fallback`.
 */
function normalizeInt(value: unknown, fallback: number, min: number, max: number): number {
  let candidate = Number.NaN
  if (typeof value === 'number') {
    candidate = value
  } else if (typeof value === 'string') {
    candidate = Number.parseInt(value, 10)
  }

  if (!Number.isFinite(candidate)) return fallback

  const whole = Math.trunc(candidate)
  const capped = Math.min(max, whole)
  return Math.max(min, capped)
}
76
+
77
/** Use-case identifiers (lowercase) that may be selected as a task's preferred gateway use case. */
const OPENCLAW_USE_CASE_TAGS = new Set([
  'local-dev',
  'single-vps',
  'private-tailnet',
  'browser-heavy',
  'team-control',
])

/**
 * Derives gateway routing preferences from a task's tags and custom fields.
 *
 * The use case comes from `customFields.openclawUseCase` (or, when that is not a
 * string, `customFields.gatewayUseCase`) if it names a known use case; otherwise
 * the first task tag that is a known use case is used. The remaining tags become
 * the preferred gateway tags.
 *
 * @param task - Task whose `tags` and `customFields` are inspected.
 * @returns The preferred tags (trimmed, lowercased, de-duplicated, minus the chosen
 *   use case) and the chosen use case, or `null` when none applies.
 */
function deriveTaskRoutePreferences(task: BoardTask): {
  preferredGatewayTags?: string[]
  preferredGatewayUseCase?: string | null
} {
  const tags = Array.isArray(task.tags)
    ? dedup(task.tags.map((tag) => (typeof tag === 'string' ? tag.trim().toLowerCase() : '')).filter(Boolean))
    : []
  const rawUseCase = typeof task.customFields?.openclawUseCase === 'string'
    ? task.customFields.openclawUseCase
    : typeof task.customFields?.gatewayUseCase === 'string'
      ? task.customFields.gatewayUseCase
      : null
  // Normalise the custom field the same way tags are normalised; the known
  // use-case set is lowercase, so " Local-Dev" must still match 'local-dev'.
  const customUseCase = rawUseCase ? rawUseCase.trim().toLowerCase() : ''
  const preferredGatewayUseCase = customUseCase && OPENCLAW_USE_CASE_TAGS.has(customUseCase)
    ? customUseCase
    : (tags.find((tag) => OPENCLAW_USE_CASE_TAGS.has(tag)) || null)
  // The use case is reported separately, so it is not repeated among the tags.
  const preferredGatewayTags = tags.filter((tag) => tag !== preferredGatewayUseCase)
  return {
    preferredGatewayTags,
    preferredGatewayUseCase,
  }
}
106
+
107
/**
 * Resolves the retry policy that applies to a task.
 *
 * Per-task values win; otherwise the defaults from settings are used
 * (3 attempts / 30 seconds when the settings are missing or invalid).
 * Attempts are clamped to 1-20 and the backoff to 1-3600 seconds.
 *
 * @param task - Task whose `maxAttempts` and `retryBackoffSec` are read.
 * @returns The effective maximum attempts and retry backoff in seconds.
 */
function resolveTaskPolicy(task: BoardTask): { maxAttempts: number; backoffSec: number } {
  const { defaultTaskMaxAttempts, taskRetryBackoffSec } = loadSettings()
  const fallbackAttempts = normalizeInt(defaultTaskMaxAttempts, 3, 1, 20)
  const fallbackBackoffSec = normalizeInt(taskRetryBackoffSec, 30, 1, 3600)
  return {
    maxAttempts: normalizeInt(task.maxAttempts, fallbackAttempts, 1, 20),
    backoffSec: normalizeInt(task.retryBackoffSec, fallbackBackoffSec, 1, 3600),
  }
}
115
+
116
/**
 * Fills in retry-related fields on a task, mutating it in place.
 *
 * `attempts` is reset to 0 when it is not a number or is negative,
 * `maxAttempts` / `retryBackoffSec` are overwritten with the resolved policy,
 * and `retryScheduledAt` / `deadLetteredAt` become `null` when undefined.
 *
 * @param task - Task to normalise.
 */
function applyTaskPolicyDefaults(task: BoardTask): void {
  const { maxAttempts, backoffSec } = resolveTaskPolicy(task)

  const attempts = task.attempts
  if (typeof attempts !== 'number' || attempts < 0) {
    task.attempts = 0
  }

  task.maxAttempts = maxAttempts
  task.retryBackoffSec = backoffSec

  if (task.retryScheduledAt === undefined) {
    task.retryScheduledAt = null
  }
  if (task.deadLetteredAt === undefined) {
    task.deadLetteredAt = null
  }
}
124
+
125
/** CLI resume handles that allow a task run to continue an earlier CLI conversation. */
export interface TaskResumeState {
  /** Claude resume handle, or `null` when none is stored. */
  claudeSessionId: string | null
  /** Codex resume handle, or `null` when none is stored. */
  codexThreadId: string | null
  /** OpenCode resume handle, or `null` when none is stored. */
  opencodeSessionId: string | null
  /** Per-delegate resume handles, in the shape stored on `Session.delegateResumeIds`. */
  delegateResumeIds: NonNullable<Session['delegateResumeIds']>
}

/** A resume state together with the task (and session) it was taken from. */
export interface TaskResumeContext {
  /** Relationship of the source task to the task being run. */
  source: 'self' | 'delegated_from_task' | 'blocked_by'
  /** Id of the task that supplied the resume state. */
  sourceTaskId: string
  /** Title of the task that supplied the resume state. */
  sourceTaskTitle: string
  /** Session id recorded on the source task, or `null` when it has none. */
  sourceSessionId: string | null
  /** The resume handles themselves. */
  resume: TaskResumeState
}
139
+
140
/**
 * Normalises a stored resume handle.
 *
 * @param value - Raw value of any type.
 * @returns The trimmed string, or `null` when `value` is not a string or is blank.
 */
function normalizeResumeHandle(value: unknown): string | null {
  if (typeof value !== 'string') return null
  const handle = value.trim()
  return handle.length > 0 ? handle : null
}
143
+
144
/**
 * Creates a fresh delegate-resume map in which every delegate has no handle.
 *
 * @returns A new object on every call, so callers may mutate it freely.
 */
function buildEmptyDelegateResumeIds(): NonNullable<Session['delegateResumeIds']> {
  const blank: NonNullable<Session['delegateResumeIds']> = {
    claudeCode: null,
    codex: null,
    opencode: null,
    gemini: null,
  }
  return blank
}
152
+
153
/**
 * Normalises a CLI provider name for comparison.
 *
 * @param value - Raw value of any type.
 * @returns The trimmed, lowercased string, or `null` when `value` is not a string or is blank.
 */
function normalizeCliProvider(value: unknown): string | null {
  if (typeof value !== 'string') return null
  const provider = value.trim()
  if (provider === '') return null
  return provider.toLowerCase()
}
156
+
157
/**
 * Type guard: reports whether a resume state carries at least one usable handle.
 *
 * @param state - Resume state to inspect; may be absent.
 * @returns `true` when any direct or delegate handle is truthy.
 */
function hasResumeState(state: TaskResumeState | null | undefined): state is TaskResumeState {
  if (!state) return false

  // Direct handles are checked first; the delegate map is only consulted when none is set.
  if (state.claudeSessionId || state.codexThreadId || state.opencodeSessionId) return true

  const delegated = state.delegateResumeIds
  return Boolean(delegated.claudeCode || delegated.codex || delegated.opencode || delegated.gemini)
}
169
+
170
/**
 * Collects the CLI resume handles stored on a task.
 *
 * Provider-specific fields (`claudeResumeId`, `codexResumeId`, `opencodeResumeId`,
 * `geminiResumeId`) take precedence. The legacy `cliResumeId` is used for a
 * provider only when `cliProvider` names that provider (`claude-cli`, `codex-cli`,
 * `opencode-cli`, `gemini-cli`).
 *
 * @param task - Task (or partial task) to read; may be absent.
 * @returns The resume state, or `null` when the task is absent or has no handle.
 */
export function extractTaskResumeState(task: Partial<BoardTask> | null | undefined): TaskResumeState | null {
  if (!task) return null

  const legacyId = normalizeResumeHandle(task.cliResumeId)
  const legacyProvider = normalizeCliProvider(task.cliProvider)
  // Prefer the dedicated field; fall back to the legacy id only for the matching provider.
  const pickHandle = (dedicated: unknown, provider: string): string | null =>
    normalizeResumeHandle(dedicated) || (legacyProvider === provider ? legacyId : null)

  const claudeSessionId = pickHandle(task.claudeResumeId, 'claude-cli')
  const codexThreadId = pickHandle(task.codexResumeId, 'codex-cli')
  const opencodeSessionId = pickHandle(task.opencodeResumeId, 'opencode-cli')
  const geminiSessionId = pickHandle(task.geminiResumeId, 'gemini-cli')

  const resume: TaskResumeState = {
    claudeSessionId,
    codexThreadId,
    opencodeSessionId,
    delegateResumeIds: {
      claudeCode: claudeSessionId,
      codex: codexThreadId,
      opencode: opencodeSessionId,
      gemini: geminiSessionId,
    },
  }

  if (!hasResumeState(resume)) return null
  return resume
}
198
+
199
/**
 * Collects the CLI resume handles stored on a session.
 *
 * Delegate handles come from `session.delegateResumeIds`; for Claude, Codex and
 * OpenCode a missing delegate handle falls back to the session's direct handle.
 * Gemini has no direct counterpart and is taken from the delegate map only.
 *
 * @param session - Session (or partial session) to read; may be absent.
 * @returns The resume state, or `null` when the session is absent or has no handle.
 */
export function extractSessionResumeState(session: Partial<Session> | null | undefined): TaskResumeState | null {
  if (!session) return null

  const claudeSessionId = normalizeResumeHandle(session.claudeSessionId)
  const codexThreadId = normalizeResumeHandle(session.codexThreadId)
  const opencodeSessionId = normalizeResumeHandle(session.opencodeSessionId)

  const stored = session.delegateResumeIds
  const delegated = stored && typeof stored === 'object'
    ? { ...buildEmptyDelegateResumeIds(), ...stored }
    : buildEmptyDelegateResumeIds()

  const resume: TaskResumeState = {
    claudeSessionId,
    codexThreadId,
    opencodeSessionId,
    delegateResumeIds: {
      claudeCode: normalizeResumeHandle(delegated.claudeCode) || claudeSessionId,
      codex: normalizeResumeHandle(delegated.codex) || codexThreadId,
      opencode: normalizeResumeHandle(delegated.opencode) || opencodeSessionId,
      gemini: normalizeResumeHandle(delegated.gemini),
    },
  }

  if (!hasResumeState(resume)) return null
  return resume
}
223
+
224
/**
 * Finds the first related task that can supply CLI resume handles.
 *
 * Candidates are tried in order: the task itself, the task it was delegated
 * from, then each task in `blockedBy`. For each candidate the handles stored on
 * the task are preferred; failing that, the handles of the session recorded on
 * the task (`checkpoint.lastSessionId`, then `sessionId`) are used.
 *
 * @param task - Task about to run.
 * @param tasksById - Lookup of all known tasks by id.
 * @param sessionsById - Optional lookup of sessions by id.
 * @returns The first usable resume context, or `null` when no candidate has handles.
 */
export function resolveTaskResumeContext(
  task: BoardTask,
  tasksById: Record<string, BoardTask>,
  sessionsById?: Record<string, SessionLike | Session>,
): TaskResumeContext | null {
  const blockers = Array.isArray(task.blockedBy) ? task.blockedBy : []
  const candidates: Array<{ source: TaskResumeContext['source']; taskId: string | null | undefined }> = [
    { source: 'self', taskId: task.id },
    { source: 'delegated_from_task', taskId: task.delegatedFromTaskId },
    ...blockers.map((taskId) => ({ source: 'blocked_by' as const, taskId })),
  ]
  const visited = new Set<string>()

  for (const { source, taskId: rawTaskId } of candidates) {
    const taskId = typeof rawTaskId === 'string' ? rawTaskId.trim() : ''
    if (!taskId || visited.has(taskId)) continue
    visited.add(taskId)

    // Use the in-hand task object for "self" rather than whatever the lookup holds.
    const sourceTask = taskId === task.id ? task : tasksById[taskId]
    if (!sourceTask) continue

    const sourceSessionId = normalizeResumeHandle(sourceTask.checkpoint?.lastSessionId)
      || normalizeResumeHandle(sourceTask.sessionId)

    let resume = extractTaskResumeState(sourceTask)
    if (!resume && sourceSessionId) {
      const linkedSession = sessionsById?.[sourceSessionId]
      if (linkedSession) resume = extractSessionResumeState(linkedSession as Session)
    }
    if (!resume) continue

    return {
      source,
      sourceTaskId: sourceTask.id,
      sourceTaskTitle: sourceTask.title,
      sourceSessionId,
      resume,
    }
  }

  return null
}
259
+
260
/**
 * Copies resume handles onto a session, mutating it in place.
 *
 * Only non-empty handles that differ from the session's current value are
 * written; existing handles are never cleared.
 *
 * @param session - Session to update.
 * @param resume - Handles to apply; ignored when absent or empty.
 * @returns `true` when at least one field on the session was changed.
 */
export function applyTaskResumeStateToSession(session: Session, resume: TaskResumeState | null | undefined): boolean {
  if (!hasResumeState(resume)) return false

  let changed = false

  const directFields: Array<['claudeSessionId' | 'codexThreadId' | 'opencodeSessionId', string | null]> = [
    ['claudeSessionId', resume.claudeSessionId],
    ['codexThreadId', resume.codexThreadId],
    ['opencodeSessionId', resume.opencodeSessionId],
  ]
  directFields.forEach(([field, incoming]) => {
    if (!incoming || session[field] === incoming) return
    session[field] = incoming
    changed = true
  })

  const existing = session.delegateResumeIds
  const merged = existing && typeof existing === 'object'
    ? { ...buildEmptyDelegateResumeIds(), ...existing }
    : buildEmptyDelegateResumeIds()
  const delegateEntries = Object.entries(resume.delegateResumeIds) as Array<[keyof NonNullable<Session['delegateResumeIds']>, string | null]>
  delegateEntries.forEach(([delegate, incoming]) => {
    if (!incoming || merged[delegate] === incoming) return
    merged[delegate] = incoming
    changed = true
  })

  // The merged map is written back whenever anything changed, including a direct field.
  if (changed) session.delegateResumeIds = merged
  return changed
}
286
+
287
/**
 * Finds an existing session that a task run can reuse.
 *
 * Related tasks are checked in order: the task itself, the task it was
 * delegated from, then each task in `blockedBy`. For each one the
 * `checkpoint.lastSessionId` is tried before `sessionId`, and the first id that
 * exists in `sessions` wins.
 *
 * @param task - Task about to run.
 * @param tasks - Lookup of all known tasks by id.
 * @param sessions - Lookup of sessions by id.
 * @returns The reusable session id, or `''` when none is found.
 */
export function resolveReusableTaskSessionId(
  task: BoardTask,
  tasks: Record<string, BoardTask>,
  sessions: Record<string, SessionLike>,
): string {
  const delegatedFrom = typeof task.delegatedFromTaskId === 'string' ? task.delegatedFromTaskId : ''
  const blockers = Array.isArray(task.blockedBy) ? task.blockedBy : []
  const relatedTaskIds = [task.id, delegatedFrom, ...blockers]

  const visited = new Set<string>()
  for (const rawTaskId of relatedTaskIds) {
    const taskId = typeof rawTaskId === 'string' ? rawTaskId.trim() : ''
    if (!taskId || visited.has(taskId)) continue
    visited.add(taskId)

    const sourceTask = taskId === task.id ? task : tasks[taskId]
    if (!sourceTask) continue

    const lastSessionId = normalizeResumeHandle(sourceTask.checkpoint?.lastSessionId)
    const runSessionId = normalizeResumeHandle(sourceTask.sessionId)
    if (lastSessionId && sessions[lastSessionId]) return lastSessionId
    if (runSessionId && sessions[runSessionId]) return runSessionId
  }
  return ''
}
314
+
315
/**
 * Builds the note describing how a task run continues earlier work.
 *
 * @param reusedExistingSession - Whether the run reuses a previous execution session.
 * @param resumeContext - Where stored CLI resume handles came from, if any.
 * @returns The sentences joined by a space and prefixed with a blank line, or
 *   `''` when there is nothing to report.
 */
function buildTaskContinuationNote(
  reusedExistingSession: boolean,
  resumeContext: TaskResumeContext | null,
): string {
  const sentences: string[] = []

  if (reusedExistingSession) {
    sentences.push('Reusing the previous execution session for this task.')
  }

  if (resumeContext) {
    const fromRelatedTask = resumeContext.source === 'delegated_from_task' || resumeContext.source === 'blocked_by'
    if (fromRelatedTask) {
      sentences.push(`Stored CLI context is available from related task "${resumeContext.sourceTaskTitle}".`)
    } else if (resumeContext.source === 'self' && !reusedExistingSession) {
      // Own handles are only worth mentioning when the session itself is new.
      sentences.push('Stored CLI resume handles are available for continuation.')
    }
  }

  if (sentences.length === 0) return ''
  return `\n\n${sentences.join(' ')}`
}
330
+
331
/** Case-insensitive keywords suggesting a task is about running, serving or building a project. */
const DEV_TASK_HINT = /\b(dev(?:\s+server)?|start(?:ing)?\s+(?:the\s+)?server|run(?:ning)?\s+(?:the\s+)?(?:app|project|site)|serve|localhost|http\s+server|web\s+server|npm\b|pnpm\b|yarn\b|bun\b|vite|next(?:\.js)?|react|build|compile)\b/i

/** Directory names that are never offered as a candidate working directory. */
const TASK_CWD_NOISE_DIRS = new Set<string>([
  'uploads',
  'data',
  'projects',
  'tasks',
  '.swarm-data-test',
  '.git',
  '.next',
  'node_modules',
])

/** Entries whose presence marks a directory as a project root. */
const PROJECT_MARKER_FILES: string[] = ['package.json', 'pyproject.toml', 'Cargo.toml', 'go.mod', '.git']

/** Sub-directories whose presence suggests a directory holds application source. */
const SOURCE_MARKER_DIRS: string[] = ['src', 'app', 'public', 'pages']

/** The `projects` directory under the workspace root. */
const WORKSPACE_PROJECTS_DIR = path.join(WORKSPACE_DIR, 'projects')

/** A directory that may serve as a task's working directory. */
interface WorkspaceDirCandidate {
  /** Absolute path of the directory. */
  dir: string
  /** Directory name (last path segment). */
  name: string
  /** Whether the directory contains any of `PROJECT_MARKER_FILES`. */
  hasProjectMarker: boolean
  /** Whether the directory contains any of `SOURCE_MARKER_DIRS`. */
  hasSourceMarker: boolean
}

// Most recent candidate scan with its expiry timestamp (ms since epoch); null until the first scan.
let workspaceDirCache: { expiresAt: number; candidates: WorkspaceDirCandidate[] } | null = null
354
+
355
/**
 * Reports whether a path exists and is a directory.
 *
 * @param dirPath - Path to check.
 * @returns `false` when the path is missing, is not a directory, or cannot be stat'ed.
 */
function isExistingDirectory(dirPath: string): boolean {
  try {
    const stats = fs.statSync(dirPath)
    return stats.isDirectory()
  } catch {
    // Any stat failure is deliberately treated as "not a directory".
    return false
  }
}
362
+
363
/**
 * Reports whether `child` is `parent` itself or lies somewhere beneath it.
 *
 * Both paths are resolved to absolute form first. The comparison is purely
 * lexical: nothing is read from disk and symlinks are not followed.
 *
 * @param parent - Directory that should contain `child`.
 * @param child - Path to test.
 * @returns `true` when `child` equals `parent` or is a descendant of it.
 */
function isWithinDirectory(parent: string, child: string): boolean {
  const rel = path.relative(path.resolve(parent), path.resolve(child))
  if (rel === '') return true
  // An absolute result means the two paths share no common root.
  if (path.isAbsolute(rel)) return false
  // Only a leading ".." *segment* escapes the parent; a child whose name merely
  // starts with two dots (e.g. "..cache") is still inside it.
  return rel !== '..' && !rel.startsWith(`..${path.sep}`)
}
369
+
370
/**
 * Reduces text to lowercase alphanumeric words separated by single spaces,
 * for loose substring matching.
 *
 * @param value - Text to normalise.
 * @returns The normalised text with no leading or trailing space.
 */
function normalizeForMatch(value: string): string {
  const lowered = value.toLowerCase()
  const spaced = lowered.replace(/[^a-z0-9]+/g, ' ')
  return spaced.trim()
}
373
+
374
/**
 * Reports whether a directory contains at least one of the given entries.
 *
 * @param dirPath - Directory to look in.
 * @param markers - File or directory names to look for directly under `dirPath`.
 * @returns `true` as soon as one marker exists; `false` when none do.
 */
function hasAnyMarker(dirPath: string, markers: string[]): boolean {
  for (const marker of markers) {
    const markerPath = path.join(dirPath, marker)
    if (fs.existsSync(markerPath)) return true
  }
  return false
}
377
+
378
/**
 * Turns a raw directory reference into the absolute path of an existing directory.
 *
 * A leading `~` or `~/` is expanded to the user's home directory, absolute
 * paths are normalised, and relative paths are resolved against `baseDir`.
 *
 * @param raw - Candidate value of any type; non-strings and blank strings are rejected.
 * @param baseDir - Directory against which relative paths are resolved.
 * @returns The resolved path, or `null` when the input is unusable, the home
 *   directory is unknown for a tilde path, or the directory does not exist.
 */
function normalizeDirCandidate(raw: unknown, baseDir: string): string | null {
  if (typeof raw !== 'string') return null
  const trimmed = raw.trim()
  if (!trimmed) return null

  let expanded = trimmed
  if (trimmed === '~' || trimmed.startsWith('~/')) {
    // HOME is commonly unset on Windows, where USERPROFILE holds the home directory.
    const homeDir = process.env.HOME || process.env.USERPROFILE || ''
    // Without a home directory a tilde path cannot be expanded; resolving the
    // remainder against baseDir would silently point at the wrong directory.
    if (!homeDir) return null
    expanded = trimmed === '~' ? homeDir : path.join(homeDir, trimmed.slice(2))
  }

  const resolved = path.isAbsolute(expanded) ? path.resolve(expanded) : path.resolve(baseDir, expanded)
  return isExistingDirectory(resolved) ? resolved : null
}
391
+
392
/**
 * Reports whether a task's title or description mentions running, serving or
 * building a project, as defined by `DEV_TASK_HINT`.
 *
 * @param task - Task whose `title` and `description` are inspected.
 * @returns `true` when the combined text matches the hint pattern.
 */
function looksLikeDevTask(task: Pick<BoardTask, 'title' | 'description'>): boolean {
  const title = task.title || ''
  const description = task.description || ''
  const combined = `${title} ${description}`.trim()
  return DEV_TASK_HINT.test(combined)
}
396
+
397
/**
 * Lists the directories directly under the workspace root and its `projects`
 * folder that could serve as a task's working directory.
 *
 * Dot-directories and the names in `TASK_CWD_NOISE_DIRS` are skipped. The
 * result is sorted by name and cached for 15 seconds.
 *
 * @returns The candidate directories; callers receive the cached array itself.
 */
function listWorkspaceDirCandidates(): WorkspaceDirCandidate[] {
  const now = Date.now()
  if (workspaceDirCache !== null && now < workspaceDirCache.expiresAt) {
    return workspaceDirCache.candidates
  }

  // Keyed by resolved path so a directory reachable from both roots is listed once.
  const byPath = new Map<string, WorkspaceDirCandidate>()
  for (const root of [WORKSPACE_DIR, WORKSPACE_PROJECTS_DIR]) {
    if (!isExistingDirectory(root)) continue

    let entries: fs.Dirent[]
    try {
      entries = fs.readdirSync(root, { withFileTypes: true })
    } catch {
      // An unreadable root contributes no candidates.
      continue
    }

    for (const entry of entries) {
      if (!entry.isDirectory()) continue
      const { name } = entry
      const skipped = !name || name.startsWith('.') || TASK_CWD_NOISE_DIRS.has(name)
      if (skipped) continue

      const resolved = path.resolve(path.join(root, name))
      if (byPath.has(resolved)) continue
      byPath.set(resolved, {
        dir: resolved,
        name,
        hasProjectMarker: hasAnyMarker(resolved, PROJECT_MARKER_FILES),
        hasSourceMarker: hasAnyMarker(resolved, SOURCE_MARKER_DIRS),
      })
    }
  }

  const candidates = Array.from(byPath.values())
  candidates.sort((left, right) => left.name.localeCompare(right.name))
  workspaceDirCache = { expiresAt: now + 15_000, candidates }
  return candidates
}
438
+
439
/**
 * Guesses which workspace directory a task refers to.
 *
 * Each candidate directory is scored: +8 when its normalised name appears in
 * the task text, +1 per name token of three or more characters found in the
 * text, +3 (dev task) or +1 (otherwise) for a project marker, and +1 for a
 * source marker. The first candidate with the highest score wins if it reaches 4.
 * Failing that, a dev task falls back to the only directory with a project
 * marker, when there is exactly one.
 *
 * @param task - Task whose `title`, `description` and `file` form the match text.
 * @returns The inferred directory, or `null` when no confident guess exists.
 */
function inferWorkspaceProjectCwd(task: Pick<BoardTask, 'title' | 'description' | 'file'>): string | null {
  const candidates = listWorkspaceDirCandidates()
  if (candidates.length === 0) return null

  const haystack = normalizeForMatch(`${task.title || ''} ${task.description || ''} ${task.file || ''}`)
  const isDevTask = looksLikeDevTask(task)

  let bestDir: string | null = null
  let bestScore = 0
  for (const candidate of candidates) {
    const needle = normalizeForMatch(candidate.name)
    if (!needle) continue

    const tokenHits = needle
      .split(' ')
      .filter((token) => token.length >= 3 && haystack.includes(token))
      .length
    const nameScore = haystack.includes(needle) ? 8 : 0
    const projectScore = candidate.hasProjectMarker ? (isDevTask ? 3 : 1) : 0
    const sourceScore = candidate.hasSourceMarker ? 1 : 0
    const score = nameScore + tokenHits + projectScore + sourceScore

    // Strictly greater: on a tie the earlier (alphabetically first) candidate is kept.
    if (bestDir === null || score > bestScore) {
      bestDir = candidate.dir
      bestScore = score
    }
  }

  // NOTE(review): for a dev task, project marker (3) + source marker (1) reaches
  // this threshold without any name match — confirm that this is intended.
  if (bestDir !== null && bestScore >= 4) return bestDir

  const withProjectMarker = candidates.filter((candidate) => candidate.hasProjectMarker)
  if (isDevTask && withProjectMarker.length === 1) return withProjectMarker[0].dir
  return null
}
466
+
467
/**
 * Chooses the working directory in which a task should execute.
 *
 * The first option that yields an existing directory wins:
 * 1. the task's explicit `cwd` (relative paths resolve against the workspace root);
 * 2. `<workspace>/projects/<projectId>`, provided it is a real sub-directory of
 *    the projects root;
 * 3. the directory of the task's `file` reference, if inside the workspace;
 * 4. a directory inferred from the task text;
 * 5. the cwd of the session the task was created in, then of its run session,
 *    ignoring a cwd equal to the workspace root;
 * 6. a per-task sandbox `<workspace>/tasks/<task.id>`, created on demand.
 *
 * @param task - Task metadata to resolve a directory for.
 * @param sessions - Lookup of sessions by id, used for the session-cwd fallback.
 * @returns The chosen directory.
 * @throws Whatever `fs.mkdirSync` throws if the sandbox directory cannot be created.
 */
function resolveTaskExecutionCwd(task: ScheduleTaskMeta, sessions: Record<string, SessionLike>): string {
  const workspaceRoot = path.resolve(WORKSPACE_DIR)

  const explicitCwd = normalizeDirCandidate(task.cwd, workspaceRoot)
  if (explicitCwd) return explicitCwd

  const projectId = typeof task.projectId === 'string' ? task.projectId.trim() : ''
  if (projectId) {
    const projectDir = path.join(WORKSPACE_PROJECTS_DIR, projectId)
    // A project id is an identifier, not a path: ids such as "../.." or "."
    // must not select a directory outside (or equal to) the projects root.
    const rel = path.relative(path.resolve(WORKSPACE_PROJECTS_DIR), path.resolve(projectDir))
    const isProjectSubdir = rel !== ''
      && rel !== '..'
      && !rel.startsWith(`..${path.sep}`)
      && !path.isAbsolute(rel)
    if (isProjectSubdir && isExistingDirectory(projectDir)) return projectDir
  }

  const fileRef = typeof task.file === 'string' ? task.file.trim() : ''
  if (fileRef) {
    const filePath = path.isAbsolute(fileRef) ? fileRef : path.resolve(workspaceRoot, fileRef)
    // The reference may itself be a directory; otherwise use its containing directory.
    const fileDir = isExistingDirectory(filePath) ? filePath : path.dirname(filePath)
    if (isExistingDirectory(fileDir) && isWithinDirectory(workspaceRoot, fileDir)) return fileDir
  }

  const inferredCwd = inferWorkspaceProjectCwd(task)
  if (inferredCwd) return inferredCwd

  // A session cwd is only useful when it is more specific than the workspace root.
  const usableSessionCwd = (rawSessionId: unknown): string | null => {
    const sessionId = typeof rawSessionId === 'string' ? rawSessionId.trim() : ''
    if (!sessionId) return null
    const cwd = normalizeDirCandidate(sessions[sessionId]?.cwd, workspaceRoot)
    return cwd && path.resolve(cwd) !== workspaceRoot ? cwd : null
  }
  const sessionCwd = usableSessionCwd(task.createdInSessionId) || usableSessionCwd(task.sessionId)
  if (sessionCwd) return sessionCwd

  const sandboxDir = path.join(workspaceRoot, 'tasks', task.id)
  fs.mkdirSync(sandboxDir, { recursive: true })
  return sandboxDir
}
505
+
506
/**
 * Reports whether a task id is already present in the queue.
 *
 * @param queue - Queued task ids.
 * @param id - Task id to look for.
 * @returns `true` when `id` occurs in `queue`.
 */
function queueContains(queue: string[], id: string): boolean {
  const position = queue.indexOf(id)
  return position !== -1
}
509
+
510
/**
 * Reports whether a task has been cancelled.
 *
 * @param task - Task (or partial task) to inspect; may be absent.
 * @returns `true` only when the task exists and its status is `'cancelled'`.
 */
function isCancelledTask(task: Partial<BoardTask> | null | undefined): boolean {
  if (task == null) return false
  return task.status === 'cancelled'
}
513
+
514
/**
 * Appends a task id to the queue unless it is already queued. Mutates `queue`.
 *
 * @param queue - Queued task ids.
 * @param id - Task id to add.
 */
function pushQueueUnique(queue: string[], id: string): void {
  if (queueContains(queue, id)) return
  queue.push(id)
}
517
+
518
/**
 * Reports whether a task records a creating agent.
 *
 * @param task - Task (or partial task) to inspect; may be absent.
 * @returns `true` when `createdByAgentId` is a non-blank string.
 */
function isAgentCreatedTask(task: Partial<BoardTask> | null | undefined): boolean {
  const creator = task?.createdByAgentId
  return typeof creator === 'string' && creator.trim().length > 0
}
521
+
522
/**
 * Determines which chat session, if any, is associated with a finished task:
 * the session the task was created in.
 *
 * Only completed or failed tasks qualify, and tasks that originate from a
 * schedule or were created by an agent never resolve to a session.
 *
 * @param task - Task to inspect.
 * @param sessions - Lookup of sessions by id.
 * @returns The trimmed `createdInSessionId` when that session exists, otherwise `null`.
 */
function resolveTaskTerminalChatSessionId(
  task: BoardTask,
  sessions: Record<string, SessionLike>,
): string | null {
  const isTerminal = task.status === 'completed' || task.status === 'failed'
  if (!isTerminal) return null
  if (task.sourceType === 'schedule' || isAgentCreatedTask(task)) return null

  const originSessionId = typeof task.createdInSessionId === 'string'
    ? task.createdInSessionId.trim()
    : ''
  if (!originSessionId) return null
  return sessions[originSessionId] ? originSessionId : null
}
534
+
535
/** Result details gathered once per finished task and reused for chat messages and connector follow-ups. */
+ interface TaskResultDeliveryData {
536
+ statusLabel: 'completed' | 'failed' // 'completed' only when task.status is 'completed'; anything else is labelled 'failed'
537
+ resultBody: string // formatted text produced by formatResultBody
538
+ outputFileRefs: string[] // task.outputFiles when non-empty, otherwise refs extracted from resultBody
539
+ firstImage?: NonNullable<BoardTask['artifacts']>[number] // first artifact whose type is 'image', if any
540
+ followupMediaPath?: string // first resolvable artifact media path, else first sendable local output file
541
+ mediaFileName?: string // basename of followupMediaPath when one was found
542
+ execCwd: string // cwd of the run session, or '' when unavailable
543
+ resumeLines: string[] // markdown lines naming CLI resume/session ids
544
+ }
545
+
546
/**
 * Gathers everything needed to report a finished task: the formatted result
 * body, output file references, the first image artifact, an attachable media
 * path, the run session's cwd, and CLI resume identifiers.
 *
 * The result text comes from `task.result`, falling back to the run session's
 * latest assistant message.
 */
function collectTaskResultDeliveryData(
  task: BoardTask,
  sessions: Record<string, SessionLike>,
): TaskResultDeliveryData {
  const runSession = typeof task.sessionId === 'string' && task.sessionId
    ? sessions[task.sessionId]
    : null
  const fallbackText = runSession ? latestAssistantText(runSession) : ''
  const sinceTime = typeof task.startedAt === 'number' ? task.startedAt : null
  const taskResult = extractTaskResult(runSession, task.result || fallbackText || null, { sinceTime })
  const resultBody = formatResultBody(taskResult)

  // Explicit output files recorded on the task win over files guessed from the text.
  const hasRecordedOutputs = Array.isArray(task.outputFiles) && task.outputFiles.length > 0
  const outputFileRefs = hasRecordedOutputs ? task.outputFiles : extractLikelyOutputFiles(resultBody)

  const firstImage = taskResult.artifacts.find((artifact) => artifact.type === 'image')
  const firstArtifactMediaPath = taskResult.artifacts
    .map((artifact) => maybeResolveUploadMediaPathFromUrl(artifact.url))
    .find((candidate): candidate is string => Boolean(candidate))

  // Provider-specific resume ids first; the generic CLI id is only a fallback.
  const providerResumeIds = [
    ['Claude session', task.claudeResumeId],
    ['Codex thread', task.codexResumeId],
    ['OpenCode session', task.opencodeResumeId],
    ['Gemini session', task.geminiResumeId],
  ] as const
  const resumeLines: string[] = []
  for (const [label, resumeId] of providerResumeIds) {
    if (resumeId) resumeLines.push(`${label}: \`${resumeId}\``)
  }
  if (resumeLines.length === 0 && task.cliResumeId) {
    resumeLines.push(`${task.cliProvider || 'CLI'} session: \`${task.cliResumeId}\``)
  }

  const execCwd = runSession?.cwd || ''
  const existingOutputPaths: string[] = []
  for (const fileRef of outputFileRefs as string[]) {
    const resolved = resolveExistingOutputFilePath(fileRef, execCwd)
    if (resolved) existingOutputPaths.push(resolved)
  }
  const firstLocalOutputPath = existingOutputPaths.find((candidate: string) => isSendableAttachment(candidate))
  const followupMediaPath = firstArtifactMediaPath || firstLocalOutputPath || undefined
  const mediaFileName = followupMediaPath ? path.basename(followupMediaPath) : undefined

  return {
    statusLabel: task.status === 'completed' ? 'completed' : 'failed',
    resultBody,
    outputFileRefs,
    firstImage,
    followupMediaPath,
    mediaFileName,
    execCwd,
    resumeLines,
  }
}
592
+
593
/**
 * Assembles the markdown message posted when a task reaches a terminal state.
 *
 * Sections are separated by blank lines, in this order: prefix, working
 * directory, up to eight output files, task report path, resume ids joined
 * with ' | ', and finally the result body (or 'No summary.').
 */
function buildTaskTerminalMessage(
  prefix: string,
  task: BoardTask,
  delivery: TaskResultDeliveryData,
): string {
  const fileList = delivery.outputFileRefs
    .slice(0, 8)
    .map((fileRef: string) => `- \`${fileRef}\``)
    .join('\n')

  const sections: Array<string | null> = [
    prefix,
    delivery.execCwd ? `Working directory: \`${delivery.execCwd}\`` : null,
    delivery.outputFileRefs.length > 0 ? `Output files:\n${fileList}` : null,
    task.completionReportPath ? `Task report: \`${task.completionReportPath}\`` : null,
    delivery.resumeLines.length > 0 ? delivery.resumeLines.join(' | ') : null,
    delivery.resultBody || 'No summary.',
  ]
  return sections.filter((section): section is string => section !== null).join('\n\n')
}
608
+
609
/**
 * Returns the trimmed text of the most recent assistant message in the
 * session, skipping blank messages and bare HEARTBEAT_OK acknowledgements.
 * Yields '' when the session has no message array or no qualifying message.
 */
function latestAssistantText(session: SessionLike | null | undefined): string {
  const messages = session?.messages
  if (!Array.isArray(messages)) return ''

  let index = messages.length
  while (index-- > 0) {
    const candidate = messages[index]
    if (candidate?.role !== 'assistant' || typeof candidate.text !== 'string') continue
    const trimmed = candidate.text.trim()
    if (trimmed && !/^HEARTBEAT_OK$/i.test(trimmed)) return trimmed
  }
  return ''
}
621
+
622
+ // Task result extraction now uses Zod-validated structured data
623
+ // from ./task-result.ts (extractTaskResult, formatResultBody)
624
+
625
/**
 * Heuristically checks whether a task result looks incomplete, i.e. the agent
 * appears to have stopped mid-objective.
 *
 * @param text Final text produced by the agent.
 * @returns true when the trimmed text ends with an ellipsis, contains a
 *   "Step/Phase/Next N" markdown header within its last 200 characters, or
 *   contains forward-looking phrasing ("next I'll", "moving on to", ...)
 *   within its last 300 characters. Empty input returns false.
 */
function looksIncomplete(text: string): boolean {
  if (!text) return false
  const trimmed = text.trim()
  // Ends with an ellipsis (three dots or the single-character form).
  if (trimmed.endsWith('...') || trimmed.endsWith('…')) return true
  // A step/phase header near the end suggests the agent was still listing work to do.
  if (/(?:^|\n)#{1,3}\s+(?:Step|Phase|Next)\s+\d/i.test(trimmed.slice(-200))) return true
  // Forward-looking language near the end. Accept both the ASCII apostrophe and the
  // typographic one (U+2019), since model output frequently uses the latter.
  const lastChunk = trimmed.slice(-300).toLowerCase()
  return /\b(?:next i(?:['’]ll| will)|now i(?:['’]ll| will)|let me (?:now|next)|moving on to|proceeding to)\b/.test(lastChunk)
}
638
+
639
/**
 * Fire-and-forget wrapper that reports a task run's outcome to the autonomy
 * observer with source 'task'. A rejection is logged as a warning rather than
 * propagated to the caller.
 */
function queueTaskAutonomyObservation(input: {
  runId: string
  sessionId: string
  taskId: string
  agentId: string
  status: 'completed' | 'failed' | 'cancelled'
  resultText?: string | null
  error?: string | null
  toolEvents?: ExecuteChatTurnResult['toolEvents']
  sourceMessage?: string | null
}) {
  const { runId, sessionId, taskId, agentId, status, resultText, error, toolEvents, sourceMessage } = input
  const observation = observeAutonomyRunOutcome({
    runId,
    sessionId,
    taskId,
    agentId,
    source: 'task',
    status,
    resultText,
    // A null or empty error is passed on as undefined.
    error: error || undefined,
    toolEvents,
    sourceMessage,
  })
  void observation.catch((err: unknown) => {
    log.warn(TAG, `[queue] Autonomy observation failed for ${runId}:`, err)
  })
}
665
+
666
/**
 * Executes a board task in the given session and lets the autonomy supervisor
 * drive up to two follow-up turns.
 *
 * Flow:
 *  1. When the agent has `autoRecovery`, capture a guardian checkpoint for the
 *     task's project directory (or the workspace root).
 *  2. Send the task prompt, with completion requirements appended, as a chat turn.
 *  3. While the supervisor assessment yields an intervention prompt, or the
 *     reply looks incomplete, send a follow-up turn. Stop on a block, a budget
 *     overrun, a run error, or after two follow-ups.
 *
 * @param task The task being executed; its description (or title) is the prompt.
 * @param agent The agent running the task.
 * @param sessionId Session the chat turns are executed in.
 * @returns The last turn's result with trimmed text and token/cost totals
 *   accumulated over every turn.
 */
async function executeTaskRun(
  task: BoardTask,
  agent: Agent,
  sessionId: string,
): Promise<ExecuteChatTurnResult> {
  if (agent.autoRecovery) {
    const cwd = task.projectId
      ? path.join(WORKSPACE_DIR, 'projects', task.projectId)
      : WORKSPACE_DIR
    captureGuardianCheckpoint(cwd, `task:${task.id}`)
  }
  const settings = loadSettings()
  const basePrompt = task.description || task.title
  const prompt = [
    basePrompt,
    '',
    'Completion requirements:',
    '- Execute the task before replying; do not reply with only a plan.',
    '- Include concrete evidence in your final summary: changed file paths, commands run, and verification results.',
    '- If blocked, state the blocker explicitly and what input or permission is missing.',
  ].join('\n')
  // All agents go through the unified chat execution path.
  // Agents with delegation enabled get delegation tools automatically via session-tools.
  let latestRun: ExecuteChatTurnResult = await executeSessionChatTurn({
    sessionId,
    message: prompt,
    internal: false,
    source: 'task',
    runId: task.id,
  })
  let text = typeof latestRun.text === 'string' ? latestRun.text.trim() : ''
  let previousSummary: string | null = null
  let totalInputTokens = latestRun.inputTokens || 0
  let totalOutputTokens = latestRun.outputTokens || 0
  let totalEstimatedCost = Number(latestRun.estimatedCost || 0)
  // A failed first turn is returned as-is; the supervisor loop only runs after a successful turn.
  if (latestRun.error) {
    return {
      ...latestRun,
      text,
    }
  }

  const maxSupervisorFollowups = 2
  for (let followupIndex = 0; followupIndex < maxSupervisorFollowups; followupIndex += 1) {
    // Reload sessions each iteration so the assessment sees the state left by the previous turn.
    const sessions = loadSessions()
    const session = sessions[sessionId] as unknown as Session | undefined
    const assessment = assessAutonomyRun({
      runId: `${task.id}:attempt-${(task.attempts || 0) + 1}:step-${followupIndex + 1}`,
      sessionId,
      taskId: task.id,
      agentId: agent.id,
      source: 'task',
      status: latestRun.error ? 'failed' : 'completed',
      resultText: text,
      error: latestRun.error,
      toolEvents: latestRun.toolEvents,
      mainLoopState: {
        followupChainCount: followupIndex + 1,
        summary: previousSummary,
        missionCostUsd: totalEstimatedCost,
      },
      session: session || null,
      settings,
    })
    if (assessment.shouldBlock) break
    if (assessment.autoActions?.length) {
      const { executeSupervisorAutoActions } = await import('@/lib/server/autonomy/supervisor-reflection')
      const result = await executeSupervisorAutoActions({
        actions: assessment.autoActions,
        sessionId,
        agentId: agent.id,
      })
      if (result.blocked) break
    }
    // The supervisor's prompt wins; otherwise nudge only when the reply looks unfinished.
    const followupMessage = assessment.interventionPrompt
      || (text && looksIncomplete(text)
        ? 'Continue and complete the remaining steps. Provide a final summary when done.'
        : null)
    if (!followupMessage) break

    // Budget check before follow-up
    if (agent.monthlyBudget || agent.dailyBudget || agent.hourlyBudget) {
      try {
        const followupBudget = checkAgentBudgetLimits(agent)
        if (!followupBudget.ok) {
          log.warn(TAG, `[queue] Budget exceeded for "${agent.name}" during follow-up, stopping.`)
          break
        }
      } catch (err: unknown) {
        // Best-effort gate: a failing budget lookup must not abort the run, but it should be visible.
        log.warn(TAG, `[queue] Budget check failed for "${agent.name}" during follow-up, continuing:`, err)
      }
    }

    previousSummary = text || previousSummary
    const followUp = await executeSessionChatTurn({
      sessionId,
      message: followupMessage,
      internal: false,
      source: 'task',
    })
    totalInputTokens += followUp.inputTokens || 0
    totalOutputTokens += followUp.outputTokens || 0
    totalEstimatedCost += Number(followUp.estimatedCost || 0)
    text = typeof followUp.text === 'string' ? followUp.text.trim() : ''
    latestRun = {
      ...followUp,
      text,
      inputTokens: totalInputTokens,
      outputTokens: totalOutputTokens,
      estimatedCost: totalEstimatedCost,
    }
    if (latestRun.error) break
  }

  return {
    ...latestRun,
    text,
    inputTokens: totalInputTokens,
    outputTokens: totalOutputTokens,
    estimatedCost: totalEstimatedCost,
  }
}
787
+
788
/** A session counts as finished when it exists, `active` is exactly false, and no run id is set. */
function hasFinishedExecutionSession(session: SessionLike | Session | null | undefined): boolean {
  const isInactive = session?.active === false
  const hasNoCurrentRun = !session?.currentRunId
  return isInactive && hasNoCurrentRun
}
792
+
793
/**
 * Settles tasks that are still marked 'running' although their execution
 * session has finished (see hasFinishedExecutionSession).
 *
 * For each such task the result is rebuilt from task.result or the session's
 * latest assistant text and re-validated. A valid result marks the task
 * completed; an invalid one is handed to scheduleRetryOrDeadLetter. Tasks,
 * sessions and the queue are saved only when something changed, and terminal
 * notifications are emitted after saving.
 *
 * @returns `reconciled` counts tasks recovered as completed or re-queued for
 *   retry; `deadLettered` counts tasks that failed validation with no retry.
 */
+ export function reconcileFinishedRunningTasks(): { reconciled: number; deadLettered: number } {
794
+ const tasks = loadTasks()
795
+ const sessions = loadSessions() as Record<string, SessionLike>
796
+ const settings = loadSettings()
797
+ const queue = loadQueue()
798
+ const now = Date.now()
799
+ let reconciled = 0
800
+ let deadLettered = 0
801
+ let tasksDirty = false
802
+ let sessionsDirty = false
803
+ let queueDirty = false
804
+ const terminalTasks: BoardTask[] = []
805
+
806
+ for (const task of Object.values(tasks) as BoardTask[]) {
807
+ if (task.status !== 'running') continue
808
+ const sessionId = typeof task.sessionId === 'string' ? task.sessionId : ''
809
+ if (!sessionId) continue
810
+ const session = sessions[sessionId]
811
+ if (!hasFinishedExecutionSession(session)) continue
812
+
813
+ const fallbackText = latestAssistantText(session)
// No output at all: fail directly. This path skips validation, the counters,
// terminalTasks (so no terminal notifications) and the heartbeat update below.
814
+ if (!fallbackText && !task.result) {
815
+ task.status = 'failed'
816
+ task.result = 'Agent session finished without producing output.'
817
+ task.updatedAt = now
818
+ tasksDirty = true
819
+ continue
820
+ }
821
+
822
+ applyTaskPolicyDefaults(task)
823
+ const taskResult = extractTaskResult(
824
+ session,
825
+ task.result || fallbackText || null,
826
+ { sinceTime: typeof task.startedAt === 'number' ? task.startedAt : null },
827
+ )
828
+ const enrichedResult = formatResultBody(taskResult)
// Stored result is capped at 4000 characters; artifacts and output files at 24 entries each.
829
+ task.result = enrichedResult.slice(0, 4000) || null
830
+ task.artifacts = taskResult.artifacts.slice(0, 24)
831
+ task.outputFiles = extractLikelyOutputFiles(enrichedResult).slice(0, 24)
832
+ task.updatedAt = now
833
+ const { validation } = refreshTaskCompletionValidation(task, settings)
834
+ if (!task.comments) task.comments = []
835
+
836
+ if (validation.ok) {
837
+ markValidatedTaskCompleted(task, { now })
838
+ task.retryScheduledAt = null
839
+ task.deadLetteredAt = null
840
+ task.checkpoint = {
841
+ ...(task.checkpoint || {}),
// NOTE(review): lastRunId is set to the session id here — confirm this is intended.
842
+ lastRunId: sessionId,
843
+ lastSessionId: sessionId,
844
+ note: 'Recovered completed task state from finished session.',
845
+ updatedAt: now,
846
+ }
847
+ task.comments.push({
848
+ id: genId(),
849
+ author: 'System',
850
+ text: 'Recovered completed task state from a finished execution session.',
851
+ createdAt: now,
852
+ })
853
+ reconciled++
854
+ terminalTasks.push(task)
855
+ } else {
856
+ const failureReason = formatValidationFailure(validation.reasons).slice(0, 500)
857
+ const retryState = scheduleRetryOrDeadLetter(task, failureReason)
858
+ task.completedAt = retryState === 'dead_lettered' ? null : task.completedAt
859
+ task.comments.push({
860
+ id: genId(),
861
+ author: 'System',
862
+ text: `Recovered finished session but the task result failed validation.\n\n${validation.reasons.map((reason) => `- ${reason}`).join('\n')}`,
863
+ createdAt: now,
864
+ })
865
+ if (retryState === 'retry') {
// A retry puts the task back on the queue and counts as reconciled, not dead-lettered.
866
+ pushQueueUnique(queue, task.id)
867
+ queueDirty = true
868
+ reconciled++
869
+ pushMainLoopEventToMainSessions({
870
+ type: 'task_retry_scheduled',
871
+ text: `Task retry scheduled: "${task.title}" (${task.id}) attempt ${task.attempts}/${task.maxAttempts} in ${task.retryBackoffSec}s.`,
872
+ })
873
+ } else {
874
+ deadLettered++
875
+ terminalTasks.push(task)
876
+ }
877
+ }
878
+
// The session is done, so turn its heartbeat off unless it is already disabled.
879
+ if (session.heartbeatEnabled !== false) {
880
+ session.heartbeatEnabled = false
881
+ session.lastActiveAt = now
882
+ sessionsDirty = true
883
+ }
884
+ tasksDirty = true
885
+ }
886
+
// Save state first; the terminal notifications below run afterwards.
887
+ if (tasksDirty) {
888
+ saveTasks(tasks)
889
+ notify('tasks')
890
+ notify('runs')
891
+ }
892
+ if (sessionsDirty) saveSessions(sessions as Record<string, Session>)
893
+ if (queueDirty) saveQueue(queue)
894
+
895
+ for (const task of terminalTasks) {
896
+ if (task.status === 'completed') {
897
+ logActivity({ entityType: 'task', entityId: task.id, action: 'completed', actor: 'system', actorId: task.agentId, summary: `Task completed: "${task.title}"` })
898
+ pushMainLoopEventToMainSessions({
899
+ type: 'task_completed',
900
+ text: `Task completed: "${task.title}" (${task.id})`,
901
+ })
902
+ notifyOrchestrators(`Task completed: "${task.title}"`, `task-complete:${task.id}`)
903
+ } else if (task.status === 'failed') {
904
+ logActivity({ entityType: 'task', entityId: task.id, action: 'failed', actor: 'system', actorId: task.agentId, summary: `Task failed: "${task.title}"` })
905
+ pushMainLoopEventToMainSessions({
906
+ type: 'task_failed',
907
+ text: `Task failed validation: "${task.title}" (${task.id})`,
908
+ })
909
+ notifyOrchestrators(`Task failed: "${task.title}" — validation failure`, `task-fail:${task.id}`)
910
+ }
911
+ handleTerminalTaskResultDeliveries(task)
912
+ cleanupTerminalOneOffSchedule(task)
913
+ }
914
+
915
+ return { reconciled, deadLettered }
916
+ }
917
+
918
/** Currently a no-op; the parameter is evaluated only so it counts as used. */
function cleanupTerminalOneOffSchedule(task: BoardTask): void {
  return void task
}
921
+
922
/**
 * Posts a completed/failed task's result as a system-kind assistant message in
 * the chat session the task was created from, then saves sessions and notifies
 * listeners of that session's messages.
 *
 * Nothing is posted when the task is not terminal, no target session resolves,
 * or the identical body was already the last assistant message within the
 * past 30 seconds.
 */
function pushUserFacingTaskResult(task: BoardTask, sessions: Record<string, SessionLike>): void {
  const isTerminal = task.status === 'completed' || task.status === 'failed'
  if (!isTerminal) return

  const targetSessionId = resolveTaskTerminalChatSessionId(task, sessions)
  const targetSession = targetSessionId ? sessions[targetSessionId] : undefined
  if (!targetSessionId || !targetSession) return

  const delivery = collectTaskResultDeliveryData(task, sessions)
  const heading = `Task ${delivery.statusLabel}: **[${task.title}](#task:${task.id})**`
  const body = buildTaskTerminalMessage(heading, task, delivery)
  const now = Date.now()

  if (!Array.isArray(targetSession.messages)) targetSession.messages = []
  const history = targetSession.messages
  const previous = history[history.length - 1]
  // Suppress an exact duplicate delivered moments ago.
  const isRecentDuplicate = previous?.role === 'assistant'
    && previous?.text === body
    && typeof previous?.time === 'number'
    && now - previous.time < 30_000
  if (isRecentDuplicate) return

  const message: Message = { role: 'assistant', text: body, time: now, kind: 'system' }
  if (delivery.firstImage) message.imageUrl = delivery.firstImage.url
  history.push(message)
  targetSession.lastActiveAt = now
  saveSessions(sessions as Record<string, Session>)
  notify(`messages:${targetSessionId}`)
}
951
+
952
/**
 * Starts connector follow-up delivery for a completed or failed task, passing
 * the status label, summary text, and any image or media attachment. The call
 * is fire-and-forget.
 */
function deliverTaskConnectorFollowups(task: BoardTask, sessions: Record<string, SessionLike>): void {
  const isTerminal = task.status === 'completed' || task.status === 'failed'
  if (!isTerminal) return

  const {
    statusLabel,
    resultBody,
    firstImage,
    followupMediaPath,
    mediaFileName,
  } = collectTaskResultDeliveryData(task, sessions)

  void notifyConnectorTaskFollowups({
    task,
    statusLabel,
    summaryText: resultBody || '',
    imageUrl: firstImage?.url,
    mediaPath: followupMediaPath,
    mediaFileName,
  })
}
964
+
965
/**
 * Delivers a terminal task's result to both audiences, using one freshly
 * loaded sessions snapshot: first the originating chat, then connector
 * follow-ups.
 */
function handleTerminalTaskResultDeliveries(task: BoardTask): void {
  const sessionSnapshot = loadSessions() as Record<string, SessionLike>
  pushUserFacingTaskResult(task, sessionSnapshot)
  deliverTaskConnectorFollowups(task, sessionSnapshot)
}
970
+
971
/**
 * Turns the heartbeat off for the given session and saves the change.
 * Does nothing when the id is empty, the session is unknown, or the heartbeat
 * is already disabled.
 */
export function disableSessionHeartbeat(sessionId: string | null | undefined) {
  if (!sessionId) return

  const allSessions = loadSessions()
  const target = allSessions[sessionId]
  if (!target) return
  if (target.heartbeatEnabled === false) return

  target.heartbeatEnabled = false
  target.lastActiveAt = Date.now()
  saveSessions(allSessions)
  log.info(TAG, `[queue] Disabled heartbeat on session ${sessionId} (task finished)`)
}
982
+
983
/**
 * Marks a task as queued, saves it, adds its id to the queue (without
 * duplicates), records the activity, and schedules a worker kick two seconds
 * later. Unknown task ids are ignored.
 */
export function enqueueTask(taskId: string) {
  const taskStore = loadTasks()
  const target = taskStore[taskId] as BoardTask | undefined
  if (!target) return

  applyTaskPolicyDefaults(target)
  target.status = 'queued'
  target.queuedAt = Date.now()
  target.retryScheduledAt = null
  target.updatedAt = Date.now()
  saveTasks(taskStore)

  const pendingIds = loadQueue()
  pushQueueUnique(pendingIds, taskId)
  saveQueue(pendingIds)

  logActivity({
    entityType: 'task',
    entityId: taskId,
    action: 'queued',
    actor: 'system',
    summary: `Task queued: "${target.title}"`,
  })

  pushMainLoopEventToMainSessions({
    type: 'task_queued',
    text: `Task queued: "${target.title}" (${target.id})`,
  })

  // When every worker slot is busy, flag a pending kick so work is picked up once a slot frees.
  const atCapacity = _queueState.activeCount >= _queueState.maxConcurrent
  if (atCapacity) _queueState.pendingKick = true

  // Kick the worker after a short delay so the UI can show the queued state first.
  setTimeout(() => {
    void processNext()
  }, 2000)
}
1013
+
1014
/**
 * Audits every task currently marked completed by re-running completion
 * validation. Tasks that still validate are left completed (with a completion
 * timestamp backfilled when missing); tasks that no longer validate are
 * demoted to failed and their session heartbeat is switched off.
 *
 * @returns How many completed tasks were checked and how many were demoted.
 */
export function validateCompletedTasksQueue() {
  const taskStore = loadTasks()
  const sessionStore = loadSessions()
  const settings = loadSettings()
  const now = Date.now()
  let checked = 0
  let demoted = 0
  let tasksDirty = false
  let sessionsDirty = false

  for (const task of Object.values(taskStore) as BoardTask[]) {
    if (task.status !== 'completed') continue
    checked += 1

    // Snapshot the fields validation may rewrite so changes can be detected.
    const validationBefore = task.validation || null
    const reportPathBefore = task.completionReportPath || null
    const { validation } = refreshTaskCompletionValidation(task, settings)
    const reportPathChanged = task.completionReportPath !== reportPathBefore
    const validationChanged = didTaskValidationChange(validationBefore, validation)
    if (reportPathChanged || validationChanged) tasksDirty = true

    if (validation.ok) {
      if (!task.completedAt) {
        markValidatedTaskCompleted(task, { now, preserveCompletedAt: true })
        tasksDirty = true
      }
      continue
    }

    const reasonList = validation.reasons.map((r) => `- ${r}`).join('\n')
    markInvalidCompletedTaskFailed(task, validation, {
      now,
      comment: {
        author: 'System',
        text: `Task auto-failed completed-queue validation.\n\n${reasonList}`,
      },
    })
    tasksDirty = true
    demoted += 1

    const linkedSession = task.sessionId ? sessionStore[task.sessionId] : undefined
    if (linkedSession && linkedSession.heartbeatEnabled !== false) {
      linkedSession.heartbeatEnabled = false
      linkedSession.lastActiveAt = now
      sessionsDirty = true
    }
  }

  if (tasksDirty) {
    saveTasks(taskStore)
    notify('tasks')
  }
  if (sessionsDirty) saveSessions(sessionStore)
  if (demoted > 0) {
    log.warn(TAG, `[queue] Demoted ${demoted} invalid completed task(s) to failed after validation audit`)
  }
  return { checked, demoted }
}
1079
+
1080
/**
 * Records a failed attempt on `task` and decides what happens next.
 *
 * - Cancelled tasks have their retry/dead-letter timestamps cleared and are
 *   reported as 'dead_lettered' without touching the attempt counter.
 * - While attempts remain, the task is re-queued with a jittered backoff and
 *   'retry' is returned.
 * - Otherwise the task is failed and dead-lettered: orchestrators are
 *   notified, a supervisor incident is recorded when the task has a session,
 *   and for agents with `autoRecovery` a guardian restore request is prepared.
 *
 * The task object is mutated in place; saving it is left to the caller.
 */
function scheduleRetryOrDeadLetter(task: BoardTask, reason: string): 'retry' | 'dead_lettered' {
  if (isCancelledTask(task)) {
    task.retryScheduledAt = null
    task.deadLetteredAt = null
    task.updatedAt = Date.now()
    return 'dead_lettered'
  }

  applyTaskPolicyDefaults(task)
  const now = Date.now()
  const attemptNumber = (task.attempts || 0) + 1
  task.attempts = attemptNumber

  const addComment = (author: string, text: string, createdAt: number): void => {
    if (!task.comments) task.comments = []
    task.comments.push({ id: genId(), author, text, createdAt })
  }

  const hasAttemptsLeft = (task.attempts || 0) < (task.maxAttempts || 1)
  if (hasAttemptsLeft) {
    const baseDelayMs = (task.retryBackoffSec || 30) * 1000
    const priorFailures = Math.max(0, (task.attempts || 1) - 1)
    const delayMs = jitteredBackoff(baseDelayMs, priorFailures, 6 * 3600_000)
    task.status = 'queued'
    task.retryScheduledAt = now + delayMs
    task.updatedAt = now
    task.error = `Retry scheduled after failure: ${reason}`.slice(0, 500)
    addComment(
      'System',
      `Attempt ${task.attempts}/${task.maxAttempts} failed. Retrying in ${Math.round(delayMs / 1000)}s.\n\nReason: ${reason}`,
      now,
    )
    return 'retry'
  }

  // Out of attempts: fail the task and move it to dead-letter.
  task.status = 'failed'
  task.deadLetteredAt = now
  task.retryScheduledAt = null
  task.updatedAt = now
  task.error = `Dead-lettered after ${task.attempts}/${task.maxAttempts} attempts: ${reason}`.slice(0, 500)
  addComment(
    'System',
    `Task moved to dead-letter after ${task.attempts}/${task.maxAttempts} attempts.\n\nReason: ${reason}`,
    now,
  )
  notifyOrchestrators(`Task failed: "${task.title}" — ${(reason || 'unknown error').slice(0, 100)}`, `task-fail:${task.id}`)

  if (task.sessionId) {
    const failure = classifyRuntimeFailure({ source: 'task', message: reason })
    recordSupervisorIncident({
      runId: task.id,
      sessionId: task.sessionId,
      taskId: task.id,
      agentId: task.agentId || null,
      source: 'task',
      kind: 'runtime_failure',
      severity: failure.severity,
      summary: `Task dead-lettered: ${reason}`.slice(0, 320),
      details: reason,
      failureFamily: failure.family,
      remediation: failure.remediation,
      repairPrompt: failure.repairPrompt,
      autoAction: null,
    })
  }

  // Guardian recovery is approval-backed. Dead-lettering prepares a restore
  // request instead of mutating the workspace automatically.
  const agents = loadAgents()
  const owningAgent = task.agentId ? agents[task.agentId] : null
  if (!owningAgent?.autoRecovery) return 'dead_lettered'

  let recoveryCwd = WORKSPACE_DIR
  if (task.projectId) recoveryCwd = path.join(WORKSPACE_DIR, 'projects', task.projectId)
  const recovery = prepareGuardianRecovery({
    cwd: recoveryCwd,
    reason,
    requester: `task:${task.id}`,
  })
  // Guardian comments are stamped one millisecond after the system comment.
  const guardianText = recovery.ok && recovery.approval
    ? `Recovery prepared for checkpoint ${recovery.checkpoint?.head.slice(0, 12) || 'unknown'}.\n\nApprove restore request ${recovery.approval.id} to roll the workspace back safely.`
    : `Recovery advisory: ${recovery.reason || 'Unable to prepare a restore request.'}`
  task.comments.push({
    id: genId(),
    author: 'Guardian',
    text: guardianText,
    createdAt: now + 1,
  })

  return 'dead_lettered'
}
1171
+
1172
/**
 * Removes and returns the id of the first task in `queue` that may run now.
 *
 * The queue is mutated in place: ids with no task or whose task is not
 * 'queued' are pruned first. A task is runnable when its retry time (if any)
 * has passed, every `blockedBy` task is completed, and, for pool-mode tasks,
 * an agent has claimed it. Returns null when nothing is runnable.
 */
export function dequeueNextRunnableTask(queue: string[], tasks: Record<string, BoardTask>): string | null {
  const now = Date.now()

  // Prune stale ids back-to-front so splicing does not shift unvisited entries.
  let cursor = queue.length
  while (cursor-- > 0) {
    if (tasks[queue[cursor]]?.status !== 'queued') queue.splice(cursor, 1)
  }

  const isRunnable = (id: string): boolean => {
    const candidate = tasks[id]
    if (!candidate) return false
    const retryAt = typeof candidate.retryScheduledAt === 'number' ? candidate.retryScheduledAt : null
    if (retryAt && retryAt > now) return false
    const blockers = Array.isArray(candidate.blockedBy) ? candidate.blockedBy : []
    const allBlockersDone = blockers.every((blockerId) => tasks[blockerId]?.status === 'completed')
    if (!allBlockersDone) return false
    // Skip pool-mode tasks that haven't been claimed yet
    const awaitingClaim = candidate.assignmentMode === 'pool' && !candidate.claimedByAgentId
    return !awaitingClaim
  }

  for (let position = 0; position < queue.length; position += 1) {
    if (!isRunnable(queue[position])) continue
    const [taskId] = queue.splice(position, 1)
    return taskId || null
  }
  return null
}
1197
+
1198
+ export async function processNext() {
1199
+ const settings = loadSettings()
1200
+ _queueState.maxConcurrent = normalizeInt(
1201
+ (settings as Record<string, unknown>).taskQueueConcurrency, 3, 1, 10
1202
+ )
1203
+
1204
+ if (_queueState.activeCount >= _queueState.maxConcurrent) {
1205
+ _queueState.pendingKick = true
1206
+ return
1207
+ }
1208
+ _queueState.activeCount++
1209
+ const endQueuePerf = perf.start('queue', 'processNext')
1210
+
1211
+ try {
1212
+ // Recover orphaned tasks: status is 'queued' but missing from the queue array
1213
+ // Only run from the first worker to avoid redundant scans
1214
+ if (_queueState.activeCount === 1) {
1215
+ const allTasks = loadTasks()
1216
+ const currentQueue = loadQueue()
1217
+ const queueSet = new Set(currentQueue)
1218
+ let recovered = false
1219
+ for (const [id, t] of Object.entries(allTasks) as [string, BoardTask][]) {
1220
+ if (t.status === 'queued' && !queueSet.has(id)) {
1221
+ log.info(TAG, `[queue] Recovering orphaned queued task: "${t.title}" (${id})`)
1222
+ pushQueueUnique(currentQueue, id)
1223
+ recovered = true
1224
+ }
1225
+ }
1226
+ if (recovered) saveQueue(currentQueue)
1227
+ }
1228
+
1229
+ // Process ONE task per invocation (no while loop)
1230
+ {
1231
+ const tasks = loadTasks()
1232
+ const queue = loadQueue()
1233
+ if (queue.length === 0) return
1234
+
1235
+ const taskId = dequeueNextRunnableTask(queue, tasks as Record<string, BoardTask>)
1236
+ saveQueue(queue)
1237
+ if (!taskId) return
1238
+ const latestTasks = loadTasks() as Record<string, BoardTask>
1239
+ let task = latestTasks[taskId] as BoardTask | undefined
1240
+
1241
+ if (!task || task.status !== 'queued') {
1242
+ return
1243
+ }
1244
+
1245
+ // Dependency guard: skip tasks whose blockers are not all completed
1246
+ const blockers = Array.isArray(task.blockedBy) ? task.blockedBy as string[] : []
1247
+ if (blockers.length > 0) {
1248
+ const allBlockersDone = blockers.every((bid) => {
1249
+ const blocker = latestTasks[bid] as BoardTask | undefined
1250
+ return blocker?.status === 'completed'
1251
+ })
1252
+ if (!allBlockersDone) {
1253
+ // Put it back in the queue and skip
1254
+ pushQueueUnique(queue, taskId)
1255
+ saveQueue(queue)
1256
+ log.info(TAG, `[queue] Skipping task "${task.title}" (${taskId}) — blocked by incomplete dependencies`)
1257
+ return
1258
+ }
1259
+ }
1260
+
1261
+ const agents = loadAgents()
1262
+ let agent = agents[task.agentId]
1263
+ if (!agent) {
1264
+ task.status = 'failed'
1265
+ task.deadLetteredAt = Date.now()
1266
+ task.error = `Agent ${task.agentId} not found`
1267
+ task.updatedAt = Date.now()
1268
+ saveTasks(latestTasks)
1269
+ pushMainLoopEventToMainSessions({
1270
+ type: 'task_failed',
1271
+ text: `Task failed: "${task.title}" (${task.id}) — agent not found.`,
1272
+ })
1273
+ return
1274
+ }
1275
+
1276
+ // Capability matching — reroute if assigned agent doesn't have required capabilities
1277
+ const reqCaps = Array.isArray(task.requiredCapabilities) ? task.requiredCapabilities as string[] : []
1278
+ if (reqCaps.length > 0 && !matchesCapabilities(agent.capabilities, reqCaps)) {
1279
+ const candidates = filterAgentsByCapabilities(agents, reqCaps)
1280
+ .filter((a) => a.id !== agent!.id && !a.disabled)
1281
+ if (candidates.length > 0) {
1282
+ // Pick best match by capability score, then alphabetically for stability
1283
+ candidates.sort((a, b) => {
1284
+ const scoreA = capabilityMatchScore(a.capabilities, reqCaps)
1285
+ const scoreB = capabilityMatchScore(b.capabilities, reqCaps)
1286
+ if (scoreB !== scoreA) return scoreB - scoreA
1287
+ return a.name.localeCompare(b.name)
1288
+ })
1289
+ const rerouted = candidates[0]
1290
+ log.info(TAG, `[queue] Rerouting task "${task.title}" (${taskId}) from agent "${agent.name}" to "${rerouted.name}" — capability match`)
1291
+ task.agentId = rerouted.id
1292
+ agent = rerouted
1293
+ } else {
1294
+ task.status = 'failed'
1295
+ task.deadLetteredAt = Date.now()
1296
+ task.error = `No agent matches required capabilities: [${reqCaps.join(', ')}]`
1297
+ task.updatedAt = Date.now()
1298
+ saveTasks(latestTasks)
1299
+ pushMainLoopEventToMainSessions({
1300
+ type: 'task_failed',
1301
+ text: `Task failed: "${task.title}" (${task.id}) — no agent matches required capabilities [${reqCaps.join(', ')}].`,
1302
+ })
1303
+ return
1304
+ }
1305
+ }
1306
+
1307
+ if (isAgentDisabled(agent)) {
1308
+ const now = Date.now()
1309
+ task.deferredReason = buildAgentDisabledMessage(agent, 'process queued tasks')
1310
+ task.status = 'deferred'
1311
+ task.updatedAt = now
1312
+ task.retryScheduledAt = null
1313
+ saveTasks(latestTasks)
1314
+ notify('tasks')
1315
+ pushMainLoopEventToMainSessions({
1316
+ type: 'task_deferred',
1317
+ text: `Task deferred: "${task.title}" (${task.id}) — agent ${task.agentId} is disabled.`,
1318
+ })
1319
+ return
1320
+ }
1321
+
1322
+ // Budget enforcement gate
1323
+ const typedAgent = agent as Agent
1324
+ if (typedAgent.monthlyBudget || typedAgent.dailyBudget || typedAgent.hourlyBudget) {
1325
+ try {
1326
+ const budgetCheck = checkAgentBudgetLimits(typedAgent)
1327
+ if (!budgetCheck.ok) {
1328
+ const now = Date.now()
1329
+ const exceeded = budgetCheck.exceeded[0]
1330
+ task.status = 'deferred'
1331
+ task.deferredReason = exceeded?.message || 'Agent budget exceeded'
1332
+ task.retryScheduledAt = null
1333
+ task.updatedAt = now
1334
+ saveTasks(latestTasks)
1335
+ notify('tasks')
1336
+
1337
+ recordSupervisorIncident({
1338
+ runId: task.id,
1339
+ sessionId: task.sessionId || '',
1340
+ taskId: task.id,
1341
+ agentId: typedAgent.id,
1342
+ source: 'task',
1343
+ kind: 'budget_pressure',
1344
+ severity: 'high',
1345
+ summary: exceeded?.message || `Agent "${typedAgent.name}" budget exceeded, task deferred.`,
1346
+ autoAction: 'budget_trim',
1347
+ })
1348
+ return
1349
+ }
1350
+ } catch {}
1351
+ }
1352
+
1353
+ const beforeStartTasks = loadTasks() as Record<string, BoardTask>
1354
+ task = beforeStartTasks[taskId] as BoardTask | undefined
1355
+ if (!task || task.status !== 'queued') {
1356
+ return
1357
+ }
1358
+
1359
+ // Mark as running
1360
+ applyTaskPolicyDefaults(task)
1361
+ task.status = 'running'
1362
+ task.startedAt = Date.now()
1363
+ task.lastActivityAt = Date.now()
1364
+ task.retryScheduledAt = null
1365
+ task.deadLetteredAt = null
1366
+ // Clear transient failure fields so validation/error state reflects only this attempt.
1367
+ task.error = null
1368
+ task.validation = null
1369
+ task.updatedAt = Date.now()
1370
+ logActivity({ entityType: 'task', entityId: taskId, action: 'running', actor: 'system', actorId: task.agentId, summary: `Task started: "${task.title}"` })
1371
+
1372
+ const sessionsForCwd = loadSessions() as Record<string, SessionLike>
1373
+ const taskCwd = resolveTaskExecutionCwd(task as ScheduleTaskMeta, sessionsForCwd)
1374
+ task.cwd = taskCwd
1375
+ let sessionId = ''
1376
+ const scheduleTask = task as ScheduleTaskMeta
1377
+ const isScheduleTask = scheduleTask.sourceType === 'schedule'
1378
+ const sourceScheduleId = typeof scheduleTask.sourceScheduleId === 'string'
1379
+ ? scheduleTask.sourceScheduleId
1380
+ : ''
1381
+ const reusableTaskSessionId = resolveReusableTaskSessionId(task, beforeStartTasks, sessionsForCwd)
1382
+ const resumeContext = resolveTaskResumeContext(task, beforeStartTasks, sessionsForCwd as Record<string, SessionLike | Session>)
1383
+
1384
+ // Resolve the agent's persistent thread session to use as parentSessionId
1385
+ const agentThreadSessionId = agent.threadSessionId || null
1386
+ const taskRoutePreferences = deriveTaskRoutePreferences(task)
1387
+
1388
+ if (isScheduleTask && sourceScheduleId) {
1389
+ const schedules = loadSchedules()
1390
+ const linkedSchedule = schedules[sourceScheduleId]
1391
+ const linkedScheduleRecord = linkedSchedule as unknown as Record<string, unknown> | undefined
1392
+ const existingSessionId = typeof linkedScheduleRecord?.lastSessionId === 'string'
1393
+ ? linkedScheduleRecord.lastSessionId
1394
+ : ''
1395
+ if (existingSessionId) {
1396
+ const sessions = loadSessions()
1397
+ if (sessions[existingSessionId]) {
1398
+ sessionId = existingSessionId
1399
+ }
1400
+ }
1401
+ if (!sessionId) {
1402
+ sessionId = createAgentTaskSession(
1403
+ agent,
1404
+ task.title,
1405
+ agentThreadSessionId || undefined,
1406
+ taskCwd,
1407
+ taskRoutePreferences,
1408
+ )
1409
+ }
1410
+ if (linkedScheduleRecord && linkedScheduleRecord.lastSessionId !== sessionId) {
1411
+ linkedScheduleRecord.lastSessionId = sessionId
1412
+ linkedScheduleRecord.updatedAt = Date.now()
1413
+ const updatedLinkedSchedule = linkedScheduleRecord as unknown as typeof linkedSchedule
1414
+ schedules[sourceScheduleId] = updatedLinkedSchedule
1415
+ saveSchedules(schedules)
1416
+ }
1417
+ } else {
1418
+ sessionId = reusableTaskSessionId || createAgentTaskSession(
1419
+ agent,
1420
+ task.title,
1421
+ agentThreadSessionId || undefined,
1422
+ taskCwd,
1423
+ taskRoutePreferences,
1424
+ )
1425
+ }
1426
+
1427
+ const executionSessions = loadSessions() as Record<string, Session>
1428
+ const executionSession = executionSessions[sessionId]
1429
+ const seededResumeState = executionSession
1430
+ ? applyTaskResumeStateToSession(executionSession, resumeContext?.resume)
1431
+ : false
1432
+ if (seededResumeState) saveSessions(executionSessions)
1433
+
1434
+ task.sessionId = sessionId
1435
+ const reusedExistingSession = !isScheduleTask && Boolean(reusableTaskSessionId) && reusableTaskSessionId === sessionId
1436
+ const continuationBits: string[] = []
1437
+ if (reusedExistingSession) {
1438
+ continuationBits.push('reusing prior session')
1439
+ }
1440
+ if (resumeContext?.source === 'delegated_from_task' || resumeContext?.source === 'blocked_by') {
1441
+ continuationBits.push(`seeded from task ${resumeContext.sourceTaskId}`)
1442
+ } else if (seededResumeState) {
1443
+ continuationBits.push('restored CLI resume handles')
1444
+ }
1445
+ task.checkpoint = {
1446
+ lastSessionId: sessionId,
1447
+ note: `Attempt ${(task.attempts || 0) + 1}/${task.maxAttempts || '?'} started${continuationBits.length ? ` (${continuationBits.join('; ')})` : ''}`,
1448
+ updatedAt: Date.now(),
1449
+ }
1450
+ saveTasks(beforeStartTasks)
1451
+ noteMissionTaskStarted(task, task.id)
1452
+ pushMainLoopEventToMainSessions({
1453
+ type: 'task_running',
1454
+ text: `Task running: "${task.title}" (${task.id}) with ${agent.name}`,
1455
+ })
1456
+
1457
+ // Save initial assistant message so user sees context when opening the session
1458
+ const sessions = loadSessions()
1459
+ if (sessions[sessionId]) {
1460
+ const isDelegation = (task as unknown as Record<string, unknown>).sourceType === 'delegation'
1461
+ let initialText: string
1462
+ if (isDelegation) {
1463
+ const delegatorId = (task as unknown as Record<string, unknown>).delegatedByAgentId as string | undefined
1464
+ const delegator = delegatorId ? agents[delegatorId] : null
1465
+ const prefix = `[delegation-source:${delegatorId || ''}:${delegator?.name || 'Agent'}:${delegator?.avatarSeed || ''}]`
1466
+ initialText = `${prefix}\nDelegated by **${delegator?.name || 'another agent'}** | [${task.title}](#task:${task.id})\n\n${task.description || ''}\n\nWorking directory: \`${taskCwd}\`${buildTaskContinuationNote(Boolean(reusedExistingSession), resumeContext)}\n\nI'll begin working on this now.`
1467
+ } else {
1468
+ initialText = `Starting task: **${task.title}**\n\n${task.description || ''}\n\nWorking directory: \`${taskCwd}\`${buildTaskContinuationNote(Boolean(reusedExistingSession), resumeContext)}\n\nI'll begin working on this now.`
1469
+ }
1470
+ // Inject upstream task results context
1471
+ if (Array.isArray(task.upstreamResults) && task.upstreamResults.length > 0) {
1472
+ const upstreamBlock = task.upstreamResults
1473
+ .map((ur) => `### ${ur.taskTitle}\n${ur.resultPreview || '(no result)'}`)
1474
+ .join('\n\n')
1475
+ initialText += `\n\n## Context from upstream tasks\n\n${upstreamBlock}`
1476
+ }
1477
+ sessions[sessionId].messages.push({
1478
+ role: 'assistant',
1479
+ text: initialText,
1480
+ time: Date.now(),
1481
+ ...(isDelegation ? { kind: 'system' as const } : {}),
1482
+ })
1483
+ saveSessions(sessions)
1484
+ }
1485
+
1486
+ log.info(TAG, `[queue] Running task "${task.title}" (${taskId}) with ${agent.name}`)
1487
+
1488
+ try {
1489
+ const taskRunId = `${taskId}:attempt-${(task.attempts || 0) + 1}`
1490
+ const endTaskRunPerf = perf.start('queue', 'executeTaskRun', { taskId, agentName: agent.name })
1491
+ const taskRun = await executeTaskRun(task, agent, sessionId)
1492
+ endTaskRunPerf()
1493
+ // Update lastActivityAt after execution completes (idle timeout tracking)
1494
+ {
1495
+ const latestTasks = loadTasks() as Record<string, BoardTask>
1496
+ const updatedTask = latestTasks[taskId]
1497
+ if (updatedTask) {
1498
+ updatedTask.lastActivityAt = Date.now()
1499
+ saveTasks(latestTasks)
1500
+ }
1501
+ }
1502
+ const result = taskRun.error
1503
+ ? (taskRun.text || `Error: ${taskRun.error}`)
1504
+ : taskRun.text
1505
+ const t2 = loadTasks()
1506
+ const settings = loadSettings()
1507
+ if (isCancelledTask(t2[taskId])) {
1508
+ disableSessionHeartbeat(t2[taskId].sessionId)
1509
+ notify('tasks')
1510
+ notify('runs')
1511
+ queueTaskAutonomyObservation({
1512
+ runId: taskRunId,
1513
+ sessionId,
1514
+ taskId,
1515
+ agentId: agent.id,
1516
+ status: 'cancelled',
1517
+ error: t2[taskId].error || 'Task cancelled',
1518
+ toolEvents: taskRun.toolEvents,
1519
+ sourceMessage: task.description || task.title,
1520
+ })
1521
+ log.warn(TAG, `[queue] Task "${task.title}" cancelled during execution`)
1522
+ return
1523
+ }
1524
+ if (t2[taskId]) {
1525
+ applyTaskPolicyDefaults(t2[taskId])
1526
+ // Structured extraction: Zod-validated result with typed artifacts
1527
+ const runSessions = loadSessions()
1528
+ const taskResult = extractTaskResult(
1529
+ runSessions[sessionId],
1530
+ result || null,
1531
+ { sinceTime: typeof t2[taskId].startedAt === 'number' ? t2[taskId].startedAt : null },
1532
+ )
1533
+ const enrichedResult = formatResultBody(taskResult)
1534
+ t2[taskId].result = enrichedResult.slice(0, 4000) || null
1535
+ t2[taskId].artifacts = taskResult.artifacts.slice(0, 24)
1536
+ t2[taskId].outputFiles = extractLikelyOutputFiles(enrichedResult).slice(0, 24)
1537
+ t2[taskId].updatedAt = Date.now()
1538
+ const { validation } = refreshTaskCompletionValidation(t2[taskId], settings)
1539
+
1540
+ const now = Date.now()
1541
+ // Add a completion/failure comment from the executing agent.
1542
+ if (!t2[taskId].comments) t2[taskId].comments = []
1543
+
1544
+ if (validation.ok) {
1545
+ markValidatedTaskCompleted(t2[taskId], { now })
1546
+ t2[taskId].retryScheduledAt = null
1547
+ t2[taskId].checkpoint = {
1548
+ ...(t2[taskId].checkpoint || {}),
1549
+ lastRunId: sessionId,
1550
+ lastSessionId: sessionId,
1551
+ note: `Completed on attempt ${t2[taskId].attempts || 0}/${t2[taskId].maxAttempts || '?'}`,
1552
+ updatedAt: now,
1553
+ }
1554
+ t2[taskId].comments!.push({
1555
+ id: genId(),
1556
+ author: agent.name,
1557
+ agentId: agent.id,
1558
+ text: `Task completed.\n\n${result?.slice(0, 1000) || 'No summary provided.'}`,
1559
+ createdAt: now,
1560
+ })
1561
+ } else {
1562
+ const failureReason = formatValidationFailure(validation.reasons).slice(0, 500)
1563
+ const retryState = scheduleRetryOrDeadLetter(t2[taskId], failureReason)
1564
+ t2[taskId].completedAt = retryState === 'dead_lettered' ? null : t2[taskId].completedAt
1565
+ t2[taskId].comments!.push({
1566
+ id: genId(),
1567
+ author: agent.name,
1568
+ agentId: agent.id,
1569
+ text: `Task failed validation and was not marked completed.\n\n${validation.reasons.map((r) => `- ${r}`).join('\n')}`,
1570
+ createdAt: now,
1571
+ })
1572
+ if (retryState === 'retry') {
1573
+ const qRetry = loadQueue()
1574
+ pushQueueUnique(qRetry, taskId)
1575
+ saveQueue(qRetry)
1576
+ pushMainLoopEventToMainSessions({
1577
+ type: 'task_retry_scheduled',
1578
+ text: `Task retry scheduled: "${task.title}" (${taskId}) attempt ${t2[taskId].attempts}/${t2[taskId].maxAttempts} in ${t2[taskId].retryBackoffSec}s.`,
1579
+ })
1580
+ }
1581
+ }
1582
+
1583
+ // Copy ALL CLI resume IDs from the execution session to the task record
1584
+ try {
1585
+ const execSessions = loadSessions()
1586
+ const execSession = execSessions[sessionId] as unknown as Record<string, unknown> | undefined
1587
+ if (execSession) {
1588
+ const delegateIds = execSession.delegateResumeIds as
1589
+ | { claudeCode?: string | null; codex?: string | null; opencode?: string | null; gemini?: string | null }
1590
+ | undefined
1591
+ // Store each CLI resume ID separately
1592
+ const claudeId = (execSession.claudeSessionId as string) || delegateIds?.claudeCode || null
1593
+ const codexId = (execSession.codexThreadId as string) || delegateIds?.codex || null
1594
+ const opencodeId = (execSession.opencodeSessionId as string) || delegateIds?.opencode || null
1595
+ const geminiId = delegateIds?.gemini || null
1596
+ if (claudeId) t2[taskId].claudeResumeId = claudeId
1597
+ if (codexId) t2[taskId].codexResumeId = codexId
1598
+ if (opencodeId) t2[taskId].opencodeResumeId = opencodeId
1599
+ if (geminiId) t2[taskId].geminiResumeId = geminiId
1600
+ // Keep backward-compat single field (first available)
1601
+ const primaryId = claudeId || codexId || opencodeId || geminiId
1602
+ if (primaryId) {
1603
+ t2[taskId].cliResumeId = primaryId
1604
+ if (claudeId) t2[taskId].cliProvider = 'claude-cli'
1605
+ else if (codexId) t2[taskId].cliProvider = 'codex-cli'
1606
+ else if (opencodeId) t2[taskId].cliProvider = 'opencode-cli'
1607
+ else if (geminiId) t2[taskId].cliProvider = 'gemini-cli'
1608
+ }
1609
+ log.info(TAG, `[queue] CLI resume IDs for task ${taskId}: claude=${claudeId}, codex=${codexId}, opencode=${opencodeId}, gemini=${geminiId}`)
1610
+ }
1611
+ } catch (e) {
1612
+ log.warn(TAG, `[queue] Failed to extract CLI resume IDs for task ${taskId}:`, e)
1613
+ }
1614
+
1615
+ saveTasks(t2)
1616
+ notify('tasks')
1617
+ notify('runs')
1618
+ disableSessionHeartbeat(t2[taskId].sessionId)
1619
+ }
1620
+ const doneTask = t2[taskId]
1621
+ if (doneTask?.status === 'completed') {
1622
+ noteMissionTaskFinished(doneTask, 'completed', taskRunId)
1623
+ } else if (doneTask?.status === 'failed') {
1624
+ noteMissionTaskFinished(doneTask, 'failed', taskRunId)
1625
+ } else if (doneTask?.status === 'cancelled') {
1626
+ noteMissionTaskFinished(doneTask, 'cancelled', taskRunId)
1627
+ }
1628
+ queueTaskAutonomyObservation({
1629
+ runId: taskRunId,
1630
+ sessionId,
1631
+ taskId,
1632
+ agentId: agent.id,
1633
+ status: doneTask?.status === 'completed'
1634
+ ? 'completed'
1635
+ : doneTask?.status === 'cancelled'
1636
+ ? 'cancelled'
1637
+ : 'failed',
1638
+ resultText: doneTask?.result || result || null,
1639
+ error: doneTask?.status === 'completed' ? null : (doneTask?.error || taskRun.error || null),
1640
+ toolEvents: taskRun.toolEvents,
1641
+ sourceMessage: task.description || task.title,
1642
+ })
1643
+ if (doneTask?.status === 'completed') {
1644
+ pushMainLoopEventToMainSessions({
1645
+ type: 'task_completed',
1646
+ text: `Task completed: "${task.title}" (${taskId})`,
1647
+ })
1648
+ notifyOrchestrators(`Task completed: "${task.title}"`, `task-complete:${taskId}`)
1649
+ handleTerminalTaskResultDeliveries(doneTask)
1650
+ cleanupTerminalOneOffSchedule(doneTask)
1651
+ // Clean up LangGraph checkpoints for completed tasks
1652
+ getCheckpointSaver().deleteThread(taskId).catch((e) =>
1653
+ log.warn(TAG, `[queue] Failed to clean up checkpoints for task ${taskId}:`, e)
1654
+ )
1655
+ // Cascade unblock: auto-queue tasks whose blockers are all done
1656
+ const latestTasks = loadTasks()
1657
+ const unblockedIds = cascadeUnblock(latestTasks, taskId)
1658
+ if (unblockedIds.length > 0) {
1659
+ saveTasks(latestTasks)
1660
+ for (const uid of unblockedIds) {
1661
+ enqueueTask(uid)
1662
+ log.info(TAG, `[queue] Auto-unblocked task "${latestTasks[uid]?.title}" (${uid})`)
1663
+ }
1664
+ notify('tasks')
1665
+ }
1666
+ // Wake waiting protocol runs when a linked task completes
1667
+ if (latestTasks[taskId]?.protocolRunId) {
1668
+ try {
1669
+ const { wakeProtocolRunFromTaskCompletion } = await import('@/lib/server/protocols/protocol-service')
1670
+ wakeProtocolRunFromTaskCompletion(taskId)
1671
+ } catch (e) {
1672
+ log.warn(TAG, `[queue] Failed to wake protocol run for task ${taskId}:`, e)
1673
+ }
1674
+ }
1675
+ log.info(TAG, `[queue] Task "${task.title}" completed`)
1676
+ } else if (doneTask?.status === 'cancelled') {
1677
+ log.warn(TAG, `[queue] Task "${task.title}" cancelled during execution`)
1678
+ } else {
1679
+ if (doneTask?.status === 'queued') {
1680
+ log.warn(TAG, `[queue] Task "${task.title}" scheduled for retry`)
1681
+ } else {
1682
+ pushMainLoopEventToMainSessions({
1683
+ type: 'task_failed',
1684
+ text: `Task failed validation: "${task.title}" (${taskId})`,
1685
+ })
1686
+ notifyOrchestrators(`Task failed: "${task.title}" — validation failure`, `task-fail:${taskId}`)
1687
+ if (doneTask?.status === 'failed') {
1688
+ handleTerminalTaskResultDeliveries(doneTask)
1689
+ cleanupTerminalOneOffSchedule(doneTask)
1690
+ }
1691
+ log.warn(TAG, `[queue] Task "${task.title}" failed completion validation`)
1692
+ }
1693
+ }
1694
+ } catch (err: unknown) {
1695
+ const errMsg = err instanceof Error ? err.message : String(err || 'Unknown error')
1696
+ log.error(TAG, `[queue] Task "${task.title}" failed:`, errMsg)
1697
+ const taskRunId = `${taskId}:attempt-${(task.attempts || 0) + 1}`
1698
+ const t2 = loadTasks()
1699
+ if (isCancelledTask(t2[taskId])) {
1700
+ disableSessionHeartbeat(t2[taskId].sessionId)
1701
+ notify('tasks')
1702
+ notify('runs')
1703
+ queueTaskAutonomyObservation({
1704
+ runId: taskRunId,
1705
+ sessionId,
1706
+ taskId,
1707
+ agentId: agent.id,
1708
+ status: 'cancelled',
1709
+ error: t2[taskId].error || errMsg,
1710
+ sourceMessage: task.description || task.title,
1711
+ })
1712
+ log.warn(TAG, `[queue] Task "${task.title}" aborted because it was cancelled`)
1713
+ return
1714
+ }
1715
+ if (t2[taskId]) {
1716
+ applyTaskPolicyDefaults(t2[taskId])
1717
+
1718
+ // Auto-repair: attempt a repair turn before retrying if a repairPrompt is available
1719
+ const failureClassification = classifyRuntimeFailure({ source: 'task', message: errMsg })
1720
+ if (failureClassification.repairPrompt && t2[taskId].sessionId) {
1721
+ try {
1722
+ const repairRunId = `repair:${taskId}:${Date.now()}`
1723
+ t2[taskId].repairRunId = repairRunId
1724
+ t2[taskId].lastRepairAttemptAt = Date.now()
1725
+ saveTasks(t2)
1726
+ await executeSessionChatTurn({
1727
+ sessionId: t2[taskId].sessionId!,
1728
+ message: `[AUTO-REPAIR] ${failureClassification.repairPrompt}\n\nOriginal error: ${errMsg.slice(0, 300)}`,
1729
+ internal: true,
1730
+ source: 'task-repair',
1731
+ runId: repairRunId,
1732
+ })
1733
+ log.info(TAG, `[queue] Repair turn completed for task "${task.title}" (${taskId})`)
1734
+ } catch (repairErr: unknown) {
1735
+ log.warn(TAG, `[queue] Repair turn failed for task "${task.title}":`, repairErr instanceof Error ? repairErr.message : String(repairErr))
1736
+ // If repair fails, attempt guardian recovery
1737
+ const taskCwd = t2[taskId].cwd || WORKSPACE_DIR
1738
+ prepareGuardianRecovery({
1739
+ cwd: taskCwd,
1740
+ reason: `Auto-repair failed for task "${task.title}": ${errMsg.slice(0, 200)}`,
1741
+ requester: agent.id,
1742
+ })
1743
+ }
1744
+ }
1745
+
1746
+ // Reload tasks after the async repair turn to avoid overwriting concurrent mutations
1747
+ const t3 = loadTasks()
1748
+ // Carry forward repair fields that were saved before the async turn
1749
+ if (t2[taskId].repairRunId && t3[taskId]) {
1750
+ t3[taskId].repairRunId = t2[taskId].repairRunId
1751
+ t3[taskId].lastRepairAttemptAt = t2[taskId].lastRepairAttemptAt
1752
+ }
1753
+ const retryState = scheduleRetryOrDeadLetter(t3[taskId], errMsg.slice(0, 500) || 'Unknown error')
1754
+ if (!t3[taskId].comments) t3[taskId].comments = []
1755
+ // Only add a failure comment if the last comment isn't already an error comment
1756
+ const lastComment = t3[taskId].comments!.at(-1)
1757
+ const isRepeatError = lastComment?.agentId === agent.id && lastComment?.text.startsWith('Task failed')
1758
+ if (!isRepeatError) {
1759
+ t3[taskId].comments!.push({
1760
+ id: genId(),
1761
+ author: agent.name,
1762
+ agentId: agent.id,
1763
+ text: 'Task failed — see error details above.',
1764
+ createdAt: Date.now(),
1765
+ })
1766
+ }
1767
+ saveTasks(t3)
1768
+ if (t3[taskId].status === 'failed') {
1769
+ noteMissionTaskFinished(t3[taskId], 'failed', taskRunId)
1770
+ } else if (t3[taskId].status === 'cancelled') {
1771
+ noteMissionTaskFinished(t3[taskId], 'cancelled', taskRunId)
1772
+ }
1773
+ notify('tasks')
1774
+ notify('runs')
1775
+ disableSessionHeartbeat(t3[taskId].sessionId)
1776
+ if (retryState === 'retry') {
1777
+ const qRetry = loadQueue()
1778
+ pushQueueUnique(qRetry, taskId)
1779
+ saveQueue(qRetry)
1780
+ pushMainLoopEventToMainSessions({
1781
+ type: 'task_retry_scheduled',
1782
+ text: `Task retry scheduled: "${task.title}" (${taskId}) attempt ${t3[taskId].attempts}/${t3[taskId].maxAttempts}.`,
1783
+ })
1784
+ }
1785
+ }
1786
+ queueTaskAutonomyObservation({
1787
+ runId: taskRunId,
1788
+ sessionId,
1789
+ taskId,
1790
+ agentId: agent.id,
1791
+ status: 'failed',
1792
+ error: errMsg,
1793
+ sourceMessage: task.description || task.title,
1794
+ })
1795
+ const latest = loadTasks()[taskId] as BoardTask | undefined
1796
+ if (latest?.status === 'queued') {
1797
+ log.warn(TAG, `[queue] Task "${task.title}" queued for retry after error`)
1798
+ } else if (latest?.status === 'cancelled') {
1799
+ log.warn(TAG, `[queue] Task "${task.title}" stayed cancelled after abort`)
1800
+ } else {
1801
+ pushMainLoopEventToMainSessions({
1802
+ type: 'task_failed',
1803
+ text: `Task failed: "${task.title}" (${taskId}) — ${errMsg.slice(0, 200)}`,
1804
+ })
1805
+ if (latest?.status === 'failed') {
1806
+ handleTerminalTaskResultDeliveries(latest)
1807
+ cleanupTerminalOneOffSchedule(latest)
1808
+ }
1809
+ }
1810
+ }
1811
+ }
1812
+ } finally {
1813
+ _queueState.activeCount--
1814
+ endQueuePerf()
1815
+ const pendingKick = _queueState.pendingKick
1816
+ _queueState.pendingKick = false
1817
+ if (pendingKick) {
1818
+ setTimeout(() => processNext(), 0)
1819
+ return
1820
+ }
1821
+
1822
+ // Only re-kick when work is actually runnable. This avoids hot loops when the
1823
+ // queue only contains blocked, deferred, or retry-gated tasks.
1824
+ const remainingQueue = loadQueue()
1825
+ if (!remainingQueue.length) return
1826
+ const tasks = loadTasks() as Record<string, BoardTask>
1827
+ const probeQueue = [...remainingQueue]
1828
+ const nextRunnableTaskId = dequeueNextRunnableTask(probeQueue, tasks)
1829
+ if (nextRunnableTaskId) {
1830
+ setTimeout(() => processNext(), 0)
1831
+ }
1832
+ }
1833
+ }
1834
+
1835
/**
 * Boot-time sweep that switches off the heartbeat on every session linked to a
 * task that has already reached a terminal status (completed, failed or cancelled).
 *
 * Sessions are only persisted (and a log line emitted) when at least one
 * session was actually changed.
 */
export function cleanupFinishedTaskSessions() {
  const allTasks = loadTasks()
  const allSessions = loadSessions()
  let disabledCount = 0

  for (const entry of Object.values(allTasks) as BoardTask[]) {
    const isTerminal = entry.status === 'completed'
      || entry.status === 'failed'
      || entry.status === 'cancelled'
    if (!isTerminal || !entry.sessionId) continue

    const linked = allSessions[entry.sessionId]
    // Skip missing sessions and sessions whose heartbeat is already off.
    if (!linked || linked.heartbeatEnabled === false) continue

    linked.heartbeatEnabled = false
    linked.lastActiveAt = Date.now()
    disabledCount += 1
  }

  if (disabledCount === 0) return
  saveSessions(allSessions)
  log.info(TAG, `[queue] Disabled heartbeat on ${disabledCount} session(s) with finished tasks`)
}
1855
+
1856
/**
 * Scan tasks in `running` status and recover the ones that look stuck.
 *
 * Three situations are handled:
 *  - a running task without `startedAt` is treated as inconsistent and requeued
 *    with a 30 second retry delay;
 *  - a task whose latest `updatedAt`/`startedAt` is older than the stall timeout;
 *  - a task whose `lastActivityAt` (falling back to `startedAt`) is older than
 *    the idle timeout.
 * Stalled/idle tasks go through `scheduleRetryOrDeadLetter`, which decides
 * between a retry and dead-lettering.
 *
 * The counters start from the values reported by `reconcileFinishedRunningTasks()`.
 *
 * @returns how many tasks were recovered (requeued) and how many were dead-lettered.
 */
export function recoverStalledRunningTasks(): { recovered: number; deadLettered: number } {
  const finished = reconcileFinishedRunningTasks()
  const settings = loadSettings()
  // NOTE(review): normalizeInt presumably applies (default, min, max) — confirm against its definition.
  const stallTimeoutMin = normalizeInt(settings.taskStallTimeoutMin, 45, 5, 24 * 60)
  const stallThresholdMs = stallTimeoutMin * 60_000
  const idleTimeoutMin = normalizeInt((settings as Record<string, unknown>).taskIdleTimeoutMin, 15, 2, 120)
  const idleThresholdMs = idleTimeoutMin * 60_000
  const now = Date.now()
  const taskMap = loadTasks()
  const pendingIds = loadQueue()

  let recoveredCount = finished.reconciled
  let deadLetterCount = finished.deadLettered
  let dirty = false

  for (const entry of Object.values(taskMap) as BoardTask[]) {
    if (entry.status !== 'running') continue

    if (!entry.startedAt) {
      // A running task with no start timestamp cannot be aged; put it back in the queue.
      const recoveredAt = Date.now()
      entry.status = 'queued'
      entry.queuedAt = entry.queuedAt || recoveredAt
      entry.retryScheduledAt = Date.now() + 30_000
      entry.updatedAt = recoveredAt
      entry.error = 'Recovered inconsistent running state (missing startedAt); requeued.'
      if (!entry.comments) entry.comments = []
      entry.comments.push({
        id: genId(),
        author: 'System',
        text: 'Recovered inconsistent running state (missing startedAt). Task requeued.',
        createdAt: recoveredAt,
      })
      pushQueueUnique(pendingIds, entry.id)
      recoveredCount++
      dirty = true
      pushMainLoopEventToMainSessions({
        type: 'task_stall_recovered',
        text: `Recovered inconsistent running task "${entry.title}" (${entry.id}) and requeued it.`,
      })
      continue
    }

    // Overall stall: nothing touched the task record for the stall window.
    const lastTouched = Math.max(entry.updatedAt || 0, entry.startedAt || 0)
    const stalled = lastTouched > 0 && (now - lastTouched) >= stallThresholdMs

    // Idle: no recorded activity for the idle window.
    const lastActivity = entry.lastActivityAt || entry.startedAt || 0
    const idleFor = lastActivity > 0 ? now - lastActivity : 0
    const idle = lastActivity > 0 && idleFor >= idleThresholdMs

    if (!(stalled || idle)) continue

    // The idle explanation wins when both conditions hold.
    let reason: string
    if (idle) {
      reason = `Idle timeout: no output for ${Math.round(idleFor / 60_000)}m`
    } else {
      reason = `Detected stalled run after ${stallTimeoutMin}m without progress`
    }

    const outcome = scheduleRetryOrDeadLetter(entry, reason)
    disableSessionHeartbeat(entry.sessionId)
    dirty = true

    if (outcome !== 'retry') {
      deadLetterCount++
      pushMainLoopEventToMainSessions({
        type: 'task_dead_lettered',
        text: `Task dead-lettered after stalling: "${entry.title}" (${entry.id}).`,
      })
      notifyOrchestrators(`Task failed: "${entry.title}" — stalled and dead-lettered`, `task-fail:${entry.id}`)
      continue
    }

    pushQueueUnique(pendingIds, entry.id)
    recoveredCount++
    pushMainLoopEventToMainSessions({
      type: 'task_stall_recovered',
      text: `Recovered stalled task "${entry.title}" (${entry.id}) and requeued attempt ${entry.attempts}/${entry.maxAttempts}.`,
    })
  }

  if (dirty) {
    saveTasks(taskMap)
    saveQueue(pendingIds)
    // Kick the queue shortly afterwards so requeued work gets picked up.
    if (recoveredCount > 0) {
      setTimeout(() => processNext(), 250)
    }
  }

  return { recovered: recoveredCount, deadLettered: deadLetterCount }
}
1940
+
1941
// Module-level latch: set to true by the first resumeQueue() call so that
// later calls return immediately without repeating the boot recovery.
let _resumeQueueCalled = false
1942
+
1943
/**
 * Let an agent claim a pool-mode task.
 *
 * All validation and the write happen inside `withTransaction` so two agents
 * cannot both claim the same task. A claim is rejected when the task is
 * missing, not in pool mode, already claimed, not in `queued`/`backlog`
 * status, restricted to a candidate list that excludes the agent, or requires
 * capabilities the agent does not match.
 *
 * On success the task is assigned to the agent, an activity entry is logged
 * and a `tasks` notification is sent.
 *
 * @param taskId  id of the task to claim
 * @param agentId id of the claiming agent
 * @returns `{ success: true }`, or `{ success: false, error }` with the rejection reason
 */
export function claimPoolTask(taskId: string, agentId: string): { success: boolean; error?: string } {
  const reject = (error: string) => ({ success: false as const, error })

  // Atomic claim inside a transaction to prevent concurrent double-claims
  const outcome = withTransaction(() => {
    const taskMap = loadTasks() as Record<string, BoardTask>
    const target = taskMap[taskId]
    if (!target) return reject('Task not found')
    if (target.assignmentMode !== 'pool') return reject('Task is not in pool mode')
    if (target.claimedByAgentId) return reject(`Task already claimed by ${target.claimedByAgentId}`)
    if (!(target.status === 'queued' || target.status === 'backlog')) {
      return reject(`Task status is ${target.status}, not claimable`)
    }

    // An empty candidate list means the pool is open to every agent.
    const allowedAgentIds = Array.isArray(target.poolCandidateAgentIds) ? target.poolCandidateAgentIds : []
    const excludedFromPool = allowedAgentIds.length > 0 && !allowedAgentIds.includes(agentId)
    if (excludedFromPool) return reject('Agent is not in the candidate pool for this task')

    // Capability gate: the agent must exist and match everything the task requires.
    const neededCaps = Array.isArray(target.requiredCapabilities) ? target.requiredCapabilities as string[] : []
    if (neededCaps.length > 0) {
      const claimant = loadAgents()[agentId]
      if (!claimant || !matchesCapabilities(claimant.capabilities, neededCaps)) {
        return reject(`Agent does not match required capabilities: [${neededCaps.join(', ')}]`)
      }
    }

    target.claimedByAgentId = agentId
    target.claimedAt = Date.now()
    target.agentId = agentId
    target.updatedAt = Date.now()
    saveTasks(taskMap)
    return { success: true as const, title: target.title }
  })

  if (outcome.success) {
    logActivity({ entityType: 'task', entityId: taskId, action: 'claimed', actor: 'agent', actorId: agentId, summary: `Task "${outcome.title}" claimed by agent ${agentId}` })
    notify('tasks')
    return { success: true }
  }
  return outcome
}
1977
+
1978
/**
 * List the pool tasks the given agent could claim right now.
 *
 * A task is claimable when it is in pool mode, unclaimed, in `queued` or
 * `backlog` status, open to the agent (empty candidate list, or a list that
 * contains the agent) and — when it declares `requiredCapabilities` — the
 * agent exists and matches them. The capability rule mirrors the check in
 * `claimPoolTask`, so this list never offers a task whose claim would be
 * rejected for a capability mismatch.
 *
 * @param agentId id of the agent asking for work
 * @returns the claimable tasks, in the iteration order of the task store
 */
export function listClaimableTasks(agentId: string): BoardTask[] {
  const tasks = loadTasks() as Record<string, BoardTask>
  // Agents are loaded lazily: only needed once a task with required capabilities shows up.
  let agents: ReturnType<typeof loadAgents> | undefined

  return Object.values(tasks).filter((task) => {
    if (task.assignmentMode !== 'pool') return false
    if (task.claimedByAgentId) return false
    if (task.status !== 'queued' && task.status !== 'backlog') return false

    const candidates = Array.isArray(task.poolCandidateAgentIds) ? task.poolCandidateAgentIds : []
    if (candidates.length > 0 && !candidates.includes(agentId)) return false

    // Capability check — same rule claimPoolTask enforces at claim time.
    const requiredCaps = Array.isArray(task.requiredCapabilities) ? task.requiredCapabilities as string[] : []
    if (requiredCaps.length === 0) return true
    if (!agents) agents = loadAgents()
    const agent = agents[agentId]
    if (!agent) return false
    return matchesCapabilities(agent.capabilities, requiredCaps)
  })
}
1988
+
1989
/**
 * Boot-time queue recovery. Runs at most once per process (guarded by
 * `_resumeQueueCalled`).
 *
 * 1. Tasks marked `queued` that are missing from the queue array are appended to it.
 * 2. Every task still marked `running` is treated as an orphan and handed to
 *    `scheduleRetryOrDeadLetter`; retries are put back on the queue and a
 *    System comment records the recovery.
 * 3. If anything changed, queue and tasks are saved; if the queue is
 *    non-empty, processing is started.
 */
export function resumeQueue() {
  if (_resumeQueueCalled) return
  _resumeQueueCalled = true

  const taskMap = loadTasks()
  const pendingIds = loadQueue()
  const everyTask = Object.values(taskMap) as BoardTask[]
  let dirty = false

  // Pass 1: tasks stuck in 'queued' status but absent from the queue array.
  for (const entry of everyTask) {
    if (entry.status !== 'queued' || pendingIds.includes(entry.id)) continue
    applyTaskPolicyDefaults(entry)
    log.info(TAG, `[queue] Recovering stuck queued task: "${entry.title}" (${entry.id})`)
    pendingIds.push(entry.id)
    entry.queuedAt = entry.queuedAt || Date.now()
    dirty = true
  }

  // Pass 2: orphan reap — all running tasks are orphans on fresh daemon startup.
  let orphanCount = 0
  for (const entry of everyTask) {
    if (entry.status !== 'running') continue
    const reason = 'process_lost: task was running when daemon restarted'
    applyTaskPolicyDefaults(entry)
    if (scheduleRetryOrDeadLetter(entry, reason) === 'retry') {
      pushQueueUnique(pendingIds, entry.id)
    }
    if (!entry.comments) entry.comments = []
    entry.comments.push({
      id: genId(),
      author: 'System',
      text: `Orphan recovery: ${reason}`,
      createdAt: Date.now(),
    })
    dirty = true
    orphanCount += 1
  }
  if (orphanCount > 0) {
    log.info(TAG, `[queue] Recovered ${orphanCount} orphaned running task(s) on boot`)
  }

  if (dirty) {
    saveQueue(pendingIds)
    saveTasks(taskMap)
  }

  if (pendingIds.length === 0) return
  log.info(TAG, `[queue] Resuming ${pendingIds.length} queued task(s) on boot`)
  processNext()
}
2041
+
2042
/**
 * Re-queue deferred tasks whose agents are now available.
 *
 * A deferred task is promoted back to `queued` when its agent exists, is not
 * disabled and — if the agent has any budget configured — the budget check
 * passes. A budget check that throws is logged and treated as "no limit hit",
 * so a failing check never keeps a task deferred.
 *
 * When at least one task is promoted, tasks and queue are saved, a `tasks`
 * notification is sent and queue processing is kicked asynchronously.
 *
 * @param agentId optional filter; when given, only that agent's deferred tasks are considered
 * @returns the number of tasks promoted
 */
export function promoteDeferred(agentId?: string): number {
  const tasks = loadTasks() as Record<string, BoardTask>
  const agents = loadAgents()
  const queue = loadQueue()
  let promoted = 0

  for (const task of Object.values(tasks)) {
    if (task.status !== 'deferred') continue
    if (agentId && task.agentId !== agentId) continue

    const agent = agents[task.agentId]
    if (!agent || isAgentDisabled(agent as Agent)) continue

    // Check budget if applicable
    const typedAgent = agent as Agent
    if (typedAgent.monthlyBudget || typedAgent.dailyBudget || typedAgent.hourlyBudget) {
      try {
        const check = checkAgentBudgetLimits(typedAgent)
        if (!check.ok) continue // still over budget
      } catch (e: unknown) {
        // Best-effort gate: keep promoting, but leave a trace instead of hiding the failure.
        log.warn(TAG, `[queue] Budget check failed for agent ${task.agentId} while promoting deferred task ${task.id}; promoting anyway:`, e)
      }
    }

    task.status = 'queued'
    task.deferredReason = null
    task.updatedAt = Date.now()
    pushQueueUnique(queue, task.id)
    promoted++
  }

  if (promoted > 0) {
    saveTasks(tasks)
    saveQueue(queue)
    notify('tasks')
    // Defer the kick so the caller finishes before queue processing starts.
    setTimeout(() => processNext(), 0)
  }
  return promoted
}