@swarmclawai/swarmclaw 0.8.2 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/README.md +8 -8
  2. package/package.json +2 -2
  3. package/src/app/api/agents/route.ts +6 -3
  4. package/src/app/api/auth/route.ts +20 -10
  5. package/src/app/api/chats/[id]/devserver/route.ts +74 -48
  6. package/src/app/api/chats/[id]/route.ts +16 -1
  7. package/src/app/api/chats/route.ts +14 -6
  8. package/src/app/api/daemon/route.ts +4 -3
  9. package/src/app/api/openclaw/approvals/route.ts +3 -3
  10. package/src/app/api/wallets/[id]/route.ts +18 -4
  11. package/src/app/page.tsx +19 -23
  12. package/src/cli/index.js +1 -1
  13. package/src/cli/spec.js +1 -1
  14. package/src/components/auth/access-key-gate.tsx +5 -3
  15. package/src/components/chat/chat-area.tsx +50 -29
  16. package/src/components/chat/chat-card.tsx +4 -7
  17. package/src/components/chat/chat-header.tsx +19 -13
  18. package/src/components/chat/chat-list.tsx +11 -9
  19. package/src/components/chat/chat-tool-toggles.tsx +2 -2
  20. package/src/components/home/home-view.tsx +6 -2
  21. package/src/components/layout/app-layout.tsx +2 -3
  22. package/src/hooks/use-ws.ts +33 -7
  23. package/src/instrumentation.ts +21 -11
  24. package/src/lib/api-client.test.ts +49 -0
  25. package/src/lib/api-client.ts +53 -30
  26. package/src/lib/chats.ts +3 -0
  27. package/src/lib/runtime-env.test.ts +28 -0
  28. package/src/lib/runtime-env.ts +13 -0
  29. package/src/lib/server/chat-execution.ts +1 -1
  30. package/src/lib/server/connectors/manager.ts +4 -2
  31. package/src/lib/server/daemon-state.test.ts +23 -0
  32. package/src/lib/server/daemon-state.ts +34 -16
  33. package/src/lib/server/heartbeat-service.ts +61 -8
  34. package/src/lib/server/plugins.ts +12 -9
  35. package/src/lib/server/queue.ts +6 -1
  36. package/src/lib/server/storage.ts +100 -8
  37. package/src/lib/server/wallet-portfolio.ts +6 -0
  38. package/src/lib/session-summary.test.ts +49 -0
  39. package/src/lib/session-summary.ts +59 -0
  40. package/src/lib/ws-client.ts +1 -2
  41. package/src/proxy.test.ts +40 -0
  42. package/src/proxy.ts +23 -17
  43. package/src/stores/use-app-store.ts +66 -22
  44. package/src/stores/use-chat-store.ts +2 -2
  45. package/src/types/index.ts +4 -0
@@ -5,6 +5,7 @@ const ACCESS_KEY_STORAGE = 'sc_access_key'
5
5
  const DEFAULT_API_TIMEOUT_MS = 12_000
6
6
  const DEFAULT_GET_RETRIES = 2
7
7
  const RETRY_DELAY_BASE_MS = 300
8
+ const inflightGetRequests = new Map<string, Promise<unknown>>()
8
9
 
9
10
  export function getStoredAccessKey(): string {
10
11
  return safeStorageGet(ACCESS_KEY_STORAGE) || ''
@@ -27,6 +28,10 @@ function isAbortError(err: unknown): boolean {
27
28
  return (err as { name?: string }).name === 'AbortError'
28
29
  }
29
30
 
31
+ function buildInflightGetKey(path: string, key: string): string {
32
+ return `${key}::${path}`
33
+ }
34
+
30
35
  export async function api<T = unknown>(
31
36
  method: string,
32
37
  path: string,
@@ -47,42 +52,60 @@ export async function api<T = unknown>(
47
52
  }
48
53
  if (body) requestInit.body = JSON.stringify(body)
49
54
 
50
- for (let attempt = 0; attempt <= retries; attempt++) {
51
- try {
52
- const r = await fetchWithTimeout('/api' + path, requestInit, timeoutMs)
55
+ const runRequest = async (): Promise<T> => {
56
+ for (let attempt = 0; attempt <= retries; attempt++) {
57
+ try {
58
+ const r = await fetchWithTimeout('/api' + path, requestInit, timeoutMs)
53
59
 
54
- if (r.status === 401) {
55
- // Clear stored key on auth failure, redirect to login
56
- clearStoredAccessKey()
57
- if (typeof window !== 'undefined') {
58
- window.dispatchEvent(new Event('sc_auth_required'))
60
+ if (r.status === 401) {
61
+ // Clear stored key on auth failure, redirect to login
62
+ clearStoredAccessKey()
63
+ if (typeof window !== 'undefined') {
64
+ window.dispatchEvent(new Event('sc_auth_required'))
65
+ }
66
+ throw new Error('Unauthorized — invalid access key')
59
67
  }
60
- throw new Error('Unauthorized — invalid access key')
61
- }
62
68
 
63
- const ct = r.headers.get('content-type') || ''
69
+ const ct = r.headers.get('content-type') || ''
64
70
 
65
- if (!r.ok) {
66
- if (ct.includes('json')) {
67
- const payload = await r.json().catch(() => null) as { error?: unknown; message?: unknown } | null
68
- const msg =
69
- (typeof payload?.error === 'string' && payload.error.trim())
70
- || (typeof payload?.message === 'string' && payload.message.trim())
71
- || `Request failed (${r.status})`
72
- throw new Error(msg)
71
+ if (!r.ok) {
72
+ if (ct.includes('json')) {
73
+ const payload = await r.json().catch(() => null) as { error?: unknown; message?: unknown } | null
74
+ const msg =
75
+ (typeof payload?.error === 'string' && payload.error.trim())
76
+ || (typeof payload?.message === 'string' && payload.message.trim())
77
+ || `Request failed (${r.status})`
78
+ throw new Error(msg)
79
+ }
80
+ const text = (await r.text().catch(() => '')).trim()
81
+ throw new Error(text || `Request failed (${r.status})`)
73
82
  }
74
- const text = (await r.text().catch(() => '')).trim()
75
- throw new Error(text || `Request failed (${r.status})`)
76
- }
77
83
 
78
- if (ct.includes('json')) return r.json() as Promise<T>
79
- return r.text() as unknown as T
80
- } catch (err) {
81
- const isLastAttempt = attempt >= retries
82
- const retryable = isAbortError(err) || (err instanceof TypeError && !String(err.message || '').includes('Unauthorized'))
83
- if (isLastAttempt || !retryable) throw err
84
- await sleep(RETRY_DELAY_BASE_MS * (attempt + 1))
84
+ if (ct.includes('json')) return r.json() as Promise<T>
85
+ return r.text() as unknown as T
86
+ } catch (err) {
87
+ const isLastAttempt = attempt >= retries
88
+ const retryable = isAbortError(err) || (err instanceof TypeError && !String(err.message || '').includes('Unauthorized'))
89
+ if (isLastAttempt || !retryable) throw err
90
+ await sleep(RETRY_DELAY_BASE_MS * (attempt + 1))
91
+ }
85
92
  }
93
+ throw new Error('Request failed')
86
94
  }
87
- throw new Error('Request failed')
95
+
96
+ if (upperMethod !== 'GET') {
97
+ return runRequest()
98
+ }
99
+
100
+ const inflightKey = buildInflightGetKey(path, key)
101
+ const existing = inflightGetRequests.get(inflightKey)
102
+ if (existing) return existing as Promise<T>
103
+
104
+ const requestPromise = runRequest().finally(() => {
105
+ if (inflightGetRequests.get(inflightKey) === requestPromise) {
106
+ inflightGetRequests.delete(inflightKey)
107
+ }
108
+ })
109
+ inflightGetRequests.set(inflightKey, requestPromise)
110
+ return requestPromise
88
111
  }
package/src/lib/chats.ts CHANGED
@@ -8,6 +8,9 @@ export const fetchChats = () => api<Sessions>('GET', '/chats')
8
8
  /** @deprecated Use fetchChats */
9
9
  export const fetchSessions = fetchChats
10
10
 
11
+ export const fetchChat = (id: string) =>
12
+ api<Session>('GET', `/chats/${id}`)
13
+
11
14
  export const createChat = (
12
15
  name: string,
13
16
  cwd: string,
@@ -0,0 +1,28 @@
1
+ import assert from 'node:assert/strict'
2
+ import { describe, it } from 'node:test'
3
+
4
+ import { isDevelopmentLikeRuntime, isProductionRuntime } from './runtime-env'
5
+
6
+ describe('runtime env helpers', () => {
7
+ it('treats missing NODE_ENV as development-like', () => {
8
+ const previousNodeEnv = process.env.NODE_ENV
9
+ delete process.env.NODE_ENV
10
+
11
+ assert.equal(isDevelopmentLikeRuntime(), true)
12
+ assert.equal(isProductionRuntime(), false)
13
+
14
+ if (previousNodeEnv === undefined) delete process.env.NODE_ENV
15
+ else process.env.NODE_ENV = previousNodeEnv
16
+ })
17
+
18
+ it('detects explicit production mode', () => {
19
+ const previousNodeEnv = process.env.NODE_ENV
20
+ process.env.NODE_ENV = 'production'
21
+
22
+ assert.equal(isDevelopmentLikeRuntime(), false)
23
+ assert.equal(isProductionRuntime(), true)
24
+
25
+ if (previousNodeEnv === undefined) delete process.env.NODE_ENV
26
+ else process.env.NODE_ENV = previousNodeEnv
27
+ })
28
+ })
@@ -0,0 +1,13 @@
1
+ function normalizedNodeEnv(): string {
2
+ return typeof process.env.NODE_ENV === 'string'
3
+ ? process.env.NODE_ENV.trim().toLowerCase()
4
+ : ''
5
+ }
6
+
7
+ export function isProductionRuntime(): boolean {
8
+ return normalizedNodeEnv() === 'production'
9
+ }
10
+
11
+ export function isDevelopmentLikeRuntime(): boolean {
12
+ return !isProductionRuntime()
13
+ }
@@ -1362,7 +1362,7 @@ export async function executeSessionChatTurn(input: ExecuteChatTurnInput): Promi
1362
1362
  }
1363
1363
 
1364
1364
  // Periodic partial save so a browser refresh doesn't lose the in-flight response.
1365
- const PARTIAL_SAVE_INTERVAL_MS = 2000
1365
+ const PARTIAL_SAVE_INTERVAL_MS = 3500
1366
1366
  const partialSaveTimer = setInterval(() => {
1367
1367
  persistStreamingAssistantArtifact()
1368
1368
  }, PARTIAL_SAVE_INTERVAL_MS)
@@ -990,8 +990,10 @@ function resolveDirectSession(params: {
990
990
  session.name = sessionKey
991
991
  session.agentId = agent.id
992
992
  session.plugins = Array.isArray(session.plugins) ? session.plugins : (agent.plugins || agent.tools || [])
993
- if (!session.provider) session.provider = defaultProvider
994
- if (!session.model) session.model = defaultModel
993
+ // Always sync provider/model from agent defaults so connector sessions
994
+ // track agent config changes (e.g. model renamed from glm-5 to glm-5:cloud).
995
+ session.provider = defaultProvider
996
+ session.model = defaultModel
995
997
  if (session.credentialId === undefined) session.credentialId = agent.credentialId || null
996
998
  if (!Array.isArray(session.fallbackCredentialIds) && Array.isArray(agent.fallbackCredentialIds)) {
997
999
  session.fallbackCredentialIds = [...agent.fallbackCredentialIds]
@@ -3,6 +3,7 @@ import { describe, it } from 'node:test'
3
3
 
4
4
  import {
5
5
  buildSessionHeartbeatHealthDedupKey,
6
+ isDaemonBackgroundServicesEnabled,
6
7
  shouldSuppressSyntheticAgentHealthAlert,
7
8
  shouldSuppressSessionHeartbeatHealthAlert,
8
9
  } from './daemon-state'
@@ -47,4 +48,26 @@ describe('daemon heartbeat health alerts', () => {
47
48
  assert.equal(shouldSuppressSyntheticAgentHealthAlert('cmp-oc-2026-03-08t19-15-21-755z-agent'), true)
48
49
  assert.equal(shouldSuppressSyntheticAgentHealthAlert('agent-real-123'), false)
49
50
  })
51
+
52
+ it('respects daemon background service overrides', () => {
53
+ const previousNodeEnv = process.env.NODE_ENV
54
+ const previousFlag = process.env.SWARMCLAW_DAEMON_BACKGROUND_SERVICES
55
+
56
+ process.env.NODE_ENV = 'development'
57
+ process.env.SWARMCLAW_DAEMON_BACKGROUND_SERVICES = 'true'
58
+ assert.equal(isDaemonBackgroundServicesEnabled(), true)
59
+
60
+ process.env.SWARMCLAW_DAEMON_BACKGROUND_SERVICES = 'false'
61
+ assert.equal(isDaemonBackgroundServicesEnabled(), false)
62
+
63
+ process.env.NODE_ENV = 'production'
64
+ process.env.SWARMCLAW_DAEMON_BACKGROUND_SERVICES = 'true'
65
+ assert.equal(isDaemonBackgroundServicesEnabled(), true)
66
+
67
+ if (previousNodeEnv === undefined) delete process.env.NODE_ENV
68
+ else process.env.NODE_ENV = previousNodeEnv
69
+
70
+ if (previousFlag === undefined) delete process.env.SWARMCLAW_DAEMON_BACKGROUND_SERVICES
71
+ else process.env.SWARMCLAW_DAEMON_BACKGROUND_SERVICES = previousFlag
72
+ })
50
73
  })
@@ -1,6 +1,6 @@
1
1
  import { loadQueue, loadSchedules, loadSessions, saveSessions, loadConnectors, saveConnectors, loadWebhookRetryQueue, upsertWebhookRetry, deleteWebhookRetry, loadWebhooks, loadAgents, loadSettings, appendWebhookLog, loadCredentials, decryptKey } from './storage'
2
2
  import { notify } from './ws-hub'
3
- import { processNext, cleanupFinishedTaskSessions, validateCompletedTasksQueue, recoverStalledRunningTasks } from './queue'
3
+ import { processNext, cleanupFinishedTaskSessions, validateCompletedTasksQueue, recoverStalledRunningTasks, resumeQueue } from './queue'
4
4
  import { startScheduler, stopScheduler } from './scheduler'
5
5
  import { sweepOrphanedBrowsers, getActiveBrowserCount } from './session-tools'
6
6
  import {
@@ -24,6 +24,7 @@ import { enqueueSessionRun } from './session-run-manager'
24
24
  import { WORKSPACE_DIR } from './data-dir'
25
25
  import { DEFAULT_HEARTBEAT_INTERVAL_SEC } from '@/lib/heartbeat-defaults'
26
26
  import { genId } from '@/lib/id'
27
+ import { isProductionRuntime } from '@/lib/runtime-env'
27
28
  import path from 'node:path'
28
29
  import type { Session, WebhookRetryEntry } from '@/types'
29
30
  import { createNotification } from '@/lib/server/create-notification'
@@ -40,7 +41,7 @@ const QUEUE_CHECK_INTERVAL = 30_000 // 30 seconds
40
41
  const BROWSER_SWEEP_INTERVAL = 60_000 // 60 seconds
41
42
  const BROWSER_MAX_AGE = 10 * 60 * 1000 // 10 minutes idle = orphaned
42
43
  const HEALTH_CHECK_INTERVAL = 120_000 // 2 minutes
43
- const CONNECTOR_HEALTH_CHECK_INTERVAL = 5_000 // 5 seconds
44
+ const CONNECTOR_HEALTH_CHECK_INTERVAL = 15_000 // 15 seconds
44
45
  const MEMORY_CONSOLIDATION_INTERVAL = 6 * 3600_000 // 6 hours
45
46
  const MEMORY_CONSOLIDATION_INITIAL_DELAY = 60_000 // 1 minute after daemon start
46
47
  const STALE_MULTIPLIER = 4 // session is stale after N × heartbeat interval
@@ -62,7 +63,11 @@ function parseBoolish(value: unknown, fallback: boolean): boolean {
62
63
  }
63
64
 
64
65
  function daemonAutostartEnvEnabled(): boolean {
65
- return parseBoolish(process.env.SWARMCLAW_DAEMON_AUTOSTART, true)
66
+ return parseBoolish(process.env.SWARMCLAW_DAEMON_AUTOSTART, isProductionRuntime())
67
+ }
68
+
69
+ export function isDaemonBackgroundServicesEnabled(): boolean {
70
+ return parseBoolish(process.env.SWARMCLAW_DAEMON_BACKGROUND_SERVICES, true)
66
71
  }
67
72
 
68
73
  function parseHeartbeatIntervalSec(value: unknown, fallback = DEFAULT_HEARTBEAT_INTERVAL_SEC): number {
@@ -186,11 +191,9 @@ export function startDaemon(options?: { source?: string; manualStart?: boolean }
186
191
  // (for example health monitor) were introduced in newer code.
187
192
  startQueueProcessor()
188
193
  startBrowserSweep()
189
- startHealthMonitor()
190
- startConnectorHealthMonitor()
191
194
  startHeartbeatService()
192
195
  startMemoryConsolidation()
193
- startEvalScheduler()
196
+ syncDaemonBackgroundServices()
194
197
  return
195
198
  }
196
199
  ds.running = true
@@ -201,14 +204,13 @@ export function startDaemon(options?: { source?: string; manualStart?: boolean }
201
204
  validateCompletedTasksQueue()
202
205
  cleanupFinishedTaskSessions()
203
206
  recoverStaleDelegationJobs()
207
+ resumeQueue()
204
208
  startScheduler()
205
209
  startQueueProcessor()
206
210
  startBrowserSweep()
207
- startHealthMonitor()
208
- startConnectorHealthMonitor()
209
211
  startHeartbeatService()
210
212
  startMemoryConsolidation()
211
- startEvalScheduler()
213
+ syncDaemonBackgroundServices()
212
214
  } catch (err: unknown) {
213
215
  ds.running = false
214
216
  notify('daemon')
@@ -216,10 +218,12 @@ export function startDaemon(options?: { source?: string; manualStart?: boolean }
216
218
  throw err
217
219
  }
218
220
 
219
- // Auto-start enabled connectors
220
- autoStartConnectors().catch((err: unknown) => {
221
- console.error('[daemon] Error auto-starting connectors:', err instanceof Error ? err.message : String(err))
222
- })
221
+ if (isDaemonBackgroundServicesEnabled()) {
222
+ // Auto-start enabled connectors only when the full background stack is enabled.
223
+ autoStartConnectors().catch((err: unknown) => {
224
+ console.error('[daemon] Error auto-starting connectors:', err instanceof Error ? err.message : String(err))
225
+ })
226
+ }
223
227
  }
224
228
 
225
229
  export function stopDaemon(options?: { source?: string; manualStop?: boolean }) {
@@ -272,6 +276,7 @@ function startQueueProcessor() {
272
276
  await processNext()
273
277
  ds.lastProcessedAt = Date.now()
274
278
  }
279
+ if (!isDaemonBackgroundServicesEnabled()) return
275
280
  // OpenClaw gateway lifecycle: lazy connect when openclaw agents exist, disconnect when none remain
276
281
  try {
277
282
  if (hasOpenClawAgents()) {
@@ -901,6 +906,18 @@ function stopHealthMonitor() {
901
906
  }
902
907
  }
903
908
 
909
+ function syncDaemonBackgroundServices() {
910
+ if (isDaemonBackgroundServicesEnabled()) {
911
+ startHealthMonitor()
912
+ startConnectorHealthMonitor()
913
+ startEvalScheduler()
914
+ return
915
+ }
916
+ stopHealthMonitor()
917
+ stopConnectorHealthMonitor()
918
+ stopEvalScheduler()
919
+ }
920
+
904
921
  function startConnectorHealthMonitor() {
905
922
  if (ds.connectorHealthIntervalId) return
906
923
 
@@ -1038,13 +1055,11 @@ function refreshDaemonTimersForHotReload() {
1038
1055
  if (ds.healthIntervalId) {
1039
1056
  clearInterval(ds.healthIntervalId)
1040
1057
  ds.healthIntervalId = null
1041
- startHealthMonitor()
1042
1058
  }
1043
1059
 
1044
1060
  if (ds.connectorHealthIntervalId) {
1045
1061
  clearInterval(ds.connectorHealthIntervalId)
1046
1062
  ds.connectorHealthIntervalId = null
1047
- startConnectorHealthMonitor()
1048
1063
  }
1049
1064
 
1050
1065
  if (ds.memoryConsolidationTimeoutId || ds.memoryConsolidationIntervalId) {
@@ -1054,8 +1069,9 @@ function refreshDaemonTimersForHotReload() {
1054
1069
 
1055
1070
  if (ds.evalSchedulerIntervalId) {
1056
1071
  stopEvalScheduler()
1057
- startEvalScheduler()
1058
1072
  }
1073
+
1074
+ syncDaemonBackgroundServices()
1059
1075
  }
1060
1076
 
1061
1077
  // In dev/HMR, the daemon state survives on globalThis while interval callbacks keep
@@ -1095,6 +1111,8 @@ export function getDaemonStatus() {
1095
1111
  running: ds.running,
1096
1112
  schedulerActive: ds.running,
1097
1113
  autostartEnabled: daemonAutostartEnvEnabled(),
1114
+ backgroundServicesEnabled: isDaemonBackgroundServicesEnabled(),
1115
+ reducedMode: !isDaemonBackgroundServicesEnabled(),
1098
1116
  manualStopRequested: ds.manualStopRequested,
1099
1117
  queueLength: queue.length,
1100
1118
  lastProcessed: ds.lastProcessedAt,
@@ -16,12 +16,21 @@ import { buildMainLoopHeartbeatPrompt, isMainSession } from './main-agent-loop'
16
16
  import { ensureAgentThreadSession } from './agent-thread-session'
17
17
  import { isAgentDisabled } from './agent-availability'
18
18
 
19
- const HEARTBEAT_TICK_MS = 5_000
19
+ const HEARTBEAT_TICK_MS = 60_000
20
+ const MAX_CONCURRENT_HEARTBEATS = 5
21
+ const BACKOFF_BASE_MS = 10_000
22
+ const BACKOFF_MAX_MS = 5 * 60_000
23
+
24
+ interface FailureRecord {
25
+ count: number
26
+ lastFailedAt: number
27
+ }
20
28
 
21
29
  interface HeartbeatState {
22
30
  timer: ReturnType<typeof setInterval> | null
23
31
  running: boolean
24
32
  lastBySession: Map<string, number>
33
+ failures: Map<string, FailureRecord>
25
34
  }
26
35
 
27
36
  const globalKey = '__swarmclaw_heartbeat_service__' as const
@@ -30,6 +39,7 @@ const state: HeartbeatState = globalScope[globalKey] ?? (globalScope[globalKey]
30
39
  timer: null,
31
40
  running: false,
32
41
  lastBySession: new Map<string, number>(),
42
+ failures: new Map<string, FailureRecord>(),
33
43
  })
34
44
 
35
45
  function parseIntBounded(value: unknown, fallback: number, min: number, max: number): number {
@@ -355,6 +365,13 @@ function shouldRunHeartbeats(settings: Record<string, any>): boolean {
355
365
  return loopMode === 'ongoing'
356
366
  }
357
367
 
368
+ function isBackedOff(sessionId: string, now: number): boolean {
369
+ const record = state.failures.get(sessionId)
370
+ if (!record || record.count === 0) return false
371
+ const backoffMs = Math.min(BACKOFF_BASE_MS * Math.pow(2, record.count - 1), BACKOFF_MAX_MS)
372
+ return now < record.lastFailedAt + backoffMs
373
+ }
374
+
358
375
  async function tickHeartbeats() {
359
376
  const settings = loadSettings()
360
377
  const globalOngoing = shouldRunHeartbeats(settings)
@@ -366,12 +383,23 @@ async function tickHeartbeats() {
366
383
  }
367
384
 
368
385
  const agents = loadAgents()
369
- for (const agent of Object.values(agents) as any[]) {
370
- if (!agent?.id || agent.heartbeatEnabled !== true || isAgentDisabled(agent)) continue
386
+ const hbAgents = (Object.values(agents) as any[]).filter(
387
+ (a) => a?.id && a.heartbeatEnabled === true && !isAgentDisabled(a),
388
+ )
389
+ for (const agent of hbAgents) {
371
390
  ensureAgentThreadSession(String(agent.id))
372
391
  }
392
+ const hasScopedAgents = hbAgents.length > 0
393
+
394
+ // Short-circuit: if no agents have heartbeat enabled and global loop mode is
395
+ // bounded, skip the expensive loadSessions() — nothing will be eligible.
396
+ if (!hasScopedAgents && !globalOngoing) {
397
+ // Prune any stale tracking entries
398
+ if (state.lastBySession.size > 0) state.lastBySession.clear()
399
+ return
400
+ }
401
+
373
402
  const sessions = loadSessions()
374
- const hasScopedAgents = Object.values(agents).some((a: any) => a?.heartbeatEnabled === true && !isAgentDisabled(a))
375
403
 
376
404
  // Prune tracked sessions that no longer exist or have heartbeat disabled
377
405
  for (const trackedId of state.lastBySession.keys()) {
@@ -386,7 +414,11 @@ async function tickHeartbeats() {
386
414
  }
387
415
  }
388
416
 
417
+ let enqueued = 0
418
+
389
419
  for (const session of Object.values(sessions) as any[]) {
420
+ if (enqueued >= MAX_CONCURRENT_HEARTBEATS) break
421
+
390
422
  if (!session?.id) continue
391
423
  if (session.sessionType && session.sessionType !== 'human') continue
392
424
 
@@ -406,6 +438,8 @@ async function tickHeartbeats() {
406
438
  const cfg = heartbeatConfigForSession(session, settings, agents)
407
439
  if (!cfg.enabled) continue
408
440
 
441
+ if (isBackedOff(session.id, now)) continue
442
+
409
443
  // For sessions with explicit opt-in, use a shorter idle threshold (just intervalSec * 2).
410
444
  // For inherited/global heartbeats, keep the 180s minimum to avoid noisy auto-fire.
411
445
  const defaultIdleSec = explicitOptIn
@@ -461,11 +495,20 @@ async function tickHeartbeats() {
461
495
  },
462
496
  })
463
497
 
464
- // Set timestamp AFTER successful enqueue so a busy session retries next tick
498
+ enqueued++
465
499
  state.lastBySession.set(session.id, now)
466
500
 
467
- enqueue.promise.catch((err) => {
468
- log.warn('heartbeat', `Heartbeat run failed for session ${session.id}`, err?.message || String(err))
501
+ const sid = session.id as string
502
+ enqueue.promise.then(() => {
503
+ state.failures.delete(sid)
504
+ }).catch((err: unknown) => {
505
+ const prev = state.failures.get(sid)
506
+ state.failures.set(sid, {
507
+ count: (prev?.count ?? 0) + 1,
508
+ lastFailedAt: Date.now(),
509
+ })
510
+ const msg = err instanceof Error ? err.message : String(err)
511
+ log.warn('heartbeat', `Heartbeat run failed for session ${sid}`, msg)
469
512
  })
470
513
  }
471
514
  }
@@ -475,11 +518,20 @@ async function tickHeartbeats() {
475
518
  * doesn't cause every session to fire a heartbeat immediately on the first tick.
476
519
  */
477
520
  function seedLastActive() {
521
+ const agents = loadAgents()
522
+ const hbAgentIds = new Set(
523
+ (Object.values(agents) as Record<string, unknown>[])
524
+ .filter((a) => a?.heartbeatEnabled === true && !isAgentDisabled(a))
525
+ .map((a) => String(a.id)),
526
+ )
478
527
  const sessions = loadSessions()
479
528
  for (const session of Object.values(sessions) as any[]) {
480
529
  if (!session?.id) continue
530
+ // Only seed sessions that are actually heartbeat-eligible
531
+ const eligible = session.heartbeatEnabled === true
532
+ || (session.agentId && hbAgentIds.has(session.agentId))
533
+ if (!eligible) continue
481
534
  if (typeof session.lastActiveAt === 'number' && session.lastActiveAt > 0) {
482
- // Only seed entries we don't already have (preserves HMR state)
483
535
  if (!state.lastBySession.has(session.id)) {
484
536
  state.lastBySession.set(session.id, session.lastActiveAt)
485
537
  }
@@ -515,6 +567,7 @@ export function stopHeartbeatService() {
515
567
  export function restartHeartbeatService() {
516
568
  stopHeartbeatService()
517
569
  state.lastBySession.clear()
570
+ state.failures.clear()
518
571
  startHeartbeatService()
519
572
  }
520
573
 
@@ -638,6 +638,17 @@ interface LoadedPlugin {
638
638
  isBuiltin?: boolean
639
639
  }
640
640
 
641
+ function createPluginRequire(): NodeRequire | null {
642
+ try {
643
+ return createRequire(path.join(process.cwd(), 'package.json'))
644
+ } catch (err: unknown) {
645
+ log.warn('plugins', 'createRequire failed; external plugins disabled', {
646
+ error: err instanceof Error ? err.message : String(err),
647
+ })
648
+ return null
649
+ }
650
+ }
651
+
641
652
  export interface ExternalPluginToolEntry {
642
653
  pluginId: string
643
654
  pluginName: string
@@ -1000,15 +1011,7 @@ class PluginManager {
1000
1011
  try {
1001
1012
  this.ensurePluginDirs()
1002
1013
  const files = fs.readdirSync(PLUGINS_DIR).filter(f => f.endsWith('.js') || f.endsWith('.mjs'))
1003
-
1004
- let dynamicRequire: NodeRequire | null = null
1005
- try {
1006
- dynamicRequire = createRequire(path.join(process.cwd(), 'package.json'))
1007
- } catch (err: unknown) {
1008
- log.warn('plugins', 'createRequire failed; external plugins disabled', {
1009
- error: err instanceof Error ? err.message : String(err),
1010
- })
1011
- }
1014
+ const dynamicRequire = createPluginRequire()
1012
1015
 
1013
1016
  if (dynamicRequire) {
1014
1017
  for (const file of files) {
@@ -1967,7 +1967,7 @@ export function recoverStalledRunningTasks(): { recovered: number; deadLettered:
1967
1967
  const recoveredAt = Date.now()
1968
1968
  task.status = 'queued'
1969
1969
  task.queuedAt = task.queuedAt || recoveredAt
1970
- task.retryScheduledAt = null
1970
+ task.retryScheduledAt = Date.now() + 30_000
1971
1971
  task.updatedAt = recoveredAt
1972
1972
  task.error = 'Recovered inconsistent running state (missing startedAt); requeued.'
1973
1973
  if (!task.comments) task.comments = []
@@ -1994,6 +1994,7 @@ export function recoverStalledRunningTasks(): { recovered: number; deadLettered:
1994
1994
  disableSessionHeartbeat(task.sessionId)
1995
1995
  changed = true
1996
1996
  if (state === 'retry') {
1997
+ task.retryScheduledAt = Date.now() + 30_000
1997
1998
  pushQueueUnique(queue, task.id)
1998
1999
  recovered++
1999
2000
  pushMainLoopEventToMainSessions({
@@ -2020,8 +2021,12 @@ export function recoverStalledRunningTasks(): { recovered: number; deadLettered:
2020
2021
  return { recovered, deadLettered }
2021
2022
  }
2022
2023
 
2024
+ let _resumeQueueCalled = false
2025
+
2023
2026
  /** Resume any queued tasks on server boot */
2024
2027
  export function resumeQueue() {
2028
+ if (_resumeQueueCalled) return
2029
+ _resumeQueueCalled = true
2025
2030
  // Check for tasks stuck in 'queued' status but not in the queue array
2026
2031
  const tasks = loadTasks()
2027
2032
  const queue = loadQueue()