@swarmclawai/swarmclaw 1.9.22 → 1.9.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -5
- package/package.json +2 -2
- package/src/app/api/gateways/[id]/control/route.ts +22 -0
- package/src/app/api/gateways/control-route.test.ts +86 -0
- package/src/components/providers/provider-list.tsx +75 -2
- package/src/components/schedules/schedule-console.tsx +3 -0
- package/src/features/gateways/queries.ts +17 -0
- package/src/lib/server/agents/agent-runtime-config.test.ts +80 -0
- package/src/lib/server/agents/agent-runtime-config.ts +55 -4
- package/src/lib/server/connectors/slack.test.ts +30 -0
- package/src/lib/server/connectors/slack.ts +8 -3
- package/src/lib/server/gateways/gateway-profile-service.ts +71 -0
- package/src/lib/server/operations/operation-pulse.test.ts +34 -0
- package/src/lib/server/operations/operation-pulse.ts +18 -0
- package/src/lib/server/runtime/scheduler.test.ts +129 -0
- package/src/lib/server/runtime/scheduler.ts +62 -35
- package/src/lib/server/schedules/schedule-history.test.ts +14 -0
- package/src/lib/server/schedules/schedule-history.ts +1 -0
- package/src/lib/server/schedules/schedule-lifecycle.ts +5 -28
- package/src/lib/server/schedules/schedule-normalization.ts +6 -28
- package/src/lib/server/schedules/schedule-timing.test.ts +80 -0
- package/src/lib/server/schedules/schedule-timing.ts +179 -0
- package/src/lib/server/tasks/task-lifecycle.ts +35 -5
- package/src/lib/validation/schemas.ts +5 -0
- package/src/types/misc.ts +12 -0
- package/src/types/schedule.ts +2 -2
- package/src/types/task.ts +1 -0
|
@@ -27,6 +27,29 @@ function normalizeNullableNumber(value: unknown): number | null {
|
|
|
27
27
|
return typeof value === 'number' && Number.isFinite(value) ? value : null
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
+
function normalizeLifecycleState(value: unknown): NonNullable<GatewayProfile['lifecycleState']> {
|
|
31
|
+
return value === 'draining' || value === 'cordoned' ? value : 'active'
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
function normalizeControlAction(value: unknown): GatewayProfile['lastControlAction'] {
|
|
35
|
+
return value === 'activate' || value === 'drain' || value === 'cordon' || value === 'restart'
|
|
36
|
+
? value
|
|
37
|
+
: null
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
function normalizeControlRequest(value: unknown): GatewayProfile['controlRequest'] {
|
|
41
|
+
if (!value || typeof value !== 'object') return null
|
|
42
|
+
const request = value as Record<string, unknown>
|
|
43
|
+
const requestedAt = normalizeNullableNumber(request.requestedAt)
|
|
44
|
+
if (request.action !== 'restart' || !requestedAt) return null
|
|
45
|
+
return {
|
|
46
|
+
action: 'restart',
|
|
47
|
+
requestedAt,
|
|
48
|
+
source: 'swarmclaw',
|
|
49
|
+
reason: normalizeText(request.reason),
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
30
53
|
function normalizeDeployment(value: unknown): OpenClawDeploymentConfig | null {
|
|
31
54
|
if (!value || typeof value !== 'object') return null
|
|
32
55
|
const deployment = value as Record<string, unknown>
|
|
@@ -106,6 +129,11 @@ export function createGatewayProfile(input: Record<string, unknown>): GatewayPro
|
|
|
106
129
|
wsUrl: normalizeText(input.wsUrl),
|
|
107
130
|
credentialId: normalizeText(input.credentialId),
|
|
108
131
|
status: typeof input.status === 'string' && input.status.trim() ? input.status as GatewayProfile['status'] : 'unknown',
|
|
132
|
+
lifecycleState: normalizeLifecycleState(input.lifecycleState),
|
|
133
|
+
lastControlAction: normalizeControlAction(input.lastControlAction),
|
|
134
|
+
lastControlActionAt: normalizeNullableNumber(input.lastControlActionAt),
|
|
135
|
+
lastControlReason: normalizeText(input.lastControlReason),
|
|
136
|
+
controlRequest: normalizeControlRequest(input.controlRequest),
|
|
109
137
|
notes: typeof input.notes === 'string' ? input.notes : null,
|
|
110
138
|
tags: normalizeTags(input.tags),
|
|
111
139
|
lastError: null,
|
|
@@ -149,6 +177,11 @@ export function updateGatewayProfile(id: string, input: Record<string, unknown>)
|
|
|
149
177
|
: 'unknown'
|
|
150
178
|
gateway.status = nextStatus
|
|
151
179
|
}
|
|
180
|
+
if (input.lifecycleState !== undefined) gateway.lifecycleState = normalizeLifecycleState(input.lifecycleState)
|
|
181
|
+
if (input.lastControlAction !== undefined) gateway.lastControlAction = normalizeControlAction(input.lastControlAction)
|
|
182
|
+
if (input.lastControlActionAt !== undefined) gateway.lastControlActionAt = normalizeNullableNumber(input.lastControlActionAt)
|
|
183
|
+
if (input.lastControlReason !== undefined) gateway.lastControlReason = normalizeText(input.lastControlReason)
|
|
184
|
+
if (input.controlRequest !== undefined) gateway.controlRequest = normalizeControlRequest(input.controlRequest)
|
|
152
185
|
if (input.notes !== undefined) gateway.notes = typeof input.notes === 'string' ? input.notes : null
|
|
153
186
|
if (input.tags !== undefined) gateway.tags = normalizeTags(input.tags)
|
|
154
187
|
if (input.lastError !== undefined) gateway.lastError = typeof input.lastError === 'string' ? input.lastError : null
|
|
@@ -167,6 +200,44 @@ export function updateGatewayProfile(id: string, input: Record<string, unknown>)
|
|
|
167
200
|
return gateway
|
|
168
201
|
}
|
|
169
202
|
|
|
203
|
+
export function controlGatewayProfile(
|
|
204
|
+
id: string,
|
|
205
|
+
input: { action: NonNullable<GatewayProfile['lastControlAction']>; reason?: string | null },
|
|
206
|
+
now = Date.now(),
|
|
207
|
+
): GatewayProfile | null {
|
|
208
|
+
const gateways = loadGatewayProfiles()
|
|
209
|
+
const gateway = gateways[id]
|
|
210
|
+
if (!gateway) return null
|
|
211
|
+
|
|
212
|
+
const action = normalizeControlAction(input.action)
|
|
213
|
+
if (!action) return null
|
|
214
|
+
|
|
215
|
+
gateway.lifecycleState = action === 'drain'
|
|
216
|
+
? 'draining'
|
|
217
|
+
: action === 'cordon'
|
|
218
|
+
? 'cordoned'
|
|
219
|
+
: action === 'activate'
|
|
220
|
+
? 'active'
|
|
221
|
+
: normalizeLifecycleState(gateway.lifecycleState)
|
|
222
|
+
gateway.lastControlAction = action
|
|
223
|
+
gateway.lastControlActionAt = now
|
|
224
|
+
gateway.lastControlReason = normalizeText(input.reason)
|
|
225
|
+
gateway.controlRequest = action === 'restart'
|
|
226
|
+
? {
|
|
227
|
+
action: 'restart',
|
|
228
|
+
requestedAt: now,
|
|
229
|
+
source: 'swarmclaw',
|
|
230
|
+
reason: normalizeText(input.reason),
|
|
231
|
+
}
|
|
232
|
+
: null
|
|
233
|
+
gateway.updatedAt = now
|
|
234
|
+
|
|
235
|
+
gateways[id] = gateway
|
|
236
|
+
saveGatewayProfiles(gateways)
|
|
237
|
+
notify('gateways')
|
|
238
|
+
return gateway
|
|
239
|
+
}
|
|
240
|
+
|
|
170
241
|
export function deleteGatewayProfileAndDetachAgents(id: string): boolean {
|
|
171
242
|
const gateways = loadGatewayProfiles()
|
|
172
243
|
const deleted = gateways[id]
|
|
@@ -88,6 +88,11 @@ function gateway(overrides: Partial<GatewayProfile>): GatewayProfile {
|
|
|
88
88
|
wsUrl: overrides.wsUrl ?? null,
|
|
89
89
|
credentialId: overrides.credentialId ?? null,
|
|
90
90
|
status: overrides.status || 'healthy',
|
|
91
|
+
lifecycleState: overrides.lifecycleState || 'active',
|
|
92
|
+
lastControlAction: overrides.lastControlAction ?? null,
|
|
93
|
+
lastControlActionAt: overrides.lastControlActionAt ?? null,
|
|
94
|
+
lastControlReason: overrides.lastControlReason ?? null,
|
|
95
|
+
controlRequest: overrides.controlRequest ?? null,
|
|
91
96
|
notes: overrides.notes ?? null,
|
|
92
97
|
tags: overrides.tags || [],
|
|
93
98
|
lastError: overrides.lastError ?? null,
|
|
@@ -190,4 +195,33 @@ describe('operation pulse', () => {
|
|
|
190
195
|
assert.ok((pulse.actions[0]?.summary || '').includes('no available OpenClaw execution environments'))
|
|
191
196
|
assert.equal(pulse.actions[0]?.evidence.includes('0/2 environments'), true)
|
|
192
197
|
})
|
|
198
|
+
|
|
199
|
+
it('surfaces gateways that are unavailable for automatic new work', () => {
|
|
200
|
+
const pulse = buildOperationPulse({
|
|
201
|
+
range: '24h',
|
|
202
|
+
now,
|
|
203
|
+
missions: [],
|
|
204
|
+
runs: [],
|
|
205
|
+
approvals: [],
|
|
206
|
+
connectors: [],
|
|
207
|
+
gateways: [
|
|
208
|
+
gateway({
|
|
209
|
+
lifecycleState: 'cordoned',
|
|
210
|
+
stats: {
|
|
211
|
+
nodeCount: 1,
|
|
212
|
+
connectedNodeCount: 1,
|
|
213
|
+
environmentCount: 1,
|
|
214
|
+
availableEnvironmentCount: 1,
|
|
215
|
+
lastTopologyCheckedAt: now - 1000,
|
|
216
|
+
},
|
|
217
|
+
}),
|
|
218
|
+
],
|
|
219
|
+
})
|
|
220
|
+
|
|
221
|
+
assert.equal(pulse.kpis.gatewayAttention, 1)
|
|
222
|
+
assert.equal(pulse.actions[0]?.kind, 'gateway')
|
|
223
|
+
assert.equal(pulse.actions[0]?.severity, 'medium')
|
|
224
|
+
assert.ok((pulse.actions[0]?.summary || '').includes('cordoned from automatic new work'))
|
|
225
|
+
assert.equal(pulse.actions[0]?.evidence.includes('lifecycle:cordoned'), true)
|
|
226
|
+
})
|
|
193
227
|
})
|
|
@@ -84,8 +84,10 @@ function gatewayAttentionReason(gateway: GatewayProfile, now: number): {
|
|
|
84
84
|
const errorCount = gateway.stats?.lastTopologyErrorCount || 0
|
|
85
85
|
const checkedAt = gateway.stats?.lastTopologyCheckedAt || gateway.lastCheckedAt || null
|
|
86
86
|
const staleTopology = !checkedAt || now - checkedAt > GATEWAY_TOPOLOGY_STALE_MS
|
|
87
|
+
const lifecycleState = gateway.lifecycleState || 'active'
|
|
87
88
|
const evidence = [
|
|
88
89
|
`status:${gateway.status}`,
|
|
90
|
+
`lifecycle:${lifecycleState}`,
|
|
89
91
|
`${gateway.stats?.connectedNodeCount || 0}/${gateway.stats?.nodeCount || 0} nodes`,
|
|
90
92
|
`${gateway.stats?.availableEnvironmentCount || 0}/${gateway.stats?.environmentCount || 0} environments`,
|
|
91
93
|
]
|
|
@@ -106,6 +108,22 @@ function gatewayAttentionReason(gateway: GatewayProfile, now: number): {
|
|
|
106
108
|
}
|
|
107
109
|
}
|
|
108
110
|
|
|
111
|
+
if (lifecycleState === 'cordoned') {
|
|
112
|
+
return {
|
|
113
|
+
severity: 'medium',
|
|
114
|
+
summary: `${gateway.name} is cordoned from automatic new work.`,
|
|
115
|
+
evidence,
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
if (lifecycleState === 'draining') {
|
|
120
|
+
return {
|
|
121
|
+
severity: 'medium',
|
|
122
|
+
summary: `${gateway.name} is draining and will not receive automatic new work.`,
|
|
123
|
+
evidence,
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
109
127
|
if (errorCount > 0) {
|
|
110
128
|
return {
|
|
111
129
|
severity: 'medium',
|
|
@@ -202,6 +202,135 @@ describe('scheduler wake targeting', () => {
|
|
|
202
202
|
assert.deepEqual(output.deliveryModes, ['silent'])
|
|
203
203
|
})
|
|
204
204
|
|
|
205
|
+
it('repairs stale future cron next-run slots without launching a run', () => {
|
|
206
|
+
const output = runSchedulerWithTempDataDir(`
|
|
207
|
+
const storageMod = await import('@/lib/server/storage')
|
|
208
|
+
const schedulerMod = await import('@/lib/server/runtime/scheduler')
|
|
209
|
+
const storage = storageMod.default || storageMod
|
|
210
|
+
const scheduler = schedulerMod.default || schedulerMod
|
|
211
|
+
|
|
212
|
+
const now = Date.parse('2026-05-06T07:30:00.000Z')
|
|
213
|
+
const staleFuture = Date.parse('2026-05-12T08:00:00.000Z')
|
|
214
|
+
|
|
215
|
+
storage.saveSchedules({
|
|
216
|
+
'sched-cron': {
|
|
217
|
+
id: 'sched-cron',
|
|
218
|
+
name: 'Daily status',
|
|
219
|
+
agentId: 'agent-1',
|
|
220
|
+
taskPrompt: 'Send the daily status.',
|
|
221
|
+
scheduleType: 'cron',
|
|
222
|
+
cron: '0 8 * * *',
|
|
223
|
+
timezone: 'UTC',
|
|
224
|
+
status: 'active',
|
|
225
|
+
nextRunAt: staleFuture,
|
|
226
|
+
createdAt: now - 10_000,
|
|
227
|
+
updatedAt: now - 10_000,
|
|
228
|
+
},
|
|
229
|
+
})
|
|
230
|
+
|
|
231
|
+
await scheduler.runSchedulerTickForTests(now)
|
|
232
|
+
const schedule = storage.loadSchedules()['sched-cron']
|
|
233
|
+
|
|
234
|
+
console.log(JSON.stringify({
|
|
235
|
+
status: schedule.status,
|
|
236
|
+
nextRunAt: schedule.nextRunAt,
|
|
237
|
+
taskCount: Object.keys(storage.loadTasks()).length,
|
|
238
|
+
historyAction: schedule.history?.[0]?.action || null,
|
|
239
|
+
historyReason: schedule.history?.[0]?.metadata?.reason || null,
|
|
240
|
+
}))
|
|
241
|
+
`)
|
|
242
|
+
|
|
243
|
+
assert.equal(output.status, 'active')
|
|
244
|
+
assert.equal(output.nextRunAt, Date.parse('2026-05-06T08:00:00.000Z'))
|
|
245
|
+
assert.equal(output.taskCount, 0)
|
|
246
|
+
assert.equal(output.historyAction, 'repaired')
|
|
247
|
+
assert.equal(output.historyReason, 'stale_future')
|
|
248
|
+
})
|
|
249
|
+
|
|
250
|
+
it('advances cron schedules from the scheduler tick time after firing', () => {
|
|
251
|
+
const output = runSchedulerWithTempDataDir(`
|
|
252
|
+
const storageMod = await import('@/lib/server/storage')
|
|
253
|
+
const schedulerMod = await import('@/lib/server/runtime/scheduler')
|
|
254
|
+
const heartbeatWakeMod = await import('@/lib/server/runtime/heartbeat-wake')
|
|
255
|
+
const storage = storageMod.default || storageMod
|
|
256
|
+
const scheduler = schedulerMod.default || schedulerMod
|
|
257
|
+
const heartbeatWake = heartbeatWakeMod.default || heartbeatWakeMod
|
|
258
|
+
|
|
259
|
+
const now = Date.parse('2030-01-01T08:00:30.000Z')
|
|
260
|
+
const dueAt = Date.parse('2030-01-01T08:00:00.000Z')
|
|
261
|
+
|
|
262
|
+
storage.saveAgents({
|
|
263
|
+
'agent-1': {
|
|
264
|
+
id: 'agent-1',
|
|
265
|
+
name: 'Daily Agent',
|
|
266
|
+
description: '',
|
|
267
|
+
systemPrompt: '',
|
|
268
|
+
provider: 'openai',
|
|
269
|
+
model: 'gpt-test',
|
|
270
|
+
threadSessionId: 'thread-main',
|
|
271
|
+
createdAt: now - 10_000,
|
|
272
|
+
updatedAt: now - 10_000,
|
|
273
|
+
},
|
|
274
|
+
})
|
|
275
|
+
|
|
276
|
+
storage.saveSessions({
|
|
277
|
+
'thread-main': {
|
|
278
|
+
id: 'thread-main',
|
|
279
|
+
name: 'Daily Agent',
|
|
280
|
+
cwd: process.env.WORKSPACE_DIR,
|
|
281
|
+
user: 'tester',
|
|
282
|
+
provider: 'openai',
|
|
283
|
+
model: 'gpt-test',
|
|
284
|
+
claudeSessionId: null,
|
|
285
|
+
messages: [],
|
|
286
|
+
createdAt: now - 10_000,
|
|
287
|
+
lastActiveAt: now - 5_000,
|
|
288
|
+
active: true,
|
|
289
|
+
currentRunId: null,
|
|
290
|
+
agentId: 'agent-1',
|
|
291
|
+
shortcutForAgentId: 'agent-1',
|
|
292
|
+
},
|
|
293
|
+
})
|
|
294
|
+
|
|
295
|
+
storage.saveSchedules({
|
|
296
|
+
'sched-cron': {
|
|
297
|
+
id: 'sched-cron',
|
|
298
|
+
name: 'Daily wake',
|
|
299
|
+
agentId: 'agent-1',
|
|
300
|
+
taskPrompt: 'Wake for the daily status.',
|
|
301
|
+
taskMode: 'wake_only',
|
|
302
|
+
message: 'Run the daily status.',
|
|
303
|
+
scheduleType: 'cron',
|
|
304
|
+
cron: '0 8 * * *',
|
|
305
|
+
timezone: 'UTC',
|
|
306
|
+
status: 'active',
|
|
307
|
+
nextRunAt: dueAt,
|
|
308
|
+
createdInSessionId: 'thread-main',
|
|
309
|
+
createdAt: now - 10_000,
|
|
310
|
+
updatedAt: now - 10_000,
|
|
311
|
+
},
|
|
312
|
+
})
|
|
313
|
+
|
|
314
|
+
await scheduler.runSchedulerTickForTests(now)
|
|
315
|
+
const schedule = storage.loadSchedules()['sched-cron']
|
|
316
|
+
const wakes = heartbeatWake.snapshotPendingHeartbeatWakesForTests()
|
|
317
|
+
|
|
318
|
+
console.log(JSON.stringify({
|
|
319
|
+
status: schedule.status,
|
|
320
|
+
nextRunAt: schedule.nextRunAt,
|
|
321
|
+
runNumber: schedule.runNumber,
|
|
322
|
+
historyAction: schedule.history?.[0]?.action || null,
|
|
323
|
+
wakeCount: wakes.length,
|
|
324
|
+
}))
|
|
325
|
+
`)
|
|
326
|
+
|
|
327
|
+
assert.equal(output.status, 'active')
|
|
328
|
+
assert.equal(output.nextRunAt, Date.parse('2030-01-02T08:00:00.000Z'))
|
|
329
|
+
assert.equal(output.runNumber, 1)
|
|
330
|
+
assert.equal(output.historyAction, 'run_started')
|
|
331
|
+
assert.equal(output.wakeCount, 1)
|
|
332
|
+
})
|
|
333
|
+
|
|
205
334
|
it('reuses a persistent mission for scheduled task runs', () => {
|
|
206
335
|
const output = runSchedulerWithTempDataDir(`
|
|
207
336
|
const storageMod = await import('@/lib/server/storage')
|
|
@@ -2,7 +2,6 @@ import { listAgents } from '@/lib/server/agents/agent-repository'
|
|
|
2
2
|
import { loadSchedules, upsertSchedule, upsertSchedules } from '@/lib/server/schedules/schedule-repository'
|
|
3
3
|
import { loadTasks, upsertTask } from '@/lib/server/tasks/task-repository'
|
|
4
4
|
import { enqueueTask } from '@/lib/server/runtime/queue'
|
|
5
|
-
import { CronExpressionParser } from 'cron-parser'
|
|
6
5
|
import { pushMainLoopEventToMainSessions } from '@/lib/server/agents/main-agent-loop'
|
|
7
6
|
import { getScheduleSignatureKey } from '@/lib/schedules/schedule-dedupe'
|
|
8
7
|
import { dispatchWake } from '@/lib/server/runtime/wake-dispatcher'
|
|
@@ -14,6 +13,7 @@ import { hasActiveProtocolRunForSchedule, launchProtocolRunForSchedule } from '@
|
|
|
14
13
|
import { hmrSingleton } from '@/lib/shared-utils'
|
|
15
14
|
import { log } from '@/lib/server/logger'
|
|
16
15
|
import { appendScheduleHistoryEntry } from '@/lib/server/schedules/schedule-history'
|
|
16
|
+
import { assessScheduleNextRunRepair, computeScheduleNextRunAt } from '@/lib/server/schedules/schedule-timing'
|
|
17
17
|
import type { Schedule } from '@/types'
|
|
18
18
|
|
|
19
19
|
const TAG = 'scheduler'
|
|
@@ -52,7 +52,7 @@ export function startScheduler() {
|
|
|
52
52
|
if (schedulerState.intervalId) return
|
|
53
53
|
log.info(TAG, 'Starting scheduler engine (60s tick)')
|
|
54
54
|
|
|
55
|
-
// Compute initial nextRunAt
|
|
55
|
+
// Compute initial timing and repair stale nextRunAt values before the first tick.
|
|
56
56
|
computeNextRuns()
|
|
57
57
|
|
|
58
58
|
schedulerState.intervalId = setInterval(tick, TICK_INTERVAL)
|
|
@@ -66,32 +66,64 @@ export function stopScheduler() {
|
|
|
66
66
|
}
|
|
67
67
|
}
|
|
68
68
|
|
|
69
|
-
function computeNextRuns() {
|
|
69
|
+
function computeNextRuns(now = Date.now()): Record<string, Schedule> {
|
|
70
70
|
const schedules = loadSchedules()
|
|
71
71
|
const changedEntries: Array<[string, Schedule]> = []
|
|
72
72
|
for (const schedule of Object.values(schedules)) {
|
|
73
73
|
if (schedule.status !== 'active') continue
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
74
|
+
const assessment = assessScheduleNextRunRepair(schedule, now)
|
|
75
|
+
if (!assessment.ok) {
|
|
76
|
+
log.error(TAG, `Invalid cron for ${schedule.id}`)
|
|
77
|
+
const failedSchedule = appendScheduleHistoryEntry({
|
|
78
|
+
...schedule,
|
|
79
|
+
status: 'failed',
|
|
80
|
+
updatedAt: now,
|
|
81
|
+
}, {
|
|
82
|
+
now,
|
|
83
|
+
actor: 'system',
|
|
84
|
+
action: 'failed',
|
|
85
|
+
summary: `Schedule failed because cron could not be parsed: "${schedule.name}"`,
|
|
86
|
+
changes: [{
|
|
87
|
+
field: 'status',
|
|
88
|
+
label: 'Status',
|
|
89
|
+
before: 'active',
|
|
90
|
+
after: 'failed',
|
|
91
|
+
}],
|
|
92
|
+
metadata: { reason: 'invalid_cron' },
|
|
93
|
+
})
|
|
94
|
+
schedules[schedule.id] = failedSchedule
|
|
95
|
+
changedEntries.push([schedule.id, failedSchedule])
|
|
96
|
+
continue
|
|
97
|
+
}
|
|
98
|
+
if (assessment.repair) {
|
|
99
|
+
const repairedSchedule = appendScheduleHistoryEntry({
|
|
100
|
+
...schedule,
|
|
101
|
+
nextRunAt: assessment.nextRunAt,
|
|
102
|
+
updatedAt: now,
|
|
103
|
+
}, {
|
|
104
|
+
now,
|
|
105
|
+
actor: 'system',
|
|
106
|
+
action: 'repaired',
|
|
107
|
+
summary: `Schedule timing repaired: "${schedule.name}"`,
|
|
108
|
+
changes: [{
|
|
109
|
+
field: 'nextRunAt',
|
|
110
|
+
label: 'Next run',
|
|
111
|
+
before: assessment.previousNextRunAt == null ? null : String(assessment.previousNextRunAt),
|
|
112
|
+
after: String(assessment.nextRunAt),
|
|
113
|
+
}],
|
|
114
|
+
metadata: { reason: assessment.reason },
|
|
115
|
+
})
|
|
116
|
+
schedules[schedule.id] = repairedSchedule
|
|
117
|
+
changedEntries.push([schedule.id, repairedSchedule])
|
|
87
118
|
}
|
|
88
119
|
}
|
|
89
120
|
if (changedEntries.length > 0) upsertSchedules(changedEntries)
|
|
121
|
+
return schedules
|
|
90
122
|
}
|
|
91
123
|
|
|
92
124
|
async function tick(now = Date.now()) {
|
|
93
125
|
await processDueWatchJobs(now)
|
|
94
|
-
const schedules =
|
|
126
|
+
const schedules = computeNextRuns(now)
|
|
95
127
|
const agents = listAgents()
|
|
96
128
|
const tasks = loadTasks()
|
|
97
129
|
const inFlightScheduleKeys = new Set<string>(
|
|
@@ -101,27 +133,22 @@ async function tick(now = Date.now()) {
|
|
|
101
133
|
.filter((value: string) => value.length > 0),
|
|
102
134
|
)
|
|
103
135
|
|
|
104
|
-
const applyStagger = (ts: number, staggerSec: number | null | undefined): number => {
|
|
105
|
-
if (!staggerSec || staggerSec <= 0) return ts
|
|
106
|
-
return ts + Math.floor(Math.random() * staggerSec * 1000)
|
|
107
|
-
}
|
|
108
|
-
|
|
109
136
|
const advanceSchedule = (schedule: Schedule): void => {
|
|
110
|
-
if (schedule.scheduleType === '
|
|
111
|
-
try {
|
|
112
|
-
const interval = CronExpressionParser.parse(
|
|
113
|
-
schedule.cron,
|
|
114
|
-
schedule.timezone ? { tz: schedule.timezone } : undefined,
|
|
115
|
-
)
|
|
116
|
-
schedule.nextRunAt = applyStagger(interval.next().getTime(), schedule.staggerSec)
|
|
117
|
-
} catch {
|
|
118
|
-
schedule.status = 'failed'
|
|
119
|
-
}
|
|
120
|
-
} else if (schedule.scheduleType === 'interval' && schedule.intervalMs) {
|
|
121
|
-
schedule.nextRunAt = applyStagger(now + schedule.intervalMs, schedule.staggerSec)
|
|
122
|
-
} else if (schedule.scheduleType === 'once') {
|
|
137
|
+
if (schedule.scheduleType === 'once') {
|
|
123
138
|
schedule.status = 'completed'
|
|
124
139
|
schedule.nextRunAt = undefined
|
|
140
|
+
return
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
try {
|
|
144
|
+
const nextRunAt = computeScheduleNextRunAt(schedule, now)
|
|
145
|
+
if (nextRunAt == null) {
|
|
146
|
+
schedule.status = 'failed'
|
|
147
|
+
} else {
|
|
148
|
+
schedule.nextRunAt = nextRunAt
|
|
149
|
+
}
|
|
150
|
+
} catch {
|
|
151
|
+
schedule.status = 'failed'
|
|
125
152
|
}
|
|
126
153
|
}
|
|
127
154
|
|
|
@@ -118,4 +118,18 @@ describe('schedule history', () => {
|
|
|
118
118
|
assert.equal(history[24].id, 'hist-5')
|
|
119
119
|
assert.equal(schedule.revision, 30)
|
|
120
120
|
})
|
|
121
|
+
|
|
122
|
+
it('retains scheduler repair history entries', () => {
|
|
123
|
+
const history = normalizeScheduleHistory([{
|
|
124
|
+
id: 'hist-repair',
|
|
125
|
+
at: 1_000,
|
|
126
|
+
actor: 'system',
|
|
127
|
+
action: 'repaired',
|
|
128
|
+
revision: 1,
|
|
129
|
+
summary: 'Schedule timing repaired',
|
|
130
|
+
}])
|
|
131
|
+
|
|
132
|
+
assert.equal(history.length, 1)
|
|
133
|
+
assert.equal(history[0].action, 'repaired')
|
|
134
|
+
})
|
|
121
135
|
})
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import { CronExpressionParser } from 'cron-parser'
|
|
2
|
-
|
|
3
1
|
import { genId } from '@/lib/id'
|
|
4
2
|
import type { BoardTask, Schedule, ScheduleStatus, Session } from '@/types'
|
|
5
3
|
import { pushMainLoopEventToMainSessions } from '@/lib/server/agents/main-agent-loop'
|
|
@@ -19,6 +17,7 @@ import {
|
|
|
19
17
|
import { notify } from '@/lib/server/ws-hub'
|
|
20
18
|
import { getScheduleClusterIds } from '@/lib/server/schedules/schedule-service'
|
|
21
19
|
import { appendScheduleHistoryEntry } from '@/lib/server/schedules/schedule-history'
|
|
20
|
+
import { computeScheduleNextRunAt } from '@/lib/server/schedules/schedule-timing'
|
|
22
21
|
|
|
23
22
|
type RestorableScheduleStatus = Exclude<ScheduleStatus, 'archived'>
|
|
24
23
|
|
|
@@ -50,33 +49,11 @@ export interface SchedulePurgeResult {
|
|
|
50
49
|
}
|
|
51
50
|
|
|
52
51
|
function computeNextRunAt(schedule: Pick<Schedule, 'scheduleType' | 'cron' | 'intervalMs' | 'runAt' | 'timezone' | 'staggerSec'>, now: number): number | undefined {
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
if (schedule.scheduleType === 'once') {
|
|
59
|
-
return typeof schedule.runAt === 'number' && Number.isFinite(schedule.runAt)
|
|
60
|
-
? applyStagger(schedule.runAt)
|
|
61
|
-
: undefined
|
|
62
|
-
}
|
|
63
|
-
if (schedule.scheduleType === 'interval') {
|
|
64
|
-
return typeof schedule.intervalMs === 'number' && Number.isFinite(schedule.intervalMs)
|
|
65
|
-
? applyStagger(now + schedule.intervalMs)
|
|
66
|
-
: undefined
|
|
67
|
-
}
|
|
68
|
-
if (schedule.scheduleType === 'cron' && typeof schedule.cron === 'string' && schedule.cron.trim()) {
|
|
69
|
-
try {
|
|
70
|
-
const interval = CronExpressionParser.parse(
|
|
71
|
-
schedule.cron,
|
|
72
|
-
schedule.timezone ? { tz: schedule.timezone } : undefined,
|
|
73
|
-
)
|
|
74
|
-
return applyStagger(interval.next().getTime())
|
|
75
|
-
} catch {
|
|
76
|
-
return undefined
|
|
77
|
-
}
|
|
52
|
+
try {
|
|
53
|
+
return computeScheduleNextRunAt(schedule, now)
|
|
54
|
+
} catch {
|
|
55
|
+
return undefined
|
|
78
56
|
}
|
|
79
|
-
return undefined
|
|
80
57
|
}
|
|
81
58
|
|
|
82
59
|
function cloneSchedule(schedule: Schedule): Schedule {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import fs from 'node:fs'
|
|
2
2
|
import path from 'node:path'
|
|
3
|
-
import { CronExpressionParser } from 'cron-parser'
|
|
4
3
|
import { WORKSPACE_DIR } from '@/lib/server/data-dir'
|
|
4
|
+
import { computeScheduleNextRunAt } from '@/lib/server/schedules/schedule-timing'
|
|
5
5
|
|
|
6
6
|
type SchedulePayload = Record<string, unknown>
|
|
7
7
|
|
|
@@ -86,15 +86,6 @@ function parseAtTimeToCron(atTime: string): string | null {
|
|
|
86
86
|
return `${minutes} ${hours} * * *`
|
|
87
87
|
}
|
|
88
88
|
|
|
89
|
-
/**
|
|
90
|
-
* Apply a random stagger offset (in seconds) to a timestamp.
|
|
91
|
-
*/
|
|
92
|
-
function applyStagger(timestamp: number, staggerSec: number | null | undefined): number {
|
|
93
|
-
if (!staggerSec || staggerSec <= 0) return timestamp
|
|
94
|
-
const offset = Math.floor(Math.random() * staggerSec * 1000)
|
|
95
|
-
return timestamp + offset
|
|
96
|
-
}
|
|
97
|
-
|
|
98
89
|
function normalizePositiveInt(value: unknown): number | null {
|
|
99
90
|
const parsed = typeof value === 'number'
|
|
100
91
|
? value
|
|
@@ -326,24 +317,11 @@ export function normalizeSchedulePayload(payload: SchedulePayload, opts: Normali
|
|
|
326
317
|
}
|
|
327
318
|
|
|
328
319
|
if (normalized.status !== 'archived' && normalized.nextRunAt == null) {
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
try {
|
|
335
|
-
const cronTimezone = trimString(normalized.timezone)
|
|
336
|
-
const interval = CronExpressionParser.parse(
|
|
337
|
-
normalized.cron as string,
|
|
338
|
-
{
|
|
339
|
-
...(cronTimezone ? { tz: cronTimezone } : {}),
|
|
340
|
-
currentDate: new Date(now),
|
|
341
|
-
},
|
|
342
|
-
)
|
|
343
|
-
normalized.nextRunAt = applyStagger(interval.next().getTime(), normalized.staggerSec as number | null)
|
|
344
|
-
} catch {
|
|
345
|
-
return { ok: false, error: 'Error: invalid cron expression.' }
|
|
346
|
-
}
|
|
320
|
+
try {
|
|
321
|
+
const computedNextRunAt = computeScheduleNextRunAt(normalized, now)
|
|
322
|
+
if (computedNextRunAt != null) normalized.nextRunAt = computedNextRunAt
|
|
323
|
+
} catch {
|
|
324
|
+
return { ok: false, error: 'Error: invalid cron expression.' }
|
|
347
325
|
}
|
|
348
326
|
}
|
|
349
327
|
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import assert from 'node:assert/strict'
|
|
2
|
+
import { describe, it } from 'node:test'
|
|
3
|
+
|
|
4
|
+
import {
|
|
5
|
+
assessScheduleNextRunRepair,
|
|
6
|
+
computeScheduleNextRunAt,
|
|
7
|
+
stableScheduleStaggerMs,
|
|
8
|
+
} from '@/lib/server/schedules/schedule-timing'
|
|
9
|
+
|
|
10
|
+
describe('schedule timing', () => {
|
|
11
|
+
it('computes cron next runs from the provided scheduler time', () => {
|
|
12
|
+
const nextRunAt = computeScheduleNextRunAt({
|
|
13
|
+
id: 'sched-daily',
|
|
14
|
+
name: 'Daily status',
|
|
15
|
+
agentId: 'agent-1',
|
|
16
|
+
scheduleType: 'cron',
|
|
17
|
+
cron: '0 8 * * *',
|
|
18
|
+
timezone: 'UTC',
|
|
19
|
+
}, Date.parse('2030-01-01T08:00:30.000Z'))
|
|
20
|
+
|
|
21
|
+
assert.equal(nextRunAt, Date.parse('2030-01-02T08:00:00.000Z'))
|
|
22
|
+
})
|
|
23
|
+
|
|
24
|
+
it('uses deterministic schedule stagger inside the configured window', () => {
|
|
25
|
+
const schedule = {
|
|
26
|
+
id: 'sched-staggered',
|
|
27
|
+
name: 'Staggered status',
|
|
28
|
+
agentId: 'agent-1',
|
|
29
|
+
scheduleType: 'cron',
|
|
30
|
+
cron: '0 8 * * *',
|
|
31
|
+
timezone: 'UTC',
|
|
32
|
+
staggerSec: 30,
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
const first = stableScheduleStaggerMs(schedule)
|
|
36
|
+
const second = stableScheduleStaggerMs(schedule)
|
|
37
|
+
|
|
38
|
+
assert.equal(first, second)
|
|
39
|
+
assert.ok(first >= 0)
|
|
40
|
+
assert.ok(first < 30_000)
|
|
41
|
+
})
|
|
42
|
+
|
|
43
|
+
it('repairs stale future cron slots to the earliest upcoming slot', () => {
|
|
44
|
+
const assessment = assessScheduleNextRunRepair({
|
|
45
|
+
id: 'sched-stale',
|
|
46
|
+
name: 'Daily status',
|
|
47
|
+
agentId: 'agent-1',
|
|
48
|
+
scheduleType: 'cron',
|
|
49
|
+
cron: '0 8 * * *',
|
|
50
|
+
timezone: 'UTC',
|
|
51
|
+
status: 'active',
|
|
52
|
+
nextRunAt: Date.parse('2026-05-12T08:00:00.000Z'),
|
|
53
|
+
}, Date.parse('2026-05-06T07:30:00.000Z'))
|
|
54
|
+
|
|
55
|
+
assert.equal(assessment.ok, true)
|
|
56
|
+
assert.equal(assessment.repair, true)
|
|
57
|
+
if (assessment.ok && assessment.repair) {
|
|
58
|
+
assert.equal(assessment.reason, 'stale_future')
|
|
59
|
+
assert.equal(assessment.nextRunAt, Date.parse('2026-05-06T08:00:00.000Z'))
|
|
60
|
+
}
|
|
61
|
+
})
|
|
62
|
+
|
|
63
|
+
it('flags invalid due cron schedules before they launch', () => {
|
|
64
|
+
const assessment = assessScheduleNextRunRepair({
|
|
65
|
+
id: 'sched-invalid',
|
|
66
|
+
name: 'Broken cron',
|
|
67
|
+
agentId: 'agent-1',
|
|
68
|
+
scheduleType: 'cron',
|
|
69
|
+
cron: 'not a cron',
|
|
70
|
+
status: 'active',
|
|
71
|
+
nextRunAt: Date.parse('2026-05-06T07:00:00.000Z'),
|
|
72
|
+
}, Date.parse('2026-05-06T07:30:00.000Z'))
|
|
73
|
+
|
|
74
|
+
assert.equal(assessment.ok, false)
|
|
75
|
+
if (!assessment.ok) {
|
|
76
|
+
assert.equal(assessment.reason, 'invalid_cron')
|
|
77
|
+
assert.equal(assessment.previousNextRunAt, Date.parse('2026-05-06T07:00:00.000Z'))
|
|
78
|
+
}
|
|
79
|
+
})
|
|
80
|
+
})
|