openclaw-scheduler 0.2.8 → 0.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -0
- package/INSTALL-ADDITIONAL-HOST.md +1 -1
- package/INSTALL-LINUX.md +1 -1
- package/INSTALL-WINDOWS.md +1 -1
- package/INSTALL.md +1 -1
- package/JOB-QUICK-REF.md +2 -0
- package/README.md +5 -5
- package/cli.js +9 -1
- package/dispatch/529-recovery.mjs +21 -2
- package/dispatch/completion.mjs +49 -0
- package/dispatch/index.mjs +179 -11
- package/dispatch/paths.mjs +36 -0
- package/dispatch/watcher.mjs +78 -9
- package/dispatcher-strategies.js +121 -72
- package/dispatcher.js +4 -2
- package/docs/gateway-contract.md +21 -0
- package/gateway.js +140 -30
- package/index.d.ts +5 -0
- package/jobs.js +23 -8
- package/migrate-consolidate.js +6 -2
- package/package.json +4 -3
- package/paths.js +43 -1
- package/scheduler-schema.js +2 -0
- package/schema.sql +6 -1
- package/setup.mjs +24 -22
package/dispatch/watcher.mjs
CHANGED
|
@@ -39,6 +39,7 @@ import {
|
|
|
39
39
|
import { getDispatchLivenessPolicy } from './liveness.mjs';
|
|
40
40
|
import { resolveLabelsPath } from './paths.mjs';
|
|
41
41
|
import { sendMessage } from '../messages.js';
|
|
42
|
+
import { ensureArtifactsDir, resolveArtifactsDir } from '../paths.js';
|
|
42
43
|
|
|
43
44
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
44
45
|
const INDEX_PATH = process.env.DISPATCH_INDEX_PATH || join(__dirname, 'index.mjs');
|
|
@@ -54,12 +55,68 @@ const MAX_GW_RESTART_RETRIES = 2; // Max retries for gateway-restart-kill recove
|
|
|
54
55
|
|
|
55
56
|
const FLAT_WINDOW_MS = 3 * 60 * 1000; // 3 min flat = genuinely stuck
|
|
56
57
|
const ACTIVITY_POLL_MS = 30_000;
|
|
58
|
+
const COMPLETION_INLINE_LIMIT_BYTES = parsePositiveEnvInt('DISPATCH_COMPLETION_INLINE_LIMIT_BYTES', 60 * 1024);
|
|
57
59
|
|
|
58
60
|
/** How often the watcher writes lastPing to labels.json (heartbeat signal).
|
|
59
61
|
* The watchdog guard in index.mjs treats pings older than 3x this as stale,
|
|
60
62
|
* so PING_INTERVAL_MS must stay well below PING_STALE_MS (3 * 60_000). */
|
|
61
63
|
const PING_INTERVAL_MS = 60_000; // 60 seconds
|
|
62
64
|
|
|
65
|
+
/**
 * Read a positive integer setting from an environment variable.
 *
 * The value must consist solely of decimal digits (surrounding whitespace and
 * a leading "+" are tolerated). Anything else -- unset, empty, zero, negative,
 * fractional, exponent notation such as "1e6", or a unit-suffixed value such
 * as "64kb" -- yields `fallback`. A bare parseInt would silently read "1e6"
 * as 1 and "64kb" as 64, which is almost never what the operator intended.
 *
 * @param {string} name - Environment variable name to look up in process.env.
 * @param {number} fallback - Value returned when the variable is unusable.
 * @returns {number} The parsed positive integer, or `fallback`.
 */
function parsePositiveEnvInt(name, fallback) {
  const raw = String(process.env[name] ?? '').trim();
  // Reject anything that is not a plain run of digits before converting.
  if (!/^\+?\d+$/.test(raw)) return fallback;
  const value = Number.parseInt(raw, 10);
  // isSafeInteger also rejects absurdly long digit strings that lose precision.
  return Number.isSafeInteger(value) && value > 0 ? value : fallback;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Measure how many bytes a value occupies once stringified and UTF-8 encoded.
 * `null` and `undefined` are treated as the empty string (0 bytes).
 *
 * @param {*} text - Value to measure; non-strings are converted with String().
 * @returns {number} UTF-8 byte count.
 */
function byteLength(text) {
  const normalized = text == null ? '' : String(text);
  return Buffer.byteLength(normalized, 'utf8');
}
|
|
73
|
+
|
|
74
|
+
/**
 * Truncate text so its UTF-8 encoding fits within `maxBytes`.
 *
 * Text that already fits is returned unchanged. Otherwise the text is cut on
 * a code-point boundary (never in the middle of a surrogate pair) and any
 * trailing whitespace left at the cut is removed.
 *
 * @param {*} text - Value to truncate; null/undefined become ''.
 * @param {number} maxBytes - Maximum UTF-8 byte size of the result.
 * @returns {string} The original or truncated text.
 */
function sliceUtf8Bytes(text, maxBytes) {
  const source = String(text ?? '');
  if (Buffer.byteLength(source, 'utf8') <= maxBytes) return source;

  let consumedBytes = 0;
  let cutIndex = 0;
  // for...of walks whole code points, so `symbol` may span two UTF-16 units.
  for (const symbol of source) {
    const nextTotal = consumedBytes + Buffer.byteLength(symbol, 'utf8');
    if (nextTotal > maxBytes) break;
    consumedBytes = nextTotal;
    cutIndex += symbol.length;
  }

  const truncated = source.slice(0, cutIndex);
  return truncated.trimEnd();
}
|
|
88
|
+
|
|
89
|
+
/**
 * Build the file path for an oversized completion report.
 *
 * The label is reduced to a filename-safe slug: runs of characters outside
 * [a-z0-9._-] become '-', leading/trailing dashes are stripped, and the slug
 * is capped at 80 characters. An empty slug falls back to 'completion'.
 * The file name is prefixed with an ISO timestamp whose ':' and '.' are
 * replaced by '-'. The 'dispatch-completions' directory under the resolved
 * artifacts dir is passed through ensureArtifactsDir before the path is built.
 *
 * @param {*} label - Dispatch label used to name the artifact.
 * @returns {string} Path of the .txt artifact to write.
 */
function completionArtifactPath(label) {
  const slug = String(label || 'completion')
    .replace(/[^a-z0-9._-]+/gi, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, 80);
  const safeLabel = slug || 'completion';

  const artifactsRoot = resolveArtifactsDir({ env: process.env });
  const dir = ensureArtifactsDir(join(artifactsRoot, 'dispatch-completions'));

  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
  return join(dir, `${timestamp}-${safeLabel}.txt`);
}
|
|
97
|
+
|
|
98
|
+
/**
 * Format the completion message written to stdout for a finished dispatch.
 *
 * Bodies within COMPLETION_INLINE_LIMIT_BYTES are emitted in full. Larger
 * bodies are written to an artifact file and the inline body is truncated so
 * that body + note stay within the limit (the header and trailing newline are
 * not counted against the budget). If saving the artifact fails, the note
 * reports the failure instead of a path and the body is still truncated.
 *
 * @param {string} label - Dispatch label shown in the header.
 * @param {*} deliveryText - Completion text; null/undefined become ''.
 * @returns {string} Text ready to be written to stdout.
 */
function formatCompletionStdout(label, deliveryText) {
  const header = `🌶️ *dispatch* [${label}] completed:\n\n`;
  const body = String(deliveryText ?? '');
  const bodyBytes = byteLength(body);

  const fitsInline = bodyBytes <= COMPLETION_INLINE_LIMIT_BYTES;
  if (fitsInline) {
    return `${header}${body}\n`;
  }

  // Persist the full report, producing the note that explains the truncation.
  const saveFullReport = () => {
    try {
      const artifactPath = completionArtifactPath(label);
      writeFileSync(artifactPath, body, 'utf8');
      return `\n\nFull completion report saved to ${artifactPath} (${bodyBytes} bytes). Inline delivery capped at ${COMPLETION_INLINE_LIMIT_BYTES} bytes to avoid dumping an oversized report.`;
    } catch (err) {
      return `\n\nFull completion report was ${bodyBytes} bytes, but saving the oversized report failed: ${err.message}. Inline delivery capped at ${COMPLETION_INLINE_LIMIT_BYTES} bytes.`;
    }
  };
  const artifactNote = saveFullReport();

  // Whatever the note does not use is available for the inline excerpt.
  const remainingBytes = COMPLETION_INLINE_LIMIT_BYTES - byteLength(artifactNote);
  const inlineBody = sliceUtf8Bytes(body, Math.max(0, remainingBytes));
  return `${header}${inlineBody}${artifactNote}\n`;
}
|
|
63
120
|
|
|
64
121
|
function getGatewayToken() {
|
|
65
122
|
if (process.env.OPENCLAW_GATEWAY_TOKEN) return process.env.OPENCLAW_GATEWAY_TOKEN;
|
|
@@ -922,11 +979,7 @@ function deliverResult(label, lastReply, fallbackSummary, completionPayload = nu
|
|
|
922
979
|
markLabelDone(label, completion.summary);
|
|
923
980
|
|
|
924
981
|
if (completion.deliveryText) {
|
|
925
|
-
|
|
926
|
-
const reply = completion.deliveryText.length > maxLen
|
|
927
|
-
? completion.deliveryText.slice(0, maxLen) + '\n\n..[truncated]'
|
|
928
|
-
: completion.deliveryText;
|
|
929
|
-
process.stdout.write(`🌶️ *dispatch* [${label}] completed:\n\n${reply}\n`);
|
|
982
|
+
process.stdout.write(formatCompletionStdout(label, completion.deliveryText));
|
|
930
983
|
process.exit(0);
|
|
931
984
|
}
|
|
932
985
|
|
|
@@ -1120,14 +1173,18 @@ function runOnceAndExit() {
|
|
|
1120
1173
|
}
|
|
1121
1174
|
|
|
1122
1175
|
const ageMs = status.liveness?.ageMs;
|
|
1123
|
-
const
|
|
1176
|
+
const livenessPolicy = getCurrentLivenessPolicy();
|
|
1177
|
+
const idleResultCheckMs = livenessPolicy.idleProbeMs;
|
|
1178
|
+
const idleFailureMs = livenessPolicy.idleFailureMs;
|
|
1124
1179
|
if (ageMs != null && ageMs >= idleResultCheckMs) {
|
|
1125
1180
|
const result = dispatch('result', ['--label', label]);
|
|
1126
1181
|
if (hasStructuredCompletion(result)) {
|
|
1127
1182
|
deliverResult(label, result?.lastReply || null, null, result?.completion || null);
|
|
1128
1183
|
}
|
|
1129
1184
|
|
|
1130
|
-
const stallReason =
|
|
1185
|
+
const stallReason = ageMs >= idleFailureMs
|
|
1186
|
+
? getRunningSessionStallReason(status, idleFailureMs)
|
|
1187
|
+
: null;
|
|
1131
1188
|
if (stallReason) {
|
|
1132
1189
|
process.stderr.write(`[watcher] [${label}] ${stallReason}\n`);
|
|
1133
1190
|
markLabelError(label, stallReason);
|
|
@@ -1504,14 +1561,18 @@ while (Date.now() < deadline) {
|
|
|
1504
1561
|
// while this watcher's lastPing heartbeat is fresh (written every 60s);
|
|
1505
1562
|
// this path handles normal completion before the ping goes stale.
|
|
1506
1563
|
const ageMs = status.liveness?.ageMs;
|
|
1507
|
-
const
|
|
1564
|
+
const livenessPolicy = getCurrentLivenessPolicy();
|
|
1565
|
+
const idleResultCheckMs = livenessPolicy.idleProbeMs;
|
|
1566
|
+
const idleFailureMs = livenessPolicy.idleFailureMs;
|
|
1508
1567
|
if (ageMs != null && ageMs >= idleResultCheckMs) {
|
|
1509
1568
|
const result = dispatch('result', ['--label', label]);
|
|
1510
1569
|
if (hasStructuredCompletion(result)) {
|
|
1511
1570
|
deliverResult(label, result?.lastReply || null, null, result?.completion || null);
|
|
1512
1571
|
}
|
|
1513
1572
|
|
|
1514
|
-
const stallReason =
|
|
1573
|
+
const stallReason = ageMs >= idleFailureMs
|
|
1574
|
+
? getRunningSessionStallReason(status, idleFailureMs)
|
|
1575
|
+
: null;
|
|
1515
1576
|
if (stallReason) {
|
|
1516
1577
|
process.stderr.write(`[watcher] [${label}] ${stallReason}\n`);
|
|
1517
1578
|
markLabelError(label, stallReason);
|
|
@@ -1530,6 +1591,14 @@ while (Date.now() < deadline) {
|
|
|
1530
1591
|
// Timed out -- try one last result check
|
|
1531
1592
|
const finalResult = dispatch('result', ['--label', label]);
|
|
1532
1593
|
const finalStatus = dispatch('status', ['--label', label]);
|
|
1594
|
+
if (hasStructuredCompletion(finalResult)) {
|
|
1595
|
+
deliverResult(
|
|
1596
|
+
label,
|
|
1597
|
+
finalResult?.lastReply || null,
|
|
1598
|
+
finalStatus?.summary || null,
|
|
1599
|
+
finalResult?.completion || finalStatus?.completion || null,
|
|
1600
|
+
);
|
|
1601
|
+
}
|
|
1533
1602
|
if (finalStatus?.status === 'done') {
|
|
1534
1603
|
const rc = getRetryCount(label);
|
|
1535
1604
|
if (rc > 0) setRetryCount(label, 0);
|
package/dispatcher-strategies.js
CHANGED
|
@@ -1214,6 +1214,93 @@ export async function executeShell(job, ctx, deps) {
|
|
|
1214
1214
|
|
|
1215
1215
|
// -- Strategy: Agent (isolated session) ----------------------
|
|
1216
1216
|
|
|
1217
|
+
/**
 * Produce a log-friendly summary of a model/auth selection.
 * Missing or falsy fields are reported as null.
 *
 * @param {{ model?: string, authProfile?: string } | null | undefined} selection
 * @returns {{ model: string | null, auth_profile: string | null }}
 */
function describeAgentSelection(selection) {
  const model = selection?.model || null;
  const authProfile = selection?.authProfile || null;
  return { model, auth_profile: authProfile };
}
|
|
1223
|
+
|
|
1224
|
+
/**
 * Tell whether two selections pick the same model and auth profile.
 * Falsy values (null, '', undefined) are all treated as "unset" and compare
 * equal to each other.
 *
 * @param {{ model?: string, authProfile?: string } | null | undefined} left
 * @param {{ model?: string, authProfile?: string } | null | undefined} right
 * @returns {boolean}
 */
function sameAgentSelection(left, right) {
  const orUnset = (value) => value || undefined;
  if (orUnset(left?.model) !== orUnset(right?.model)) {
    return false;
  }
  return orUnset(left?.authProfile) === orUnset(right?.authProfile);
}
|
|
1228
|
+
|
|
1229
|
+
/**
 * Resolve a configured auth profile, expanding the special value 'inherit'.
 *
 * Any value other than 'inherit' is returned as-is (falsy values become
 * undefined) without querying sessions. For 'inherit', recent main sessions
 * are listed and the first one whose key looks like a main session supplies
 * the profile (authProfileOverride, then authProfile, then profile). When no
 * profile is found, or the lookup throws, 'inherit' itself is returned.
 *
 * @param {string | null | undefined} authProfile - Configured profile value.
 * @param {{ listSessions: Function, log: Function }} deps - Injected helpers.
 * @param {*} jobId - Job identifier attached to log entries.
 * @param {string} [fieldName='auth_profile'] - Field name used in log text.
 * @returns {Promise<string | undefined>} The effective auth profile.
 */
async function resolveConfiguredAuthProfile(authProfile, deps, jobId, fieldName = 'auth_profile') {
  const { listSessions, log } = deps;
  const configured = authProfile || undefined;
  if (configured !== 'inherit') return configured;

  const looksLikeMainSession = (session) => {
    const key = session.key || session.sessionKey || '';
    return key.includes(':main:') || key.endsWith(':main') || key === 'main';
  };

  let resolved = configured;
  try {
    const response = await listSessions({ kinds: ['main'], activeMinutes: 120, limit: 10 });
    // The session list may be nested at several depths depending on the caller.
    const candidates = response?.result?.details?.sessions
      || response?.result?.sessions
      || response?.sessions
      || response
      || [];

    let mainSession = null;
    if (Array.isArray(candidates)) {
      mainSession = candidates.find(looksLikeMainSession);
    }

    const profileId = mainSession?.authProfileOverride || mainSession?.authProfile || mainSession?.profile;
    if (profileId) {
      resolved = profileId;
      log('debug', `Resolved ${fieldName} 'inherit' -> '${profileId}'`, { jobId });
    } else {
      log('debug', `${fieldName} 'inherit' -- no main session profile found, passing 'inherit' as-is`, { jobId });
    }
  } catch (err) {
    // Keep 'inherit' on failure -- the gateway may handle it.
    log('warn', `Failed to resolve ${fieldName} 'inherit': ${err.message}`, { jobId });
  }

  return resolved;
}
|
|
1257
|
+
|
|
1258
|
+
/**
 * Run one agent turn using a specific model/auth selection.
 *
 * Before dispatching, the optional deps hooks are invoked when present:
 * `syncAuthStoreToSession` (so refreshed credentials are visible to the
 * runner) and `applySessionOverridesToSessionStore` (to record the selected
 * auth profile and model for the session). A failed hook is logged as a
 * warning and does not prevent the dispatch.
 *
 * NOTE(review): `selection.model` is handed to the session-override hook as
 * `modelRef` only; it is not included in the options given to
 * `dispatchAgentTurn`. Confirm that relying on the session store for model
 * selection is intended, in particular when the override hook is absent.
 *
 * @param {object} job - Job row; reads id, agent_id, payload_timeout_seconds, run_timeout_ms.
 * @param {object} deps - Injected helpers (log plus the optional hooks above).
 * @param {string} prompt - Message sent to the agent.
 * @param {string} sessionKey - Session key for the turn.
 * @param {{ model?: string, authProfile?: string }} selection - Model/auth to use.
 * @param {Function} dispatchAgentTurn - Function that performs the turn.
 * @returns {Promise<*>} Whatever `dispatchAgentTurn` resolves to.
 */
async function runAgentTurnForSelection(job, deps, prompt, sessionKey, selection, dispatchAgentTurn) {
  const {
    log,
    syncAuthStoreToSession: syncAuth,
    applySessionOverridesToSessionStore: applySessionOverrides,
  } = deps;
  const agentId = job.agent_id || 'main';
  const jobId = job.id;

  // Sync the live auth store before every attempt so refreshed credentials
  // are visible to any embedded/isolated runner startup.
  if (typeof syncAuth === 'function') {
    const syncResult = syncAuth(agentId);
    if (!syncResult.ok) {
      log('warn', `Failed to sync auth store: ${syncResult.error}`, { jobId });
    } else {
      log('debug', `Synced live auth store to agent '${agentId}'`, { jobId });
    }
  }

  if (typeof applySessionOverrides === 'function') {
    const overrides = {
      authProfile: selection.authProfile,
      modelRef: selection.model || null,
    };
    const applyResult = applySessionOverrides(sessionKey, overrides, agentId);
    if (!applyResult.ok) {
      log('warn', `Failed to apply session overrides: ${applyResult.error}`, { jobId, sessionKey });
    } else {
      log('debug', `Applied session overrides for ${sessionKey}`, {
        jobId,
        authProfile: selection.authProfile || null,
        modelRef: selection.model || null,
      });
    }
  }

  const idleTimeoutMs = (job.payload_timeout_seconds || 120) * 1000;
  const absoluteTimeoutMs = job.run_timeout_ms || 300000;
  return dispatchAgentTurn({
    message: prompt,
    agentId,
    sessionKey,
    authProfile: selection.authProfile,
    idleTimeoutMs,
    pollIntervalMs: 60000,
    absoluteTimeoutMs,
  });
}
|
|
1303
|
+
|
|
1217
1304
|
export async function executeAgent(job, ctx, deps) {
|
|
1218
1305
|
const {
|
|
1219
1306
|
waitForGateway, updateRunSession, setAgentStatus,
|
|
@@ -1224,7 +1311,6 @@ export async function executeAgent(job, ctx, deps) {
|
|
|
1224
1311
|
runIsolatedAgentTurn,
|
|
1225
1312
|
updateContextSummary, releaseDispatch, releaseIdempotencyKey,
|
|
1226
1313
|
updateJob, matchesSentinel, detectTransientError,
|
|
1227
|
-
listSessions,
|
|
1228
1314
|
sqliteNow, log,
|
|
1229
1315
|
} = deps;
|
|
1230
1316
|
const dispatchAgentTurn = runIsolatedAgentTurn || runAgentTurnWithActivityTimeout;
|
|
@@ -1264,82 +1350,45 @@ export async function executeAgent(job, ctx, deps) {
|
|
|
1264
1350
|
const { prompt, contextMeta } = buildJobPrompt(job, ctx.run);
|
|
1265
1351
|
try { updateContextSummary(ctx.run.id, contextMeta); } catch (_e) { /* column may not exist yet */ }
|
|
1266
1352
|
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
}
|
|
1291
|
-
}
|
|
1353
|
+
const primarySelection = {
|
|
1354
|
+
model: job.payload_model || undefined,
|
|
1355
|
+
authProfile: await resolveConfiguredAuthProfile(
|
|
1356
|
+
ctx.v02Outcomes?.effective_auth_profile || job.auth_profile || undefined,
|
|
1357
|
+
deps,
|
|
1358
|
+
job.id,
|
|
1359
|
+
ctx.v02Outcomes?.effective_auth_profile ? 'effective_auth_profile' : 'auth_profile'
|
|
1360
|
+
),
|
|
1361
|
+
};
|
|
1362
|
+
const hasConfiguredFallback = job.payload_model_fallback != null || job.auth_profile_fallback != null;
|
|
1363
|
+
const fallbackSelection = hasConfiguredFallback ? {
|
|
1364
|
+
model: job.payload_model_fallback || primarySelection.model || undefined,
|
|
1365
|
+
authProfile: job.auth_profile_fallback != null
|
|
1366
|
+
? await resolveConfiguredAuthProfile(job.auth_profile_fallback, deps, job.id, 'auth_profile_fallback')
|
|
1367
|
+
: primarySelection.authProfile,
|
|
1368
|
+
} : null;
|
|
1369
|
+
|
|
1370
|
+
let turnResult;
|
|
1371
|
+
try {
|
|
1372
|
+
turnResult = await runAgentTurnForSelection(job, deps, prompt, sessionKey, primarySelection, dispatchAgentTurn);
|
|
1373
|
+
} catch (primaryError) {
|
|
1374
|
+
const canTryConfiguredFallback = fallbackSelection && !sameAgentSelection(primarySelection, fallbackSelection);
|
|
1375
|
+
if (!canTryConfiguredFallback) throw primaryError;
|
|
1292
1376
|
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
if (typeof syncAuth === 'function') {
|
|
1300
|
-
const syncResult = syncAuth(job.agent_id || 'main');
|
|
1301
|
-
if (syncResult.ok) {
|
|
1302
|
-
log('debug', `Synced live auth store to agent '${job.agent_id || 'main'}'`, { jobId: job.id });
|
|
1303
|
-
} else {
|
|
1304
|
-
log('warn', `Failed to sync auth store: ${syncResult.error}`, { jobId: job.id });
|
|
1305
|
-
}
|
|
1306
|
-
}
|
|
1377
|
+
log('warn', 'Primary agent selection failed; retrying with configured fallback', {
|
|
1378
|
+
jobId: job.id,
|
|
1379
|
+
primary: describeAgentSelection(primarySelection),
|
|
1380
|
+
fallback: describeAgentSelection(fallbackSelection),
|
|
1381
|
+
error: primaryError.message,
|
|
1382
|
+
});
|
|
1307
1383
|
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
const { applyAuthProfileToSessionStore: applyAuthProfile } = deps;
|
|
1314
|
-
if (typeof applyAuthProfile === 'function') {
|
|
1315
|
-
const applyResult = applyAuthProfile(sessionKey, resolvedAuthProfile, job.agent_id || 'main');
|
|
1316
|
-
if (applyResult.ok) {
|
|
1317
|
-
log('debug', `Applied auth profile '${resolvedAuthProfile}' to session store for ${sessionKey}`, { jobId: job.id });
|
|
1318
|
-
} else {
|
|
1319
|
-
log('warn', `Failed to apply auth profile to session store: ${applyResult.error}`, { jobId: job.id, sessionKey });
|
|
1320
|
-
}
|
|
1384
|
+
try {
|
|
1385
|
+
turnResult = await runAgentTurnForSelection(job, deps, prompt, sessionKey, fallbackSelection, dispatchAgentTurn);
|
|
1386
|
+
log('info', 'Configured agent fallback succeeded', { jobId: job.id, fallback: describeAgentSelection(fallbackSelection) });
|
|
1387
|
+
} catch (fallbackError) {
|
|
1388
|
+
throw new Error(`Primary agent selection failed: ${primaryError.message}; configured fallback also failed: ${fallbackError.message}`, { cause: fallbackError });
|
|
1321
1389
|
}
|
|
1322
1390
|
}
|
|
1323
1391
|
|
|
1324
|
-
// Isolated dispatch primitive: HTTP-only chat completions call. The
|
|
1325
|
-
// scheduler must never fork a sibling `openclaw` process to spawn an
|
|
1326
|
-
// isolated session -- that variant has historically SIGTERM'd the
|
|
1327
|
-
// launchd-tracked gateway parent and orphaned a node process on port
|
|
1328
|
-
// 18789 (see ISOLATED_DISPATCH_PRIMITIVE in gateway.js).
|
|
1329
|
-
const turnResult = await dispatchAgentTurn({
|
|
1330
|
-
message: prompt,
|
|
1331
|
-
agentId: job.agent_id || 'main',
|
|
1332
|
-
sessionKey,
|
|
1333
|
-
model: job.payload_model || undefined,
|
|
1334
|
-
authProfile: resolvedAuthProfile,
|
|
1335
|
-
// materializedEnv deferred: the x-openclaw-env-inject header is not sent
|
|
1336
|
-
// until the OpenClaw gateway implements the receiver side. See
|
|
1337
|
-
// openclaw/docs/env-inject-proposal.md for the gateway spec.
|
|
1338
|
-
idleTimeoutMs: (job.payload_timeout_seconds || 120) * 1000,
|
|
1339
|
-
pollIntervalMs: 60000,
|
|
1340
|
-
absoluteTimeoutMs: job.run_timeout_ms || 300000,
|
|
1341
|
-
});
|
|
1342
|
-
|
|
1343
1392
|
const content = turnResult.content || '';
|
|
1344
1393
|
const trimmed = content.trim();
|
|
1345
1394
|
|
package/dispatcher.js
CHANGED
|
@@ -54,7 +54,7 @@ import {
|
|
|
54
54
|
runAgentTurnWithActivityTimeout, runIsolatedAgentTurn,
|
|
55
55
|
sendSystemEvent, getAllSubAgentSessions, listSessions,
|
|
56
56
|
deliverMessage, checkGatewayHealth, waitForGateway, resolveDeliveryAlias,
|
|
57
|
-
|
|
57
|
+
applySessionOverridesToSessionStore,
|
|
58
58
|
syncAuthStoreToSession,
|
|
59
59
|
} from './gateway.js';
|
|
60
60
|
import { normalizeShellResult } from './shell-result.js';
|
|
@@ -314,7 +314,7 @@ function buildDispatchDeps() {
|
|
|
314
314
|
updateContextSummary, releaseIdempotencyKey,
|
|
315
315
|
matchesSentinel, detectTransientError,
|
|
316
316
|
listSessions,
|
|
317
|
-
|
|
317
|
+
applySessionOverridesToSessionStore,
|
|
318
318
|
syncAuthStoreToSession,
|
|
319
319
|
// Finalize
|
|
320
320
|
updateIdempotencyResultHash,
|
|
@@ -430,8 +430,10 @@ function buildJobPrompt(job, run) {
|
|
|
430
430
|
execution_intent: job.execution_intent || 'execute',
|
|
431
431
|
execution_read_only: Boolean(job.execution_read_only),
|
|
432
432
|
payload_model: job.payload_model || null,
|
|
433
|
+
payload_model_fallback: job.payload_model_fallback || null,
|
|
433
434
|
payload_thinking: job.payload_thinking || null,
|
|
434
435
|
auth_profile: job.auth_profile || null,
|
|
436
|
+
auth_profile_fallback: job.auth_profile_fallback || null,
|
|
435
437
|
};
|
|
436
438
|
|
|
437
439
|
const triggerContext = buildTriggeredRunContext(run);
|
package/docs/gateway-contract.md
CHANGED
|
@@ -90,6 +90,10 @@ single user message to an agent and receives the complete assistant response.
|
|
|
90
90
|
The `model` field defaults to `openclaw:<agentId>` but can be overridden via
|
|
91
91
|
`job.payload_model`.
|
|
92
92
|
|
|
93
|
+
If `job.payload_model_fallback` and/or `job.auth_profile_fallback` are set, the
|
|
94
|
+
scheduler retries once in the same run with the configured fallback selection
|
|
95
|
+
after a primary selection error.
|
|
96
|
+
|
|
93
97
|
**Response body** (expected):
|
|
94
98
|
|
|
95
99
|
```json
|
|
@@ -653,6 +657,23 @@ directly as the `x-openclaw-auth-profile` header value without resolution.
|
|
|
653
657
|
|
|
654
658
|
---
|
|
655
659
|
|
|
660
|
+
## Fallback Model / Auth Selection
|
|
661
|
+
|
|
662
|
+
Jobs can optionally persist `payload_model_fallback` and `auth_profile_fallback`
|
|
663
|
+
alongside the primary `payload_model` / `auth_profile` fields.
|
|
664
|
+
|
|
665
|
+
Runtime behavior:
|
|
666
|
+
|
|
667
|
+
- The scheduler attempts the primary selection first.
|
|
668
|
+
- If the primary chat-completions request errors before a usable assistant
|
|
669
|
+
reply is returned, `executeAgent()` retries once in the same run using the
|
|
670
|
+
configured fallback overrides.
|
|
671
|
+
- Any fallback dimension left unset keeps the primary effective value.
|
|
672
|
+
- Existing jobs remain backward-compatible because both fallback fields default
|
|
673
|
+
to `NULL` and no retry is attempted unless a fallback override is configured.
|
|
674
|
+
|
|
675
|
+
---
|
|
676
|
+
|
|
656
677
|
## Env-Inject Forwarding
|
|
657
678
|
|
|
658
679
|
When credential materialization for an agent task produces a non-empty plain
|
package/gateway.js
CHANGED
|
@@ -144,6 +144,7 @@ export async function runAgentTurn(opts) {
|
|
|
144
144
|
* @param {number} opts.pollIntervalMs - How often to poll session activity (default: 60000)
|
|
145
145
|
* @param {number} opts.absoluteTimeoutMs - Hard ceiling regardless of activity (default: 300000)
|
|
146
146
|
* @param {string} opts.authProfile - Auth profile override (null, 'inherit', or 'provider:label')
|
|
147
|
+
* @param {string[]} [opts.sessionKinds] - Optional session kinds to track for activity polling
|
|
147
148
|
*/
|
|
148
149
|
export async function runAgentTurnWithActivityTimeout(opts) {
|
|
149
150
|
const {
|
|
@@ -155,10 +156,46 @@ export async function runAgentTurnWithActivityTimeout(opts) {
|
|
|
155
156
|
idleTimeoutMs = 120000, // per-check idle threshold (from payload_timeout_seconds)
|
|
156
157
|
pollIntervalMs = 60000, // check activity every 60s
|
|
157
158
|
absoluteTimeoutMs = 300000, // hard ceiling (run_timeout_ms)
|
|
159
|
+
sessionKinds,
|
|
158
160
|
} = opts;
|
|
159
161
|
|
|
160
162
|
const controller = new AbortController();
|
|
161
163
|
let abortReason = null;
|
|
164
|
+
const normalizedAgentId = (agentId || 'main').toLowerCase();
|
|
165
|
+
const normalizedSessionKey = String(sessionKey || '').toLowerCase();
|
|
166
|
+
|
|
167
|
+
const inferSessionKinds = () => {
|
|
168
|
+
if (Array.isArray(sessionKinds) && sessionKinds.length > 0) {
|
|
169
|
+
return [...new Set(sessionKinds.map(k => String(k).toLowerCase()).filter(Boolean))];
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
// Explicitly isolated/subagent sessions should not be pinned to main session
|
|
173
|
+
// so they can report idleness based on their own active session records.
|
|
174
|
+
if (
|
|
175
|
+
normalizedSessionKey === 'isolated' ||
|
|
176
|
+
normalizedSessionKey.startsWith('isolated:') ||
|
|
177
|
+
normalizedSessionKey.endsWith(':isolated') ||
|
|
178
|
+
normalizedSessionKey.includes(':isolated:') ||
|
|
179
|
+
normalizedAgentId === 'subagent'
|
|
180
|
+
) {
|
|
181
|
+
return ['subagent', 'isolated'];
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
// Default to including main unless we can clearly infer this is an isolated run.
|
|
185
|
+
if (
|
|
186
|
+
normalizedAgentId === 'main' ||
|
|
187
|
+
normalizedSessionKey === 'main' ||
|
|
188
|
+
normalizedSessionKey.startsWith('main:') ||
|
|
189
|
+
normalizedSessionKey.includes(':main:') ||
|
|
190
|
+
normalizedSessionKey.endsWith(':main')
|
|
191
|
+
) {
|
|
192
|
+
return ['main', 'subagent', 'isolated'];
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
return ['main', 'subagent', 'isolated'];
|
|
196
|
+
};
|
|
197
|
+
|
|
198
|
+
const resolvedSessionKinds = inferSessionKinds();
|
|
162
199
|
|
|
163
200
|
// Hard absolute ceiling -- always fires regardless of activity
|
|
164
201
|
const absoluteTimer = setTimeout(() => {
|
|
@@ -171,7 +208,7 @@ export async function runAgentTurnWithActivityTimeout(opts) {
|
|
|
171
208
|
|
|
172
209
|
const checkActivity = async () => {
|
|
173
210
|
try {
|
|
174
|
-
const result = await listSessions({ kinds:
|
|
211
|
+
const result = await listSessions({ kinds: resolvedSessionKinds, activeMinutes: 60 });
|
|
175
212
|
// Normalise: gateway wraps result in several layers
|
|
176
213
|
const sessions =
|
|
177
214
|
result?.result?.details?.sessions ||
|
|
@@ -551,20 +588,55 @@ export async function waitForGateway(timeoutMs = 30000, intervalMs = 2000) {
|
|
|
551
588
|
* @param {string} [agentId='main'] - Agent ID for store path resolution
|
|
552
589
|
* @returns {{ ok: boolean, error?: string }}
|
|
553
590
|
*/
|
|
554
|
-
|
|
555
|
-
if (!sessionKey || !authProfile) {
|
|
556
|
-
return { ok: false, error: 'sessionKey and authProfile are required' };
|
|
557
|
-
}
|
|
558
|
-
|
|
559
|
-
// The gateway may persist session state under either the canonical agent-scoped
|
|
560
|
-
// key or the flat transport key, depending on which path created the session.
|
|
561
|
-
// Keep both aliases in sync so isolated scheduler jobs cannot miss the override.
|
|
591
|
+
/**
 * List the keys under which a session may be stored.
 *
 * Returns the agent-scoped form (`agent:<agentId>:<key>`) followed by the
 * flat form (`<key>`), de-duplicated. A key that already starts with
 * `agent:` is used as the scoped form unchanged.
 *
 * @param {string} sessionKey - Session key in either form.
 * @param {string} [agentId='main'] - Agent used to build the scoped form.
 * @returns {string[]} Unique aliases, scoped form first.
 */
function resolveSessionKeyAliases(sessionKey, agentId = 'main') {
  const scopedMatch = sessionKey.match(/^agent:[^:]+:(.+)$/);

  const aliases = new Set();
  aliases.add(sessionKey.startsWith('agent:') ? sessionKey : `agent:${agentId}:${sessionKey}`);
  // Strip the `agent:<id>:` prefix when present to obtain the flat key.
  aliases.add(scopedMatch?.[1] || sessionKey);
  return [...aliases];
}
|
|
599
|
+
|
|
600
|
+
/**
 * Split a model reference into provider and model overrides.
 *
 * A reference of the form `provider/model` (split at the first '/', with a
 * non-empty part on each side) yields both overrides. Any other non-empty
 * string is treated as a bare model name. Non-strings and blank strings
 * yield no overrides.
 *
 * @param {*} modelRef - Model reference, e.g. 'provider/model' or 'model'.
 * @returns {{ providerOverride: string | undefined, modelOverride: string | undefined }}
 */
function parseSessionModelRef(modelRef) {
  const ref = typeof modelRef === 'string' ? modelRef.trim() : '';
  if (ref === '') {
    return { providerOverride: undefined, modelOverride: undefined };
  }

  const separatorAt = ref.indexOf('/');
  const hasProviderPrefix = separatorAt > 0 && separatorAt < ref.length - 1;
  if (!hasProviderPrefix) {
    return { providerOverride: undefined, modelOverride: ref };
  }

  const provider = ref.slice(0, separatorAt).trim();
  const model = ref.slice(separatorAt + 1).trim();
  return {
    providerOverride: provider || undefined,
    modelOverride: model || undefined,
  };
}
|
|
616
|
+
|
|
617
|
+
/**
|
|
618
|
+
* Write scheduler-managed session overrides directly to the gateway's sessions.json store.
|
|
619
|
+
*
|
|
620
|
+
* The gateway reads sessions.json on each agent turn (with mtime-based cache
|
|
621
|
+
* invalidation), so writing here before dispatch ensures the embedded runner
|
|
622
|
+
* picks up the correct auth profile and model selection.
|
|
623
|
+
*
|
|
624
|
+
* @param {string} sessionKey - Session key as used in the HTTP request (e.g. 'scheduler:<jobId>')
|
|
625
|
+
* @param {{ authProfile?: string | null, modelRef?: string | null }} overrides - Desired session overrides
|
|
626
|
+
* @param {string} [agentId='main'] - Agent ID for store path resolution
|
|
627
|
+
* @returns {{ ok: boolean, error?: string }}
|
|
628
|
+
*/
|
|
629
|
+
export function applySessionOverridesToSessionStore(sessionKey, overrides = {}, agentId = 'main') {
|
|
630
|
+
if (!sessionKey) {
|
|
631
|
+
return { ok: false, error: 'sessionKey is required' };
|
|
632
|
+
}
|
|
633
|
+
|
|
634
|
+
const authProfile = typeof overrides.authProfile === 'string' ? overrides.authProfile.trim() : '';
|
|
635
|
+
const shouldSetAuthProfile = Boolean(authProfile) && authProfile !== 'inherit';
|
|
636
|
+
const { providerOverride, modelOverride } = parseSessionModelRef(overrides.modelRef);
|
|
637
|
+
const shouldSetModelOverride = Boolean(modelOverride);
|
|
638
|
+
|
|
639
|
+
const keyAliases = resolveSessionKeyAliases(sessionKey, agentId);
|
|
568
640
|
const sessionsPath = join(HOME_DIR, '.openclaw', 'agents', agentId, 'sessions', 'sessions.json');
|
|
569
641
|
|
|
570
642
|
try {
|
|
@@ -579,28 +651,59 @@ export function applyAuthProfileToSessionStore(sessionKey, authProfile, agentId
|
|
|
579
651
|
let changed = false;
|
|
580
652
|
|
|
581
653
|
for (const key of keyAliases) {
|
|
582
|
-
const
|
|
583
|
-
if (!
|
|
584
|
-
// Session doesn't exist yet -- create a minimal entry.
|
|
585
|
-
// The gateway will populate the rest on the first agent turn.
|
|
586
|
-
store[key] = {
|
|
587
|
-
updatedAt: now,
|
|
588
|
-
authProfileOverride: authProfile,
|
|
589
|
-
authProfileOverrideSource: 'user',
|
|
590
|
-
};
|
|
591
|
-
changed = true;
|
|
654
|
+
const existingEntry = store[key];
|
|
655
|
+
if (!existingEntry && !shouldSetAuthProfile && !shouldSetModelOverride) {
|
|
592
656
|
continue;
|
|
593
657
|
}
|
|
594
658
|
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
entry.
|
|
600
|
-
|
|
659
|
+
const entry = existingEntry || { updatedAt: now };
|
|
660
|
+
let entryChanged = false;
|
|
661
|
+
|
|
662
|
+
if (shouldSetAuthProfile) {
|
|
663
|
+
if (entry.authProfileOverride !== authProfile || entry.authProfileOverrideSource !== 'user') {
|
|
664
|
+
entry.authProfileOverride = authProfile;
|
|
665
|
+
entry.authProfileOverrideSource = 'user';
|
|
666
|
+
delete entry.authProfileOverrideCompactionCount;
|
|
667
|
+
entryChanged = true;
|
|
668
|
+
}
|
|
669
|
+
} else if (
|
|
670
|
+
entry.authProfileOverride !== undefined ||
|
|
671
|
+
entry.authProfileOverrideSource !== undefined ||
|
|
672
|
+
entry.authProfileOverrideCompactionCount !== undefined
|
|
673
|
+
) {
|
|
674
|
+
delete entry.authProfileOverride;
|
|
675
|
+
delete entry.authProfileOverrideSource;
|
|
601
676
|
delete entry.authProfileOverrideCompactionCount;
|
|
602
|
-
|
|
677
|
+
entryChanged = true;
|
|
603
678
|
}
|
|
679
|
+
|
|
680
|
+
if (shouldSetModelOverride) {
|
|
681
|
+
if (entry.modelOverride !== modelOverride) {
|
|
682
|
+
entry.modelOverride = modelOverride;
|
|
683
|
+
entryChanged = true;
|
|
684
|
+
}
|
|
685
|
+
if (providerOverride) {
|
|
686
|
+
if (entry.providerOverride !== providerOverride) {
|
|
687
|
+
entry.providerOverride = providerOverride;
|
|
688
|
+
entryChanged = true;
|
|
689
|
+
}
|
|
690
|
+
} else if (entry.providerOverride !== undefined) {
|
|
691
|
+
delete entry.providerOverride;
|
|
692
|
+
entryChanged = true;
|
|
693
|
+
}
|
|
694
|
+
} else if (entry.modelOverride !== undefined || entry.providerOverride !== undefined) {
|
|
695
|
+
delete entry.modelOverride;
|
|
696
|
+
delete entry.providerOverride;
|
|
697
|
+
entryChanged = true;
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
if (!entryChanged) {
|
|
701
|
+
continue;
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
entry.updatedAt = now;
|
|
705
|
+
store[key] = entry;
|
|
706
|
+
changed = true;
|
|
604
707
|
}
|
|
605
708
|
|
|
606
709
|
if (!changed) {
|
|
@@ -614,9 +717,16 @@ export function applyAuthProfileToSessionStore(sessionKey, authProfile, agentId
|
|
|
614
717
|
}
|
|
615
718
|
}
|
|
616
719
|
|
|
720
|
+
/**
 * Apply only an auth-profile override to a session, by delegating to
 * applySessionOverridesToSessionStore.
 *
 * NOTE(review): no `modelRef` is passed to the delegate, which appears to
 * remove existing model/provider overrides when `modelRef` is absent, and to
 * treat the value 'inherit' as "no auth profile". Confirm both effects are
 * acceptable for callers of this wrapper.
 *
 * @param {string} sessionKey - Session key to update; required.
 * @param {string} authProfile - Auth profile to record; required.
 * @param {string} [agentId='main'] - Agent whose session store is updated.
 * @returns {{ ok: boolean, error?: string }}
 */
export function applyAuthProfileToSessionStore(sessionKey, authProfile, agentId = 'main') {
  const hasRequiredArgs = Boolean(sessionKey) && Boolean(authProfile);
  if (hasRequiredArgs) {
    return applySessionOverridesToSessionStore(sessionKey, { authProfile }, agentId);
  }
  return { ok: false, error: 'sessionKey and authProfile are required' };
}
|
|
726
|
+
|
|
617
727
|
/**
|
|
618
|
-
* Sync the live auth-profiles.json from
|
|
619
|
-
*
|
|
728
|
+
* Sync the live auth-profiles.json from the main agent store to the target
|
|
729
|
+
* agent store at ~/.openclaw/agents/<agentId>/agent/auth-profiles.json.
|
|
620
730
|
*
|
|
621
731
|
* This ensures scheduler sessions always use fresh credentials (tokens, order,
|
|
622
732
|
* default profile) even when no explicit auth_profile is set on the job.
|
|
@@ -630,7 +740,7 @@ export function applyAuthProfileToSessionStore(sessionKey, authProfile, agentId
|
|
|
630
740
|
* @returns {{ ok: boolean, error?: string }}
|
|
631
741
|
*/
|
|
632
742
|
export function syncAuthStoreToSession(agentId = 'main') {
|
|
633
|
-
const livePath = join(HOME_DIR, '.openclaw', '
|
|
743
|
+
const livePath = join(HOME_DIR, '.openclaw', 'agents', 'main', 'agent', 'auth-profiles.json');
|
|
634
744
|
const agentStorePath = join(HOME_DIR, '.openclaw', 'agents', agentId, 'agent', 'auth-profiles.json');
|
|
635
745
|
|
|
636
746
|
try {
|