openclaw-scheduler 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/README.md +16 -6
- package/cli.js +13 -4
- package/dispatch/README.md +18 -3
- package/dispatch/completion.mjs +1312 -34
- package/dispatch/hooks.mjs +17 -5
- package/dispatch/index.mjs +600 -226
- package/dispatch/message-input.mjs +67 -0
- package/dispatch/watcher.mjs +381 -43
- package/dispatcher-strategies.js +203 -30
- package/dispatcher.js +6 -1
- package/gateway.js +71 -8
- package/index.d.ts +1 -0
- package/package.json +3 -1
- package/scripts/dispatch-cli-utils.mjs +53 -0
- package/scripts/inbox-watcher-guardrail.mjs +506 -0
package/dispatch/index.mjs
CHANGED
|
@@ -32,8 +32,17 @@ import { randomUUID } from 'crypto';
|
|
|
32
32
|
import { execFileSync } from 'child_process';
|
|
33
33
|
import { homedir } from 'os';
|
|
34
34
|
import Database from 'better-sqlite3';
|
|
35
|
-
import {
|
|
35
|
+
import {
|
|
36
|
+
buildCompletionSignalInstructions,
|
|
37
|
+
buildTerminalCompletionPayload,
|
|
38
|
+
extractLastMeaningfulAssistantReplyFromEntries,
|
|
39
|
+
extractTerminalAssistantReplyFromEntries,
|
|
40
|
+
hasCompletionSignal,
|
|
41
|
+
taskRequiresGitSha,
|
|
42
|
+
} from './completion.mjs';
|
|
36
43
|
import { onStarted, onFinished, onStuck } from './hooks.mjs';
|
|
44
|
+
import { resolveMessageInput } from './message-input.mjs';
|
|
45
|
+
import { buildDispatchDeliverySurface } from '../scripts/dispatch-cli-utils.mjs';
|
|
37
46
|
|
|
38
47
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
39
48
|
const HOME_DIR = process.env.HOME || homedir();
|
|
@@ -110,6 +119,15 @@ function sleep(ms) {
|
|
|
110
119
|
return new Promise(r => setTimeout(r, ms));
|
|
111
120
|
}
|
|
112
121
|
|
|
122
|
+
/**
 * Normalize a timestamp-like value to epoch milliseconds.
 *
 * Finite numbers below 1e12 are treated as epoch seconds and scaled to
 * milliseconds; larger numbers are returned unchanged. Any non-number value
 * is handed to the Date constructor.
 *
 * @param {*} value - Number, date string, Date, or null/undefined.
 * @returns {number|null} Epoch milliseconds, or null when no finite timestamp
 *   can be derived from the input.
 */
function toTimestampMs(value) {
  if (value == null) return null;
  if (typeof value === 'number') {
    // NaN and ±Infinity are not timestamps. Returning them would make callers
    // that test `!== null` believe a real timestamp was present.
    if (!Number.isFinite(value)) return null;
    return value < 1e12 ? value * 1000 : value;
  }
  const parsed = new Date(value).getTime();
  return Number.isFinite(parsed) ? parsed : null;
}
|
|
130
|
+
|
|
113
131
|
/** Parse --flag value pairs from argv (supports both --flag value and --flag=value) */
|
|
114
132
|
function parseFlags(argv) {
|
|
115
133
|
const flags = {};
|
|
@@ -131,21 +149,6 @@ function parseFlags(argv) {
|
|
|
131
149
|
return flags;
|
|
132
150
|
}
|
|
133
151
|
|
|
134
|
-
function taskRequiresGitSha(taskPrompt) {
|
|
135
|
-
if (!taskPrompt || typeof taskPrompt !== 'string') return false;
|
|
136
|
-
|
|
137
|
-
const commandPattern = /\bgit\s+(push|rebase|cherry-pick)\b|(?:^|\s)--force-with-lease\b|(?:^|\s)--force-push\b/ig;
|
|
138
|
-
let match;
|
|
139
|
-
while ((match = commandPattern.exec(taskPrompt)) !== null) {
|
|
140
|
-
const before = taskPrompt.slice(Math.max(0, match.index - 40), match.index);
|
|
141
|
-
const negatedContext = /\b(?:do\s+not|don't|dont|never)\s+(?:use|run|call|invoke)?\s*$/i.test(before)
|
|
142
|
-
|| /\bavoid\s+(?:using\s+)?$/i.test(before)
|
|
143
|
-
|| /\bwithout\s+(?:using\s+)?$/i.test(before);
|
|
144
|
-
if (!negatedContext) return true;
|
|
145
|
-
}
|
|
146
|
-
return false;
|
|
147
|
-
}
|
|
148
|
-
|
|
149
152
|
// -- Labels Ledger --------------------------------------------
|
|
150
153
|
|
|
151
154
|
function getLabelsSignature() {
|
|
@@ -202,6 +205,19 @@ function setLabel(name, data) {
|
|
|
202
205
|
return labels[name];
|
|
203
206
|
}
|
|
204
207
|
|
|
208
|
+
/**
 * Mark a label as done in the labels ledger.
 *
 * Merges `data` over the existing entry for `name`, forces `status` to 'done',
 * stamps `updatedAt` with the current ISO time, and removes any `error` field
 * left on the entry.
 *
 * @param {string} name - Label name (ledger key).
 * @param {object} data - Fields to merge into the entry.
 * @returns {object} The updated entry as found in the value returned by mutateLabels.
 */
function setLabelDone(name, data) {
  const ledger = mutateLabels((current) => {
    const finished = {
      ...current[name],
      ...data,
      status: 'done',
      updatedAt: new Date().toISOString(),
    };
    // A completed label must not keep a stale error from an earlier attempt.
    delete finished.error;
    current[name] = finished;
  });
  return ledger[name];
}
|
|
220
|
+
|
|
205
221
|
// -- Gateway Calls --------------------------------------------
|
|
206
222
|
|
|
207
223
|
/**
|
|
@@ -247,23 +263,16 @@ function gatewayCall(method, params = {}, opts = {}) {
|
|
|
247
263
|
// -- Gateway Error Log Check ----------------------------------
|
|
248
264
|
|
|
249
265
|
/**
|
|
250
|
-
* Check the gateway error log for
|
|
266
|
+
* Check the gateway error log for the most recent diagnostic lane task error
|
|
251
267
|
* matching a specific session key.
|
|
252
268
|
*
|
|
253
269
|
* Scans the last N bytes of gateway.err.log for diagnostic lane task errors
|
|
254
|
-
* that reference the session key and
|
|
270
|
+
* that reference the session key and returns the newest error line.
|
|
255
271
|
*
|
|
256
272
|
* @param {string} sessionKey - The session key to check
|
|
257
273
|
* @returns {{ found: boolean, error: string|null, timestamp: string|null }}
|
|
258
274
|
*/
|
|
259
|
-
function
|
|
260
|
-
const OVERLOAD_PATTERNS = [
|
|
261
|
-
/529/i,
|
|
262
|
-
/failover\s*error/i,
|
|
263
|
-
/overload/i,
|
|
264
|
-
/temporarily\s+overloaded/i,
|
|
265
|
-
];
|
|
266
|
-
|
|
275
|
+
function getGatewayLaneTaskError(sessionKey) {
|
|
267
276
|
try {
|
|
268
277
|
const logPath = join(HOME_DIR, '.openclaw', 'logs', 'gateway.err.log');
|
|
269
278
|
if (!existsSync(logPath)) return { found: false, error: null, timestamp: null };
|
|
@@ -285,20 +294,15 @@ function check529InGatewayLog(sessionKey) {
|
|
|
285
294
|
if (!line.includes(sessionKey)) continue;
|
|
286
295
|
if (!line.includes('lane task error')) continue;
|
|
287
296
|
|
|
288
|
-
// Extract the error message
|
|
289
297
|
const errorMatch = line.match(/error="([^"]+)"/);
|
|
290
298
|
if (!errorMatch) continue;
|
|
291
299
|
|
|
292
|
-
const
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
error: `FailoverError (529): ${errorMsg}`,
|
|
299
|
-
timestamp: tsMatch ? tsMatch[1] : null,
|
|
300
|
-
};
|
|
301
|
-
}
|
|
300
|
+
const tsMatch = line.match(/^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)/);
|
|
301
|
+
return {
|
|
302
|
+
found: true,
|
|
303
|
+
error: errorMatch[1],
|
|
304
|
+
timestamp: tsMatch ? tsMatch[1] : null,
|
|
305
|
+
};
|
|
302
306
|
}
|
|
303
307
|
|
|
304
308
|
return { found: false, error: null, timestamp: null };
|
|
@@ -307,6 +311,32 @@ function check529InGatewayLog(sessionKey) {
|
|
|
307
311
|
}
|
|
308
312
|
}
|
|
309
313
|
|
|
314
|
+
/**
 * Check the gateway error log for 529/FailoverError/overload errors
 * matching a specific session key.
 *
 * Delegates the log lookup to getGatewayLaneTaskError and only reports a hit
 * when the returned error text matches one of the overload patterns.
 *
 * @param {string} sessionKey - The session key to check
 * @returns {{ found: boolean, error: string|null, timestamp: string|null }}
 */
function check529InGatewayLog(sessionKey) {
  const miss = { found: false, error: null, timestamp: null };

  const { found, error, timestamp } = getGatewayLaneTaskError(sessionKey);
  if (!found || !error) return miss;

  const looksOverloaded =
    /529/i.test(error) ||
    /failover\s*error/i.test(error) ||
    /overload/i.test(error) ||
    /temporarily\s+overloaded/i.test(error);
  if (!looksOverloaded) return miss;

  return {
    found: true,
    error: `FailoverError (529): ${error}`,
    timestamp,
  };
}
|
|
339
|
+
|
|
310
340
|
// -- Sessions Store (Direct Read) -----------------------------
|
|
311
341
|
|
|
312
342
|
/**
|
|
@@ -328,6 +358,103 @@ function readSessionsStore(agent = 'main') {
|
|
|
328
358
|
}
|
|
329
359
|
}
|
|
330
360
|
|
|
361
|
+
/**
 * Build the path of a session's JSONL file under
 * `<HOME_DIR>/.openclaw/agents/<agent>/sessions/<sessionId>.jsonl`.
 *
 * @param {string} [agent='main'] - Agent ID (directory name).
 * @param {string} sessionId - Session ID (file stem).
 * @returns {string|null} The path, or null when sessionId is falsy.
 */
function getSessionJsonlPath(agent = 'main', sessionId) {
  return sessionId
    ? join(HOME_DIR, '.openclaw', 'agents', agent, 'sessions', `${sessionId}.jsonl`)
    : null;
}
|
|
365
|
+
|
|
366
|
+
/**
 * Summarize the evidence that a session has started and/or done work.
 *
 * "Started" means the store entry carries a sessionId or a parseable
 * sessionStartedAt/startedAt/updatedAt timestamp. "Activity" means the
 * session JSONL file exists, the entry reports a positive totalTokens, or
 * the gateway's `chat.history` call returned at least one message.
 *
 * @param {string} sessionKey - Key of the session in the sessions store.
 * @param {object|null|undefined} sessionsStore - Sessions store keyed by session key.
 * @returns {{
 *   found: boolean,
 *   hasStartedSignal: boolean,
 *   hasActivitySignal: boolean,
 *   messageCount: number|null,
 *   jsonlExists: boolean,
 *   hasTokens: boolean,
 *   updatedAtMs: number|null,
 *   sessionStartedAtMs: number|null,
 *   sessionId: string|null,
 * }}
 */
function inspectSessionActivitySignal(sessionKey, sessionsStore) {
  const entry = sessionKey ? sessionsStore?.[sessionKey] : undefined;
  if (!entry) {
    return {
      found: false,
      hasStartedSignal: false,
      hasActivitySignal: false,
      messageCount: null,
      jsonlExists: false,
      hasTokens: false,
      updatedAtMs: null,
      sessionStartedAtMs: null,
      sessionId: null,
    };
  }

  const agentId = agentFromSessionKey(sessionKey) || 'main';
  const transcriptPath = getSessionJsonlPath(agentId, entry.sessionId);
  const jsonlExists = transcriptPath ? existsSync(transcriptPath) : false;
  const hasTokens = typeof entry.totalTokens === 'number' && entry.totalTokens > 0;
  const sessionStartedAtMs = toTimestampMs(entry.sessionStartedAt || entry.startedAt);
  const updatedAtMs = toTimestampMs(entry.updatedAt);
  const hasStartedSignal =
    Boolean(entry.sessionId) || sessionStartedAtMs !== null || updatedAtMs !== null;

  // Best-effort probe: a failed or slow gateway call simply leaves the
  // message count unknown (null) rather than failing the inspection.
  let messageCount = null;
  try {
    const history = gatewayCall('chat.history', { sessionKey }, { timeout: 8000 });
    if (Array.isArray(history?.messages)) {
      messageCount = history.messages.length;
    }
  } catch {}

  const hasMessages = (messageCount ?? 0) > 0;

  return {
    found: true,
    hasStartedSignal,
    hasActivitySignal: jsonlExists || hasTokens || hasMessages,
    messageCount,
    jsonlExists,
    hasTokens,
    updatedAtMs,
    sessionStartedAtMs,
    sessionId: entry.sessionId || null,
  };
}
|
|
410
|
+
|
|
411
|
+
/**
 * Decide whether a spawned session should be resolved early as a spawn failure.
 *
 * Only an explicit lane task error reported by getGatewayLaneTaskError, seen
 * after the startup grace period has elapsed, triggers a resolution.
 *
 * @param {string} sessionKey - Key of the session in the sessions store.
 * @param {object|null|undefined} sessionsStore - Sessions store keyed by session key.
 * @param {number} spawnedAtMs - Spawn time in epoch ms; a falsy value counts as "infinitely old".
 * @param {number} startupGraceMs - Minimum age (ms) before a failure may be declared.
 * @returns {{ shouldResolve: boolean, reason: string|null, errorMsg: string|null }}
 */
function inspectSessionBootstrapFailure(sessionKey, sessionsStore, spawnedAtMs, startupGraceMs) {
  const keepWaiting = () => ({ shouldResolve: false, reason: null, errorMsg: null });

  const isKnownSession = Boolean(sessionKey) && Boolean(sessionsStore?.[sessionKey]);
  if (!isKnownSession) return keepWaiting();

  const elapsedMs = spawnedAtMs ? Date.now() - spawnedAtMs : Infinity;
  if (elapsedMs < startupGraceMs) return keepWaiting();

  const { found, error } = getGatewayLaneTaskError(sessionKey);
  if (!found || !error) {
    // A Codex session can enter the sessions store before chat.history, JSONL, or
    // token counters are written. Treat that as "still booting"; the watcher and
    // job timeout own later failure handling. Only fail fast when the gateway has
    // recorded an explicit lane error.
    return keepWaiting();
  }

  return {
    shouldResolve: true,
    reason: `diagnostic lane error: ${error}`,
    errorMsg: `spawn-failure: ${error}`,
  };
}
|
|
436
|
+
|
|
437
|
+
/**
 * Read the last `maxLines` non-blank lines of a session's JSONL file and
 * return the ones that parse to a truthy JSON value.
 *
 * The whole file is read into memory before the tail is taken.
 *
 * @param {string} sessionId - Session ID (file stem of the JSONL file).
 * @param {string} [agent='main'] - Agent ID owning the session.
 * @param {number} [maxLines=200] - Maximum number of trailing non-blank lines
 *   to parse. A non-positive or non-numeric value yields an empty array.
 * @returns {object[]|null} Parsed entries in file order, or null when
 *   sessionId is falsy or the file cannot be read.
 */
function readJsonlTailEntries(sessionId, agent = 'main', maxLines = 200) {
  if (!sessionId) return null;
  try {
    // Reuse the shared path helper so both readers agree on the file location.
    const jsonlPath = getSessionJsonlPath(agent, sessionId);
    const lines = readFileSync(jsonlPath, 'utf-8')
      .split('\n')
      .filter((line) => line.trim());

    // slice(-0) is slice(0) and would return the whole file, so a zero or
    // invalid limit is handled explicitly.
    const tail = maxLines > 0 ? lines.slice(-maxLines) : [];

    const entries = [];
    for (const line of tail) {
      try {
        const parsed = JSON.parse(line);
        if (parsed) entries.push(parsed);
      } catch {
        // Malformed line (for example a partially flushed write): skip it.
      }
    }
    return entries;
  } catch {
    // Missing or unreadable file: callers treat null as "no transcript".
    return null;
  }
}
|
|
457
|
+
|
|
331
458
|
/**
|
|
332
459
|
* Auto-detect the originating channel from the most recently active main session.
|
|
333
460
|
* Reads sessions.json, finds sessions active within the last 10 minutes,
|
|
@@ -348,6 +475,17 @@ function inferChatType(key, session) {
|
|
|
348
475
|
return "";
|
|
349
476
|
}
|
|
350
477
|
|
|
478
|
+
/**
 * Split an origin string of the form "<channel>:<target>" at its first colon.
 *
 * @param {string|null|undefined} origin - e.g. "telegram:123456789".
 * @returns {{ channel: string|null, target: string|null }} Both fields are
 *   null when the origin has no colon or an empty side (e.g. "system").
 */
function parseOriginTarget(origin) {
  const parts = /^([^:]+):(.+)$/.exec(origin || '');
  if (parts === null) {
    return { channel: null, target: null };
  }
  const [, channel, target] = parts;
  return { channel, target };
}
|
|
483
|
+
|
|
484
|
+
/**
 * Build an origin string "<channel>:<target>" from a delivery target.
 *
 * @param {string|number|null|undefined} deliverTo - Delivery target (e.g. a chat ID).
 * @param {string|null} [deliverChannel='telegram'] - Channel; any falsy value falls back to 'telegram'.
 * @returns {string|null} The origin string, or null when deliverTo is falsy.
 */
function originFromDeliveryTarget(deliverTo, deliverChannel = 'telegram') {
  if (deliverTo) {
    const channel = deliverChannel || 'telegram';
    return `${channel}:${deliverTo}`;
  }
  return null;
}
|
|
488
|
+
|
|
351
489
|
function getActiveOriginFromSessions() {
|
|
352
490
|
const store = readSessionsStore("main");
|
|
353
491
|
if (!store) return null;
|
|
@@ -551,6 +689,70 @@ function disarmWatchdog(label) {
|
|
|
551
689
|
}
|
|
552
690
|
}
|
|
553
691
|
|
|
692
|
+
|
|
693
|
+
/**
 * Escape a value for embedding between single quotes in a shell command.
 *
 * Every `'` becomes `'"'"'`: close the single-quoted string, emit a
 * double-quoted single quote, then reopen the single-quoted string.
 * The caller supplies the surrounding quotes.
 *
 * @param {*} value - Value to escape; coerced with String().
 * @returns {string} The escaped text.
 */
function quoteForSingleQuotedShell(value) {
  return String(value).split("'").join("'\"'\"'");
}
|
|
696
|
+
|
|
697
|
+
/**
 * Schedule a quick-poll delivery watcher shell job for a dispatch label.
 * Used both for the initial watcher registration and SIGTERM handoffs.
 *
 * Builds a shell command that runs watcher.mjs once (`--once`) for the label
 * and registers it via `cli.js --json jobs add <spec>` in a child process.
 *
 * @param {object} options
 * @param {string} options.label - Dispatch label to watch (required).
 * @param {string} options.deliverTo - Delivery target (required).
 * @param {string} [options.deliverChannel='telegram'] - Delivery channel.
 * @param {number|string} [options.timeoutSeconds=300] - Run timeout; the
 *   watcher is given this value plus 120 seconds. Non-numeric input falls
 *   back to 300.
 * @param {number|string} [options.idleThresholdSeconds=300] - Idle threshold
 *   passed to the watcher; zero or non-numeric input falls back to 300.
 * @param {string} [options.origin='system'] - Origin recorded on the job.
 * @param {string} [options.agentBrand=BRAND] - Prefix for the job name.
 * @param {string} [options.nameSuffix=''] - Suffix appended to the job name.
 * @returns {object|null} The `job` field of the CLI's JSON output, or null when absent.
 * @throws {Error} When label or deliverTo is missing, when the CLI child
 *   process fails or times out, or when its output is not valid JSON.
 */
function scheduleDeliveryWatcherJob({
  label,
  deliverTo,
  deliverChannel = 'telegram',
  timeoutSeconds = 300,
  idleThresholdSeconds = 300,
  origin = 'system',
  agentBrand = BRAND,
  nameSuffix = '',
}) {
  if (!label) throw new Error('label is required');
  if (!deliverTo) throw new Error('deliverTo is required');

  // Both values are interpolated unquoted into a shell command, so they must
  // be real numbers: "NaN" or "Infinity" would reach the watcher as garbage.
  const finiteOr = (raw, fallback) => {
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : fallback;
  };

  const schedulerCli = join(__dirname, '..', 'cli.js');
  const watcherPath = join(__dirname, 'watcher.mjs');
  const watcherTimeoutS = finiteOr(timeoutSeconds, 300) + 120;
  const idleThresholdS = finiteOr(idleThresholdSeconds, 300) || 300;
  const sq = quoteForSingleQuotedShell;
  const watcherCmd =
    `DISPATCH_LABELS_PATH='${sq(LABELS_PATH)}' ` +
    `DISPATCH_INDEX_PATH='${sq(join(__dirname, 'index.mjs'))}' ` +
    `'${sq(process.execPath)}' '${sq(watcherPath)}' ` +
    `--label '${sq(label)}' --timeout ${watcherTimeoutS} ` +
    `--poll-interval 20 --idle-threshold ${idleThresholdS} --once`;

  // "YYYY-MM-DD HH:MM:SS" derived from the current UTC ISO timestamp.
  const nowUtc = new Date().toISOString().replace('T', ' ').slice(0, 19);
  const jobSpec = {
    name: `${agentBrand}-deliver:${label}${nameSuffix}`,
    schedule_kind: 'cron',
    schedule_cron: config.deliver_watcher_cron || '* * * * *',
    next_run_at: nowUtc,
    session_target: 'shell',
    payload_kind: 'shellCommand',
    payload_message: watcherCmd,
    delivery_mode: 'announce-always',
    delivery_channel: deliverChannel,
    delivery_to: deliverTo,
    delivery_guarantee: 'at-least-once',
    ttl_hours: config.deliver_watcher_ttl_hours ?? 48,
    overlap_policy: 'skip',
    run_timeout_ms: 120_000,
    delete_after_run: 1,
    origin: origin || 'system',
  };

  // execFileSync passes the spec as a single argv entry with no shell, so the
  // JSON needs no extra quoting here.
  const raw = execFileSync(process.execPath, [schedulerCli, '--json', 'jobs', 'add', JSON.stringify(jobSpec)], {
    encoding: 'utf-8',
    timeout: 10000,
    stdio: ['pipe', 'pipe', 'pipe'],
  });

  const parsed = JSON.parse(raw.trim());
  return parsed?.job || null;
}
|
|
755
|
+
|
|
554
756
|
// -- Session Helpers ------------------------------------------
|
|
555
757
|
|
|
556
758
|
/** Build a unique session key for a new subagent session. */
|
|
@@ -565,12 +767,19 @@ function makeSessionKey(agentId) {
|
|
|
565
767
|
*
|
|
566
768
|
* Flags:
|
|
567
769
|
* --label <string> Required. Human-readable name
|
|
568
|
-
* --message <string>
|
|
770
|
+
* --message <string> Prompt sent to the agent
|
|
771
|
+
* --message-file <path> Read prompt text from a file (`-` = stdin)
|
|
772
|
+
* --message-env <VAR> Read prompt text from an environment variable
|
|
773
|
+
* --message-stdin Read prompt text from stdin explicitly
|
|
774
|
+
* (stdin is also auto-read when piped and no other message source is set)
|
|
569
775
|
* --agent <string> Agent ID (default: main)
|
|
570
776
|
* --thinking <string> Reasoning level: low|high|xhigh (default: not set)
|
|
571
777
|
* --timeout <seconds> Run timeout in seconds (default: 300)
|
|
572
|
-
* --origin <origin>
|
|
573
|
-
*
|
|
778
|
+
* --origin <origin> Explicit dispatch origin for audit/retries (e.g. "telegram:<chat_id>", "system")
|
|
779
|
+
* If omitted but --deliver-to is explicit, dispatch derives origin from that target.
|
|
780
|
+
* Active-session auto-detect is preserved only as a manual/local fallback when both are absent.
|
|
781
|
+
* --deliver-to <target> Delivery target (e.g. Telegram chat ID). Registers the scheduler watcher for durable final delivery.
|
|
782
|
+
* Chat-triggered callers should pass inbound metadata chat_id here, especially for group chats.
|
|
574
783
|
* Defaults to origin chat ID when --origin is a "telegram:<id>" string.
|
|
575
784
|
* --deliver-channel <ch> Delivery channel for --deliver-to (default: telegram)
|
|
576
785
|
* --delivery-mode <mode> announce|announce-always|none (default: announce)
|
|
@@ -581,18 +790,23 @@ function makeSessionKey(agentId) {
|
|
|
581
790
|
* --model <string> Model override (e.g. anthropic/claude-sonnet-4-6)
|
|
582
791
|
*/
|
|
583
792
|
async function cmdEnqueue(flags) {
|
|
584
|
-
const label
|
|
585
|
-
let message = flags.message;
|
|
793
|
+
const label = flags.label;
|
|
586
794
|
if (!label) die('--label is required', 2);
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
795
|
+
|
|
796
|
+
let message = null;
|
|
797
|
+
try {
|
|
798
|
+
message = await resolveMessageInput({
|
|
799
|
+
message: flags.message,
|
|
800
|
+
messageFile: flags['message-file'],
|
|
801
|
+
messageEnv: flags['message-env'],
|
|
802
|
+
messageStdin: flags['message-stdin'],
|
|
803
|
+
});
|
|
804
|
+
} catch (err) {
|
|
805
|
+
die(err.message, 2);
|
|
806
|
+
}
|
|
807
|
+
if (message === null || message.length === 0) {
|
|
808
|
+
die('--message, --message-file, --message-env, --message-stdin, or piped stdin is required', 2);
|
|
594
809
|
}
|
|
595
|
-
if (!message) die('--message or --message-file is required', 2);
|
|
596
810
|
|
|
597
811
|
const agent = flags.agent || 'main';
|
|
598
812
|
const thinking = flags.thinking || null;
|
|
@@ -605,30 +819,44 @@ async function cmdEnqueue(flags) {
|
|
|
605
819
|
process.stderr.write(`[${BRAND}] WARNING: --timeout not specified, defaulting to 300s. ` +
|
|
606
820
|
`Pass --timeout explicitly (≥1200 for thinking=high tasks) to avoid premature watcher kills.\n`);
|
|
607
821
|
}
|
|
608
|
-
|
|
822
|
+
const explicitOrigin = flags.origin || null;
|
|
823
|
+
const explicitDeliverTo = flags['deliver-to'] || null;
|
|
824
|
+
const explicitDeliverChannel = flags['deliver-channel'] || null;
|
|
825
|
+
let origin = explicitOrigin;
|
|
826
|
+
|
|
827
|
+
// Contract: chat-triggered callers should pass --deliver-to from inbound
|
|
828
|
+
// metadata chat_id. If they omit --origin, derive it from that explicit
|
|
829
|
+
// delivery target so dispatch never falls back to whichever session happened
|
|
830
|
+
// to be active most recently.
|
|
831
|
+
if (!origin && explicitDeliverTo) {
|
|
832
|
+
origin = originFromDeliveryTarget(explicitDeliverTo, explicitDeliverChannel || 'telegram');
|
|
833
|
+
}
|
|
609
834
|
|
|
610
|
-
//
|
|
611
|
-
|
|
835
|
+
// Preserve active-session inference only as a manual/local fallback when the
|
|
836
|
+
// caller truly omitted both origin and delivery target.
|
|
837
|
+
if (!origin && !explicitDeliverTo) {
|
|
612
838
|
origin = getActiveOriginFromSessions();
|
|
613
839
|
if (origin) {
|
|
614
840
|
process.stderr.write(`[${BRAND}] auto-detected origin from active session: ${origin}\n`);
|
|
841
|
+
process.stderr.write(`[${BRAND}] NOTE: active-session origin detection is a manual/local fallback. ` +
|
|
842
|
+
`Chat-triggered callers should pass --deliver-to from inbound metadata chat_id.\n`);
|
|
615
843
|
}
|
|
616
844
|
}
|
|
617
845
|
|
|
618
846
|
// -- Auto-derive deliver-to from origin ---------------------------------
|
|
619
847
|
// If origin is "telegram:<id>", use <id> as the default deliver-to target.
|
|
620
848
|
let defaultDeliverTo = null;
|
|
621
|
-
let defaultDeliverCh = 'telegram';
|
|
849
|
+
let defaultDeliverCh = explicitDeliverChannel || 'telegram';
|
|
622
850
|
if (origin) {
|
|
623
|
-
const
|
|
624
|
-
if (
|
|
625
|
-
defaultDeliverCh
|
|
626
|
-
defaultDeliverTo
|
|
851
|
+
const { channel, target } = parseOriginTarget(origin);
|
|
852
|
+
if (channel && target) {
|
|
853
|
+
if (!explicitDeliverChannel) defaultDeliverCh = channel;
|
|
854
|
+
defaultDeliverTo = target;
|
|
627
855
|
}
|
|
628
856
|
}
|
|
629
857
|
|
|
630
|
-
const deliverTo =
|
|
631
|
-
const deliverChannel =
|
|
858
|
+
const deliverTo = explicitDeliverTo || defaultDeliverTo;
|
|
859
|
+
const deliverChannel = explicitDeliverChannel || defaultDeliverCh || 'telegram';
|
|
632
860
|
const deliverMode = flags['delivery-mode'] || 'announce';
|
|
633
861
|
const mode = flags.mode || 'fresh';
|
|
634
862
|
|
|
@@ -645,6 +873,9 @@ async function cmdEnqueue(flags) {
|
|
|
645
873
|
|
|
646
874
|
// -- Watchdog monitoring flags -----------------------------
|
|
647
875
|
const noMonitorRaw = flags['no-monitor'];
|
|
876
|
+
const noMonitorReason = typeof noMonitorRaw === 'string' && noMonitorRaw.trim()
|
|
877
|
+
? noMonitorRaw.trim()
|
|
878
|
+
: null;
|
|
648
879
|
const noMonitor = !!noMonitorRaw;
|
|
649
880
|
const monitorEnabled = !noMonitor && flags.monitor !== 'false';
|
|
650
881
|
const monitorInterval = flags['monitor-interval'] || config.watchdogIntervalCron || '*/15 * * * *';
|
|
@@ -659,6 +890,7 @@ async function cmdEnqueue(flags) {
|
|
|
659
890
|
"REJECTED: --deliver-to is required for dispatch jobs.\n" +
|
|
660
891
|
"Pass --deliver-to <chat_id> (e.g. --deliver-to -100200000000 for a group, " +
|
|
661
892
|
"or --deliver-to 123456789 for a DM).\n" +
|
|
893
|
+
"Chat-triggered callers should pass inbound metadata chat_id here, especially for group chats.\n" +
|
|
662
894
|
"Alternatively, pass --origin telegram:<chat_id> to auto-derive the delivery target.\n" +
|
|
663
895
|
"Pass --no-monitor \"<reason>\" only if you explicitly want to skip delivery (audit trail required).",
|
|
664
896
|
2
|
|
@@ -769,26 +1001,11 @@ async function cmdEnqueue(flags) {
|
|
|
769
1001
|
const doneScriptPath = join(__dirname, 'index.mjs');
|
|
770
1002
|
parts.push(``);
|
|
771
1003
|
parts.push(`---`);
|
|
772
|
-
parts.push(
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
parts.push(` 3. All API calls (e.g. GitHub comment replies) are done`);
|
|
778
|
-
parts.push(` 4. You have verified the work is complete`);
|
|
779
|
-
parts.push(``);
|
|
780
|
-
parts.push(`Call this as your ABSOLUTE FINAL action -- nothing else runs after this:`);
|
|
781
|
-
parts.push(` node '${doneScriptPath}' done --label '${label.replace(/'/g, "'\\''")}' \\`);
|
|
782
|
-
parts.push(` --summary "<what you actually did>" \\`);
|
|
783
|
-
parts.push(` --checklist '{"work_complete":true,"tests_passed":true,"pushed":true}' \\`);
|
|
784
|
-
parts.push(` [--sha "<git commit SHA if applicable>"]`);
|
|
785
|
-
parts.push(``);
|
|
786
|
-
parts.push(`Checklist rules:`);
|
|
787
|
-
parts.push(` - work_complete MUST be true -- you are asserting you have finished ALL assigned work`);
|
|
788
|
-
parts.push(` - If tests failed or push failed, do NOT set tests_passed:true or pushed:true -- instead continue working`);
|
|
789
|
-
parts.push(` - Only include tests_passed/pushed if they apply to your task`);
|
|
790
|
-
parts.push(`If your task involved git commits, --sha is required and must be the actual SHA of your pushed commit. The done script will reject invented or placeholder SHAs.`);
|
|
791
|
-
parts.push(`Do NOT call done while planning, reading files, or mid-task. If you have not yet pushed a commit, you are not done.`);
|
|
1004
|
+
parts.push(buildCompletionSignalInstructions({
|
|
1005
|
+
label,
|
|
1006
|
+
taskPrompt: message,
|
|
1007
|
+
doneScriptPath,
|
|
1008
|
+
}));
|
|
792
1009
|
parts.push(`---`);
|
|
793
1010
|
parts.push(``);
|
|
794
1011
|
parts.push(`---`);
|
|
@@ -802,15 +1019,16 @@ async function cmdEnqueue(flags) {
|
|
|
802
1019
|
const taskMessage = parts.join('\n');
|
|
803
1020
|
|
|
804
1021
|
// -- Call gateway agent method -------------------------------
|
|
805
|
-
//
|
|
806
|
-
//
|
|
807
|
-
//
|
|
1022
|
+
// Final user delivery belongs to the scheduler watcher below.
|
|
1023
|
+
// Keep the gateway spawn fire-and-forget so raw tool output or internal
|
|
1024
|
+
// done payloads cannot leak directly to the chat ahead of the durable
|
|
1025
|
+
// post-office delivery path.
|
|
808
1026
|
try {
|
|
809
1027
|
const response = gatewayCall('agent', {
|
|
810
1028
|
message: taskMessage,
|
|
811
1029
|
sessionKey,
|
|
812
1030
|
idempotencyKey: idem,
|
|
813
|
-
deliver:
|
|
1031
|
+
deliver: false,
|
|
814
1032
|
lane: 'subagent',
|
|
815
1033
|
timeout: timeoutS,
|
|
816
1034
|
label: label,
|
|
@@ -822,6 +1040,11 @@ async function cmdEnqueue(flags) {
|
|
|
822
1040
|
} : {}),
|
|
823
1041
|
}, { timeout: 15000 });
|
|
824
1042
|
|
|
1043
|
+
const deliveryDisabled = !deliverTo && noMonitor;
|
|
1044
|
+
const deliveryDisabledReason = deliveryDisabled
|
|
1045
|
+
? (noMonitorReason || 'explicit opt-out via --no-monitor')
|
|
1046
|
+
: null;
|
|
1047
|
+
|
|
825
1048
|
// Update ledger
|
|
826
1049
|
setLabel(label, {
|
|
827
1050
|
sessionKey,
|
|
@@ -834,9 +1057,12 @@ async function cmdEnqueue(flags) {
|
|
|
834
1057
|
deliverTo: deliverTo || null,
|
|
835
1058
|
deliverChannel: deliverChannel || null,
|
|
836
1059
|
deliveryMode: deliverMode || null,
|
|
1060
|
+
deliveryDisabled,
|
|
1061
|
+
deliveryDisabledReason,
|
|
837
1062
|
verifyCmd: verifyCmd || null,
|
|
838
1063
|
spawnedAt: new Date().toISOString(),
|
|
839
1064
|
timeoutSeconds: timeoutS,
|
|
1065
|
+
idleThresholdSeconds: parseInt(flags['idle-threshold'] || '300', 10),
|
|
840
1066
|
// Fix 4: Store timeout so cmdDone threshold logic can use it correctly.
|
|
841
1067
|
timeout: timeoutS,
|
|
842
1068
|
status: 'running',
|
|
@@ -879,51 +1105,29 @@ async function cmdEnqueue(flags) {
|
|
|
879
1105
|
}
|
|
880
1106
|
|
|
881
1107
|
// -- Register scheduler watcher for delivery ---------------
|
|
882
|
-
// Creates a
|
|
883
|
-
//
|
|
884
|
-
//
|
|
885
|
-
//
|
|
1108
|
+
// Creates a quick-poll shell job that runs watcher.mjs once per tick. Empty
|
|
1109
|
+
// stdout means "still running" and advances the next tick without delivery.
|
|
1110
|
+
// Terminal stdout goes through the scheduler's handleDelivery with retry,
|
|
1111
|
+
// alias resolution, and audit trail in scheduler.db.
|
|
1112
|
+
// The watcher is the only final-delivery path for dispatched jobs.
|
|
886
1113
|
const sq = s => String(s).replace(/'/g, "'\\''");
|
|
887
1114
|
let schedulerWatcherOk = false;
|
|
888
1115
|
if (deliverTo && deliverMode !== 'none') {
|
|
889
1116
|
try {
|
|
890
|
-
const
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
name: `${agentBrand}-deliver:${label}`,
|
|
899
|
-
schedule_kind: 'at',
|
|
900
|
-
schedule_at: nowUtc,
|
|
901
|
-
session_target: 'shell',
|
|
902
|
-
payload_kind: 'shellCommand',
|
|
903
|
-
payload_message: watcherCmd,
|
|
904
|
-
delivery_mode: 'announce-always',
|
|
905
|
-
delivery_channel: deliverChannel,
|
|
906
|
-
delivery_to: deliverTo,
|
|
907
|
-
delivery_guarantee: 'at-least-once',
|
|
908
|
-
ttl_hours: config.deliver_watcher_ttl_hours ?? 48, // configurable TTL (deliver_watcher_ttl_hours); default 48h
|
|
909
|
-
overlap_policy: 'skip',
|
|
910
|
-
// Shell ceiling = max(initial timeout, rolling extension cap) + headroom.
|
|
911
|
-
// The watcher can extend its deadline up to MAX_DEADLINE_EXTENSION (4h) on
|
|
912
|
-
// activity (token growth / JSONL mtime). Headroom covers 2*FLAT_WINDOW + slop.
|
|
913
|
-
// Watcher constants: FLAT_WINDOW_MS=180s, MAX_DEADLINE_EXTENSION=4h.
|
|
914
|
-
run_timeout_ms: Math.max(watcherTimeoutS, 4 * 3600) * 1000
|
|
915
|
-
+ 420 * 1000, // +7min headroom (2*FLAT_WINDOW + 1min slop)
|
|
916
|
-
delete_after_run: 1, // auto-delete after watcher completes
|
|
917
|
-
origin: origin || 'system',
|
|
918
|
-
});
|
|
919
|
-
const schedulerCli = join(__dirname, '..', 'cli.js');
|
|
920
|
-
execFileSync(process.execPath, [schedulerCli, 'jobs', 'add', jobSpec], {
|
|
921
|
-
encoding: 'utf-8',
|
|
922
|
-
timeout: 10000,
|
|
923
|
-
stdio: ['pipe', 'pipe', 'pipe'],
|
|
1117
|
+
const watcherJob = scheduleDeliveryWatcherJob({
|
|
1118
|
+
label,
|
|
1119
|
+
deliverTo,
|
|
1120
|
+
deliverChannel,
|
|
1121
|
+
timeoutSeconds: timeoutS,
|
|
1122
|
+
idleThresholdSeconds: flags['idle-threshold'] || '300',
|
|
1123
|
+
origin: origin || 'system',
|
|
1124
|
+
agentBrand,
|
|
924
1125
|
});
|
|
925
1126
|
schedulerWatcherOk = true;
|
|
926
|
-
process.stderr.write(
|
|
1127
|
+
process.stderr.write(
|
|
1128
|
+
`[${agentBrand}] scheduler watcher registered: ${agentBrand}-deliver:${label}` +
|
|
1129
|
+
`${watcherJob?.id ? ` (${watcherJob.id})` : ''}\n`
|
|
1130
|
+
);
|
|
927
1131
|
} catch (err) {
|
|
928
1132
|
process.stderr.write(`[${agentBrand}] scheduler watcher FAILED (gateway fallback active): ${err.message}\n`);
|
|
929
1133
|
}
|
|
@@ -934,7 +1138,7 @@ async function cmdEnqueue(flags) {
|
|
|
934
1138
|
let watchdogJobId = null;
|
|
935
1139
|
if (monitorEnabled && deliverTo) {
|
|
936
1140
|
try {
|
|
937
|
-
const checkCmd = `'${sq(process.execPath)}' '${sq(join(__dirname, 'index.mjs'))}'
|
|
1141
|
+
const checkCmd = `'${sq(process.execPath)}' '${sq(join(__dirname, 'index.mjs'))}' result --label '${sq(label)}'`;
|
|
938
1142
|
const alertChannel = deliverChannel || 'telegram';
|
|
939
1143
|
const alertTarget = deliverTo;
|
|
940
1144
|
const watchdogSpec = JSON.stringify({
|
|
@@ -979,6 +1183,18 @@ async function cmdEnqueue(flags) {
|
|
|
979
1183
|
}
|
|
980
1184
|
}
|
|
981
1185
|
|
|
1186
|
+
const delivery = buildDispatchDeliverySurface({
|
|
1187
|
+
deliverTo,
|
|
1188
|
+
deliverChannel,
|
|
1189
|
+
deliveryMode: deliverMode,
|
|
1190
|
+
deliveryDisabled,
|
|
1191
|
+
deliveryDisabledReason,
|
|
1192
|
+
...(deliverTo ? {
|
|
1193
|
+
scheduler: schedulerWatcherOk,
|
|
1194
|
+
gateway: true,
|
|
1195
|
+
} : {}),
|
|
1196
|
+
});
|
|
1197
|
+
|
|
982
1198
|
out({
|
|
983
1199
|
ok: true,
|
|
984
1200
|
label,
|
|
@@ -987,12 +1203,7 @@ async function cmdEnqueue(flags) {
|
|
|
987
1203
|
mode: isFresh ? 'fresh' : 'reuse',
|
|
988
1204
|
agent,
|
|
989
1205
|
status: 'accepted',
|
|
990
|
-
delivery
|
|
991
|
-
scheduler: schedulerWatcherOk,
|
|
992
|
-
gateway: !!deliverTo,
|
|
993
|
-
target: deliverTo,
|
|
994
|
-
channel: deliverChannel,
|
|
995
|
-
} : null,
|
|
1206
|
+
delivery,
|
|
996
1207
|
watchdog: monitorEnabled ? {
|
|
997
1208
|
enabled: watchdogJobOk,
|
|
998
1209
|
jobId: watchdogJobId,
|
|
@@ -1000,35 +1211,46 @@ async function cmdEnqueue(flags) {
|
|
|
1000
1211
|
timeout: monitorTimeout,
|
|
1001
1212
|
...(monitorEnabled && !deliverTo ? { skipped: true, reason: 'no --deliver-to target' } : {}),
|
|
1002
1213
|
} : null,
|
|
1003
|
-
message:
|
|
1004
|
-
?
|
|
1005
|
-
:
|
|
1006
|
-
? 'Session spawned. Delivery via
|
|
1007
|
-
:
|
|
1214
|
+
message: delivery.status === 'disabled'
|
|
1215
|
+
? `Session spawned. Delivery intentionally disabled${delivery.reason ? ` (${delivery.reason}).` : '.'}`
|
|
1216
|
+
: schedulerWatcherOk
|
|
1217
|
+
? 'Session spawned. Delivery via scheduler (primary) + gateway (secondary).'
|
|
1218
|
+
: deliverTo
|
|
1219
|
+
? 'Session spawned. Delivery via gateway only (scheduler watcher failed).'
|
|
1220
|
+
: 'Session spawned. Delivery target missing or not recorded.',
|
|
1008
1221
|
});
|
|
1009
1222
|
|
|
1010
1223
|
// -- Post-spawn verification (Fix 3) --------------------------------
|
|
1011
1224
|
// Canary: poll sessions.json up to 3 times at 10s intervals to confirm the
|
|
1012
|
-
// session appeared in the store.
|
|
1013
|
-
//
|
|
1014
|
-
//
|
|
1225
|
+
// session appeared in the store. A session store entry with sessionId or
|
|
1226
|
+
// startedAt/sessionStartedAt is enough: long first turns may not flush JSONL,
|
|
1227
|
+
// token counts, or chat.history until the model call completes. The delivery
|
|
1228
|
+
// watcher owns later completion/failure handling.
|
|
1015
1229
|
const SPAWN_POLL_MAX = 3;
|
|
1016
1230
|
const SPAWN_POLL_DELAY_MS = 10_000;
|
|
1017
1231
|
let spawnConfirmed = false;
|
|
1018
1232
|
for (let spawnPoll = 0; spawnPoll < SPAWN_POLL_MAX; spawnPoll++) {
|
|
1019
1233
|
await sleep(SPAWN_POLL_DELAY_MS);
|
|
1020
1234
|
const spawnStore = readSessionsStore(agent);
|
|
1021
|
-
|
|
1235
|
+
const signal = inspectSessionActivitySignal(sessionKey, spawnStore);
|
|
1236
|
+
if (signal.hasStartedSignal || signal.hasActivitySignal) {
|
|
1022
1237
|
spawnConfirmed = true;
|
|
1023
1238
|
break;
|
|
1024
1239
|
}
|
|
1025
1240
|
}
|
|
1026
1241
|
if (!spawnConfirmed) {
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1242
|
+
const laneError = getGatewayLaneTaskError(sessionKey);
|
|
1243
|
+
const spawnError = laneError.found && laneError.error
|
|
1244
|
+
? `spawn-failure: ${laneError.error}`
|
|
1245
|
+
: `spawn-failure: session ${sessionKey} never produced transcript/history within ` +
|
|
1246
|
+
`${(SPAWN_POLL_MAX * SPAWN_POLL_DELAY_MS) / 1000}s`;
|
|
1247
|
+
process.stderr.write(`[${agentBrand}] WARNING: ${spawnError}\n`);
|
|
1248
|
+
setLabel(label, {
|
|
1249
|
+
status: 'error',
|
|
1250
|
+
error: spawnError,
|
|
1251
|
+
summary: spawnError,
|
|
1252
|
+
});
|
|
1253
|
+
disarmWatchdog(label);
|
|
1032
1254
|
}
|
|
1033
1255
|
} catch (err) {
|
|
1034
1256
|
die(`gateway agent call failed: ${err.message}`);
|
|
@@ -1065,62 +1287,80 @@ function cmdStatus(flags) {
|
|
|
1065
1287
|
const ageMs = Date.now() - spawnedAtMs;
|
|
1066
1288
|
const STARTUP_GRACE_MS = config.startupGraceMs ?? 300_000;
|
|
1067
1289
|
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
//
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1290
|
+
const bootstrapFailure = !entry.lastPing
|
|
1291
|
+
? inspectSessionBootstrapFailure(
|
|
1292
|
+
entry.sessionKey,
|
|
1293
|
+
sessionsStore,
|
|
1294
|
+
spawnedAtMs,
|
|
1295
|
+
STARTUP_GRACE_MS,
|
|
1296
|
+
)
|
|
1297
|
+
: { shouldResolve: false, reason: null, errorMsg: null };
|
|
1298
|
+
if (bootstrapFailure.shouldResolve) {
|
|
1299
|
+
setLabel(label, {
|
|
1300
|
+
status: 'error',
|
|
1301
|
+
error: bootstrapFailure.errorMsg,
|
|
1302
|
+
summary: `Auto-resolved as spawn failure: ${bootstrapFailure.reason}`,
|
|
1303
|
+
});
|
|
1304
|
+
syncAction = `auto-resolved as spawn failure: ${bootstrapFailure.reason}`;
|
|
1305
|
+
disarmWatchdog(label);
|
|
1306
|
+
} else {
|
|
1307
|
+
// -- Heartbeat-based liveness guard ----------------------------------
|
|
1308
|
+
// The watcher process writes lastPing every 60s while the session is live.
|
|
1309
|
+
// If the ping is fresh, the watcher is alive and working -- defer auto-resolve
|
|
1310
|
+
// to avoid killing sessions during slow tool calls, docker builds, etc.
|
|
1311
|
+
//
|
|
1312
|
+
// PING_STALE_MS: 3x the 60s ping interval -- if we haven't heard from the
|
|
1313
|
+
// watcher in 3 min, it's probably dead; fall through to check.
|
|
1314
|
+
// hardCeilingMs: job timeout * 1.5 -- absolute max regardless of ping age.
|
|
1315
|
+
// Catches zombie watchers (watcher alive but session is stuck).
|
|
1316
|
+
// idleThresholdMs: max(job timeout, 10 min) -- replaces the old hardcoded 10-min
|
|
1317
|
+
// threshold so longer jobs aren't killed at exactly 10 min.
|
|
1318
|
+
const PING_STALE_MS = 3 * 60 * 1000;
|
|
1319
|
+
const idleThresholdMs = Math.max((entry.timeoutSeconds || 600) * 1000, 10 * 60 * 1000);
|
|
1320
|
+
// hardCeilingMs must be >= idleThresholdMs to avoid the ceiling undercutting the
|
|
1321
|
+
// idle floor (e.g. timeoutSeconds=300 -> ceiling=7.5 min < idle=10 min would force
|
|
1322
|
+
// zombie-guard threshold for sessions that should still use idleThresholdMs).
|
|
1323
|
+
const hardCeilingMs = Math.max((entry.timeoutSeconds || 600) * 1000 * 1.5, idleThresholdMs * 1.5);
|
|
1324
|
+
|
|
1325
|
+
let check;
|
|
1326
|
+
if (ageMs < STARTUP_GRACE_MS) {
|
|
1327
|
+
// Within startup grace -- never auto-resolve
|
|
1094
1328
|
check = { shouldResolve: false };
|
|
1329
|
+
} else if (entry.lastPing) {
|
|
1330
|
+
const pingAgeMs = Date.now() - new Date(entry.lastPing).getTime();
|
|
1331
|
+
if (pingAgeMs < PING_STALE_MS && ageMs < hardCeilingMs) {
|
|
1332
|
+
// Watcher alive and within job ceiling -- defer auto-resolve
|
|
1333
|
+
check = { shouldResolve: false };
|
|
1334
|
+
} else {
|
|
1335
|
+
// Ping stale OR past hard ceiling: fall through to session store check
|
|
1336
|
+
const thresh = ageMs >= hardCeilingMs ? 2 * 60 * 1000 : idleThresholdMs;
|
|
1337
|
+
check = checkSessionDone(entry.sessionKey, sessionsStore, thresh, true, spawnedAtMs);
|
|
1338
|
+
}
|
|
1095
1339
|
} else {
|
|
1096
|
-
//
|
|
1340
|
+
// No lastPing -- backward compat (sessions dispatched before heartbeat feature).
|
|
1341
|
+
// Use idleThresholdMs (job-aware) instead of the old hardcoded 10 min.
|
|
1097
1342
|
const thresh = ageMs >= hardCeilingMs ? 2 * 60 * 1000 : idleThresholdMs;
|
|
1098
1343
|
check = checkSessionDone(entry.sessionKey, sessionsStore, thresh, true, spawnedAtMs);
|
|
1099
1344
|
}
|
|
1100
|
-
} else {
|
|
1101
|
-
// No lastPing -- backward compat (sessions dispatched before heartbeat feature).
|
|
1102
|
-
// Use idleThresholdMs (job-aware) instead of the old hardcoded 10 min.
|
|
1103
|
-
const thresh = ageMs >= hardCeilingMs ? 2 * 60 * 1000 : idleThresholdMs;
|
|
1104
|
-
check = checkSessionDone(entry.sessionKey, sessionsStore, thresh, true, spawnedAtMs);
|
|
1105
|
-
}
|
|
1106
1345
|
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1346
|
+
if (check.shouldResolve) {
|
|
1347
|
+
if (check.is529) {
|
|
1348
|
+
setLabel(label, {
|
|
1349
|
+
status: 'error',
|
|
1350
|
+
error: check.errorMsg || `529/overload: ${check.reason}`,
|
|
1351
|
+
summary: `Auto-resolved as error: ${check.reason}`,
|
|
1352
|
+
});
|
|
1353
|
+
syncAction = `auto-resolved as 529 error: ${check.reason}`;
|
|
1354
|
+
} else {
|
|
1355
|
+
setLabel(label, {
|
|
1356
|
+
status: 'interrupted',
|
|
1357
|
+
summary: `Auto-resolved: session went idle without calling done. Work may be incomplete. (${check.reason})`,
|
|
1358
|
+
});
|
|
1359
|
+
syncAction = `auto-resolved as interrupted: ${check.reason}`;
|
|
1360
|
+
}
|
|
1361
|
+
// Disarm watchdog when session is auto-resolved
|
|
1362
|
+
disarmWatchdog(label);
|
|
1121
1363
|
}
|
|
1122
|
-
// Disarm watchdog when session is auto-resolved
|
|
1123
|
-
disarmWatchdog(label);
|
|
1124
1364
|
}
|
|
1125
1365
|
}
|
|
1126
1366
|
|
|
@@ -1128,6 +1368,9 @@ function cmdStatus(flags) {
|
|
|
1128
1368
|
if (entry.sessionKey && sessionsStore) {
|
|
1129
1369
|
const sessionEntry = sessionsStore[entry.sessionKey];
|
|
1130
1370
|
if (sessionEntry) {
|
|
1371
|
+
if (sessionEntry.sessionId && entry.sessionId !== sessionEntry.sessionId) {
|
|
1372
|
+
setLabel(label, { sessionId: sessionEntry.sessionId });
|
|
1373
|
+
}
|
|
1131
1374
|
liveness = {
|
|
1132
1375
|
updatedAt: sessionEntry.updatedAt,
|
|
1133
1376
|
ageMs: sessionEntry.updatedAt
|
|
@@ -1159,6 +1402,7 @@ function cmdStatus(flags) {
|
|
|
1159
1402
|
updatedAt: current.updatedAt,
|
|
1160
1403
|
summary: current.summary || null,
|
|
1161
1404
|
completion: current.completion || null,
|
|
1405
|
+
delivery: buildDispatchDeliverySurface(current),
|
|
1162
1406
|
error: current.error || null,
|
|
1163
1407
|
liveness,
|
|
1164
1408
|
...(syncAction ? { syncAction } : {}),
|
|
@@ -1192,7 +1436,7 @@ function hasActiveWatcher(label) {
|
|
|
1192
1436
|
r.status = 'running'
|
|
1193
1437
|
OR (r.status = 'pending' AND r.started_at > datetime('now','-5 minutes'))
|
|
1194
1438
|
)
|
|
1195
|
-
`).get(`%-deliver:${label}
|
|
1439
|
+
`).get(`%-deliver:${label}%`);
|
|
1196
1440
|
return (row?.c || 0) > 0;
|
|
1197
1441
|
} catch {
|
|
1198
1442
|
return false;
|
|
@@ -1346,6 +1590,28 @@ function cmdSync(flags) {
|
|
|
1346
1590
|
const syncStore = getSyncStore(entry);
|
|
1347
1591
|
const spawnedAtMs = entry.spawnedAt ? new Date(entry.spawnedAt).getTime() : 0;
|
|
1348
1592
|
const elapsedMs = Date.now() - spawnedAtMs;
|
|
1593
|
+
const STARTUP_GRACE_MS_SYNC = config.startupGraceMs ?? 300_000;
|
|
1594
|
+
|
|
1595
|
+
const bootstrapFailure = !entry.lastPing
|
|
1596
|
+
? inspectSessionBootstrapFailure(
|
|
1597
|
+
entry.sessionKey,
|
|
1598
|
+
syncStore,
|
|
1599
|
+
spawnedAtMs,
|
|
1600
|
+
STARTUP_GRACE_MS_SYNC,
|
|
1601
|
+
)
|
|
1602
|
+
: { shouldResolve: false, reason: null, errorMsg: null };
|
|
1603
|
+
if (bootstrapFailure.shouldResolve) {
|
|
1604
|
+
changes.push({ label: name, from: 'running', to: 'error', reason: bootstrapFailure.reason });
|
|
1605
|
+
if (!dryRun) {
|
|
1606
|
+
setLabel(name, {
|
|
1607
|
+
status: 'error',
|
|
1608
|
+
error: bootstrapFailure.errorMsg,
|
|
1609
|
+
summary: `Synced as spawn failure: ${bootstrapFailure.reason}`,
|
|
1610
|
+
});
|
|
1611
|
+
disarmWatchdog(name);
|
|
1612
|
+
}
|
|
1613
|
+
continue;
|
|
1614
|
+
}
|
|
1349
1615
|
|
|
1350
1616
|
// -- Heartbeat-based liveness guard (mirrors cmdStatus logic) ---------
|
|
1351
1617
|
// Skip auto-resolve when the watcher's lastPing heartbeat is fresh.
|
|
@@ -1412,32 +1678,62 @@ function cmdResult(flags) {
|
|
|
1412
1678
|
return;
|
|
1413
1679
|
}
|
|
1414
1680
|
|
|
1415
|
-
//
|
|
1681
|
+
// Conservative transcript recovery:
|
|
1682
|
+
// - lastReply is ONLY populated from a terminal JSONL-scoped assistant reply
|
|
1683
|
+
// - diagnosticReply captures the last meaningful assistant text for timeout reporting
|
|
1416
1684
|
let lastReply = null;
|
|
1685
|
+
let diagnosticReply = null;
|
|
1686
|
+
let recoverySource = null;
|
|
1687
|
+
let recoverySessionId = entry.sessionId || null;
|
|
1688
|
+
const resultAgent = entry.agent || agentFromSessionKey(entry.sessionKey) || 'main';
|
|
1689
|
+
const resultStore = entry.sessionKey ? readSessionsStore(resultAgent) : null;
|
|
1690
|
+
const resultSessionEntry = entry.sessionKey && resultStore ? resultStore[entry.sessionKey] : null;
|
|
1691
|
+
|
|
1692
|
+
if (resultSessionEntry?.sessionId) {
|
|
1693
|
+
recoverySessionId = resultSessionEntry.sessionId;
|
|
1694
|
+
if (entry.sessionId !== recoverySessionId) {
|
|
1695
|
+
setLabel(label, { sessionId: recoverySessionId });
|
|
1696
|
+
}
|
|
1697
|
+
}
|
|
1698
|
+
|
|
1699
|
+
if (recoverySessionId) {
|
|
1700
|
+
const jsonlEntries = readJsonlTailEntries(recoverySessionId, resultAgent, 200);
|
|
1701
|
+
const terminalReply = extractTerminalAssistantReplyFromEntries(jsonlEntries);
|
|
1702
|
+
const jsonlDiagnostic = extractLastMeaningfulAssistantReplyFromEntries(jsonlEntries);
|
|
1703
|
+
|
|
1704
|
+
if (terminalReply) {
|
|
1705
|
+
lastReply = terminalReply;
|
|
1706
|
+
recoverySource = 'jsonl-terminal';
|
|
1707
|
+
}
|
|
1708
|
+
if (jsonlDiagnostic) {
|
|
1709
|
+
diagnosticReply = jsonlDiagnostic;
|
|
1710
|
+
if (!recoverySource) recoverySource = 'jsonl-diagnostic';
|
|
1711
|
+
}
|
|
1712
|
+
}
|
|
1713
|
+
|
|
1417
1714
|
if (entry.sessionKey) {
|
|
1418
1715
|
try {
|
|
1419
1716
|
const result = gatewayCall('chat.history', {
|
|
1420
1717
|
sessionKey: entry.sessionKey,
|
|
1421
1718
|
}, { timeout: 10000 });
|
|
1422
1719
|
|
|
1423
|
-
if (result?.messages?.length) {
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
}
|
|
1720
|
+
if (result?.messages?.length && !diagnosticReply) {
|
|
1721
|
+
diagnosticReply = extractLastMeaningfulAssistantReplyFromEntries(result.messages);
|
|
1722
|
+
if (diagnosticReply && !recoverySource) recoverySource = 'history-diagnostic';
|
|
1723
|
+
}
|
|
1724
|
+
|
|
1725
|
+
if (!lastReply && result?.messages?.length) {
|
|
1726
|
+
const historyTerminal = extractTerminalAssistantReplyFromEntries(result.messages);
|
|
1727
|
+
if (historyTerminal) {
|
|
1728
|
+
lastReply = historyTerminal;
|
|
1729
|
+
recoverySource = 'history-terminal';
|
|
1434
1730
|
}
|
|
1435
1731
|
}
|
|
1436
1732
|
} catch {}
|
|
1437
1733
|
}
|
|
1438
1734
|
|
|
1439
1735
|
// -- Watchdog cleanup: disable watchdog job when result is available --
|
|
1440
|
-
if (lastReply && entry.watchdogJobId) {
|
|
1736
|
+
if ((lastReply || hasCompletionSignal(entry.completion)) && entry.watchdogJobId) {
|
|
1441
1737
|
disarmWatchdog(label);
|
|
1442
1738
|
}
|
|
1443
1739
|
|
|
@@ -1449,11 +1745,64 @@ function cmdResult(flags) {
|
|
|
1449
1745
|
spawnedAt: entry.spawnedAt,
|
|
1450
1746
|
summary: entry.summary || (lastReply ? lastReply.slice(0, 500) : null),
|
|
1451
1747
|
completion: entry.completion || null,
|
|
1748
|
+
delivery: buildDispatchDeliverySurface(entry),
|
|
1452
1749
|
lastReply: lastReply || null,
|
|
1750
|
+
diagnosticReply: diagnosticReply || lastReply || null,
|
|
1751
|
+
recovery: recoverySource || recoverySessionId ? {
|
|
1752
|
+
source: recoverySource || null,
|
|
1753
|
+
sessionId: recoverySessionId || null,
|
|
1754
|
+
} : null,
|
|
1453
1755
|
error: entry.error || null,
|
|
1454
1756
|
});
|
|
1455
1757
|
}
|
|
1456
1758
|
|
|
1759
|
+
|
|
1760
|
+
/**
 * watcher-handoff -- schedule a follow-up delivery watcher job for a label.
 *
 * Emits exactly one JSON payload through out():
 *   - unknown label                  -> { ok: false, scheduled: false, label, message }
 *   - label status is not 'running'  -> { ok: true,  scheduled: false, label, reason, status }
 *   - no deliverTo / mode 'none'     -> { ok: true,  scheduled: false, label, reason }
 *   - watcher job created            -> { ok: true,  scheduled: true,  label, jobId, reason }
 *   - no watcher job came back       -> { ok: false, scheduled: false, label, jobId: null, reason, message }
 *
 * Flags:
 *   --label <string>    Required
 *   --reason <string>   Optional. Echoed back in the scheduling payload
 *
 * @param {object} flags - Parsed CLI flags.
 * @returns {void}
 */
function cmdWatcherHandoff(flags) {
  const { label } = flags;
  const reason = flags.reason || null;
  if (!label) die('--label is required', 2);

  const entry = getLabel(label);
  if (!entry) {
    out({ ok: false, scheduled: false, label, message: 'No session found for this label' });
    return;
  }

  // Entries without a status are treated like running ones; any other status
  // means there is nothing left to watch.
  if (entry.status && entry.status !== 'running') {
    out({ ok: true, scheduled: false, label, reason: 'label already terminal', status: entry.status });
    return;
  }

  // Without a target (or with delivery explicitly turned off) a watcher has
  // nowhere to deliver to, so none is scheduled.
  if (!entry.deliverTo || entry.deliveryMode === 'none') {
    out({ ok: true, scheduled: false, label, reason: 'delivery disabled for this label' });
    return;
  }

  // Brand precedence: per-agent configured name, then a non-main agent id,
  // then the global configured name, then the built-in default.
  const agentBrand = config.agents?.[entry.agent || 'main']?.name
    || (entry.agent && entry.agent !== 'main' ? entry.agent : null)
    || config.name
    || BRAND;

  const watcherJob = scheduleDeliveryWatcherJob({
    label,
    deliverTo: entry.deliverTo,
    deliverChannel: entry.deliverChannel || 'telegram',
    // `timeout` is read as a fallback field name; non-numeric or zero values
    // fall back to 300 seconds.
    timeoutSeconds: Number(entry.timeoutSeconds ?? entry.timeout) || 300,
    idleThresholdSeconds: Number(entry.idleThresholdSeconds) || 300,
    origin: entry.origin || 'system',
    agentBrand,
    // Timestamp suffix so each handoff gets its own job name.
    nameSuffix: `:handoff:${Date.now()}`,
  });

  // Only claim success when a job actually came back. Previously this always
  // reported `scheduled: true`, even with a null jobId.
  // NOTE(review): assumes scheduleDeliveryWatcherJob returns a falsy value when
  // no job was created -- confirm against its implementation.
  const scheduled = Boolean(watcherJob);

  out({
    ok: scheduled,
    scheduled,
    label,
    jobId: watcherJob?.id || null,
    reason,
    ...(scheduled ? {} : { message: 'Delivery watcher job could not be scheduled' }),
  });
}
|
|
1805
|
+
|
|
1457
1806
|
/**
|
|
1458
1807
|
* done -- agent-side completion signal (push-based).
|
|
1459
1808
|
* Called by the subagent itself as its LAST action when fully complete.
|
|
@@ -1518,15 +1867,15 @@ async function cmdDone(flags) {
|
|
|
1518
1867
|
}
|
|
1519
1868
|
}
|
|
1520
1869
|
|
|
1521
|
-
//
|
|
1522
|
-
//
|
|
1523
|
-
//
|
|
1870
|
+
// Persist a first-class completion payload with deterministic delivery text
|
|
1871
|
+
// so the watcher/post-office path never depends solely on transcript recovery
|
|
1872
|
+
// or on whatever raw blob the model chose to print at the end.
|
|
1524
1873
|
const completion = buildTerminalCompletionPayload({
|
|
1525
1874
|
summary: rawSummary,
|
|
1526
1875
|
checklist,
|
|
1527
1876
|
sha,
|
|
1528
1877
|
});
|
|
1529
|
-
const summary = completion.summary ||
|
|
1878
|
+
const summary = completion.summary || null;
|
|
1530
1879
|
|
|
1531
1880
|
const existing = getLabel(label);
|
|
1532
1881
|
|
|
@@ -1642,7 +1991,7 @@ async function cmdDone(flags) {
|
|
|
1642
1991
|
// Label was never registered (e.g. direct subagent spawn, not via enqueue).
|
|
1643
1992
|
// This is not an error -- the work completed, the label just wasn't tracked.
|
|
1644
1993
|
process.stderr.write(`[${BRAND}] warn: no session found for label "${label}" -- registering as done\n`);
|
|
1645
|
-
|
|
1994
|
+
setLabelDone(label, { summary, completion, ...(sha ? { sha } : {}) });
|
|
1646
1995
|
|
|
1647
1996
|
// No watcher is polling for this label, so actively notify via the gateway
|
|
1648
1997
|
// post office using delivery config from config.json as fallback target.
|
|
@@ -1659,6 +2008,7 @@ async function cmdDone(flags) {
|
|
|
1659
2008
|
duration_ms: 0,
|
|
1660
2009
|
session_key: null,
|
|
1661
2010
|
summary,
|
|
2011
|
+
completion,
|
|
1662
2012
|
deliverTo,
|
|
1663
2013
|
deliveryChannel,
|
|
1664
2014
|
}).catch(() => {});
|
|
@@ -1670,8 +2020,7 @@ async function cmdDone(flags) {
|
|
|
1670
2020
|
return;
|
|
1671
2021
|
}
|
|
1672
2022
|
|
|
1673
|
-
|
|
1674
|
-
status: 'done',
|
|
2023
|
+
setLabelDone(label, {
|
|
1675
2024
|
summary,
|
|
1676
2025
|
completion,
|
|
1677
2026
|
...(sha ? { sha } : {}),
|
|
@@ -1690,6 +2039,8 @@ async function cmdDone(flags) {
|
|
|
1690
2039
|
status: 'ok',
|
|
1691
2040
|
duration_ms: Date.now() - spawnedAtMs,
|
|
1692
2041
|
session_key: existing.sessionKey || null,
|
|
2042
|
+
summary,
|
|
2043
|
+
completion,
|
|
1693
2044
|
}).catch(() => {});
|
|
1694
2045
|
|
|
1695
2046
|
out({ ok: true, label, status: 'done', summary, completion, message: 'Label marked done via agent signal.' });
|
|
@@ -1699,16 +2050,31 @@ async function cmdDone(flags) {
|
|
|
1699
2050
|
* send / steer -- send a message into a running session.
|
|
1700
2051
|
*
|
|
1701
2052
|
* Flags:
|
|
1702
|
-
* --label <string>
|
|
1703
|
-
* --message <string>
|
|
1704
|
-
* --
|
|
2053
|
+
* --label <string> Required (unless --session-key)
|
|
2054
|
+
* --message <string> Message to send
|
|
2055
|
+
* --message-file <path> Read message text from a file (`-` = stdin)
|
|
2056
|
+
* --message-env <VAR> Read message text from an environment variable
|
|
2057
|
+
* --message-stdin Read message text from stdin explicitly
|
|
2058
|
+
* (stdin is also auto-read when piped and no other message source is set)
|
|
2059
|
+
* --session-key <key> Optional. Direct session key (bypasses label lookup)
|
|
1705
2060
|
*/
|
|
1706
2061
|
async function cmdSend(flags) {
|
|
1707
|
-
const label
|
|
1708
|
-
const message = flags.message;
|
|
2062
|
+
const label = flags.label;
|
|
1709
2063
|
const directKey = flags['session-key'];
|
|
2064
|
+
let message = null;
|
|
1710
2065
|
|
|
1711
|
-
|
|
2066
|
+
try {
|
|
2067
|
+
message = await resolveMessageInput({
|
|
2068
|
+
message: flags.message,
|
|
2069
|
+
messageFile: flags['message-file'],
|
|
2070
|
+
messageEnv: flags['message-env'],
|
|
2071
|
+
messageStdin: flags['message-stdin'],
|
|
2072
|
+
});
|
|
2073
|
+
} catch (err) {
|
|
2074
|
+
die(err.message, 2);
|
|
2075
|
+
}
|
|
2076
|
+
|
|
2077
|
+
if (message === null || message.length === 0) die('--message, --message-file, --message-env, --message-stdin, or piped stdin is required', 2);
|
|
1712
2078
|
if (!label && !directKey) die('--label or --session-key is required', 2);
|
|
1713
2079
|
|
|
1714
2080
|
let sessionKey = directKey;
|
|
@@ -1807,6 +2173,7 @@ function cmdList(flags) {
|
|
|
1807
2173
|
let entries = Object.entries(labels).map(([name, data]) => ({
|
|
1808
2174
|
label: name,
|
|
1809
2175
|
...data,
|
|
2176
|
+
delivery: buildDispatchDeliverySurface(data),
|
|
1810
2177
|
}));
|
|
1811
2178
|
|
|
1812
2179
|
if (filterStatus) {
|
|
@@ -1833,13 +2200,15 @@ ${BRAND} -- sub-agent dispatch CLI (native gateway API)
|
|
|
1833
2200
|
Usage: openclaw-scheduler <subcommand> [flags]
|
|
1834
2201
|
|
|
1835
2202
|
Subcommands:
|
|
1836
|
-
enqueue --label <l> --message <m>|--message-file <f
|
|
1837
|
-
[--timeout <s>] [--mode fresh|reuse] [--model <m>]
|
|
1838
|
-
[--origin <o>] (
|
|
2203
|
+
enqueue --label <l> [--message <m>|--message-file <f>|--message-env <VAR>|--message-stdin]
|
|
2204
|
+
[--agent <a>] [--thinking <t>] [--timeout <s>] [--mode fresh|reuse] [--model <m>]
|
|
2205
|
+
[--origin <o>] (recommended explicit value, e.g. "telegram:<chat_id>" or "system")
|
|
1839
2206
|
[--deliver-to <id>] [--deliver-channel <ch>] [--delivery-mode <m>]
|
|
1840
|
-
(--deliver-to
|
|
2207
|
+
(--deliver-to should come from inbound metadata chat_id; explicit --deliver-to becomes origin when --origin is omitted)
|
|
2208
|
+
(active-session auto-detect is preserved only as a manual/local fallback)
|
|
1841
2209
|
[--no-monitor] [--monitor-interval <cron>] [--monitor-timeout <min>]
|
|
1842
2210
|
[--verify-cmd <shell_cmd>]
|
|
2211
|
+
(stdin is auto-read when piped and no explicit message source is set)
|
|
1843
2212
|
|
|
1844
2213
|
status --label <l>
|
|
1845
2214
|
|
|
@@ -1847,9 +2216,13 @@ Subcommands:
|
|
|
1847
2216
|
|
|
1848
2217
|
result --label <l>
|
|
1849
2218
|
|
|
1850
|
-
|
|
2219
|
+
watcher-handoff --label <l> [--reason <text>]
|
|
2220
|
+
|
|
2221
|
+
send --label <l> [--message <m>|--message-file <f>|--message-env <VAR>|--message-stdin]
|
|
2222
|
+
[--session-key <k>]
|
|
1851
2223
|
|
|
1852
|
-
steer --label <l> --message <m
|
|
2224
|
+
steer --label <l> [--message <m>|--message-file <f>|--message-env <VAR>|--message-stdin]
|
|
2225
|
+
(alias for send)
|
|
1853
2226
|
|
|
1854
2227
|
heartbeat --label <l> OR --session-key <k>
|
|
1855
2228
|
|
|
@@ -1871,6 +2244,7 @@ switch (subcommand) {
|
|
|
1871
2244
|
case 'status': cmdStatus(flags); break;
|
|
1872
2245
|
case 'stuck': await cmdStuck(flags); break;
|
|
1873
2246
|
case 'result': cmdResult(flags); break;
|
|
2247
|
+
case 'watcher-handoff': cmdWatcherHandoff(flags); break;
|
|
1874
2248
|
case 'send': await cmdSend(flags); break;
|
|
1875
2249
|
case 'steer': await cmdSend(flags); break;
|
|
1876
2250
|
case 'heartbeat': cmdHeartbeat(flags); break;
|