openclaw-scheduler 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/README.md +16 -6
- package/cli.js +13 -4
- package/dispatch/README.md +18 -3
- package/dispatch/completion.mjs +1035 -34
- package/dispatch/hooks.mjs +17 -5
- package/dispatch/index.mjs +573 -217
- package/dispatch/message-input.mjs +67 -0
- package/dispatch/watcher.mjs +110 -39
- package/dispatcher-strategies.js +121 -20
- package/gateway.js +32 -8
- package/index.d.ts +1 -0
- package/package.json +3 -1
- package/scripts/dispatch-cli-utils.mjs +53 -0
- package/scripts/inbox-watcher-guardrail.mjs +506 -0
package/dispatch/index.mjs
CHANGED
|
@@ -32,8 +32,17 @@ import { randomUUID } from 'crypto';
|
|
|
32
32
|
import { execFileSync } from 'child_process';
|
|
33
33
|
import { homedir } from 'os';
|
|
34
34
|
import Database from 'better-sqlite3';
|
|
35
|
-
import {
|
|
35
|
+
import {
|
|
36
|
+
buildCompletionSignalInstructions,
|
|
37
|
+
buildTerminalCompletionPayload,
|
|
38
|
+
extractLastMeaningfulAssistantReplyFromEntries,
|
|
39
|
+
extractTerminalAssistantReplyFromEntries,
|
|
40
|
+
hasCompletionSignal,
|
|
41
|
+
taskRequiresGitSha,
|
|
42
|
+
} from './completion.mjs';
|
|
36
43
|
import { onStarted, onFinished, onStuck } from './hooks.mjs';
|
|
44
|
+
import { resolveMessageInput } from './message-input.mjs';
|
|
45
|
+
import { buildDispatchDeliverySurface } from '../scripts/dispatch-cli-utils.mjs';
|
|
37
46
|
|
|
38
47
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
39
48
|
const HOME_DIR = process.env.HOME || homedir();
|
|
@@ -110,6 +119,15 @@ function sleep(ms) {
|
|
|
110
119
|
return new Promise(r => setTimeout(r, ms));
|
|
111
120
|
}
|
|
112
121
|
|
|
122
|
+
function toTimestampMs(value) {
|
|
123
|
+
if (value == null) return null;
|
|
124
|
+
if (typeof value === 'number') {
|
|
125
|
+
return value < 1e12 ? value * 1000 : value;
|
|
126
|
+
}
|
|
127
|
+
const parsed = new Date(value).getTime();
|
|
128
|
+
return Number.isFinite(parsed) ? parsed : null;
|
|
129
|
+
}
|
|
130
|
+
|
|
113
131
|
/** Parse --flag value pairs from argv (supports both --flag value and --flag=value) */
|
|
114
132
|
function parseFlags(argv) {
|
|
115
133
|
const flags = {};
|
|
@@ -131,21 +149,6 @@ function parseFlags(argv) {
|
|
|
131
149
|
return flags;
|
|
132
150
|
}
|
|
133
151
|
|
|
134
|
-
function taskRequiresGitSha(taskPrompt) {
|
|
135
|
-
if (!taskPrompt || typeof taskPrompt !== 'string') return false;
|
|
136
|
-
|
|
137
|
-
const commandPattern = /\bgit\s+(push|rebase|cherry-pick)\b|(?:^|\s)--force-with-lease\b|(?:^|\s)--force-push\b/ig;
|
|
138
|
-
let match;
|
|
139
|
-
while ((match = commandPattern.exec(taskPrompt)) !== null) {
|
|
140
|
-
const before = taskPrompt.slice(Math.max(0, match.index - 40), match.index);
|
|
141
|
-
const negatedContext = /\b(?:do\s+not|don't|dont|never)\s+(?:use|run|call|invoke)?\s*$/i.test(before)
|
|
142
|
-
|| /\bavoid\s+(?:using\s+)?$/i.test(before)
|
|
143
|
-
|| /\bwithout\s+(?:using\s+)?$/i.test(before);
|
|
144
|
-
if (!negatedContext) return true;
|
|
145
|
-
}
|
|
146
|
-
return false;
|
|
147
|
-
}
|
|
148
|
-
|
|
149
152
|
// -- Labels Ledger --------------------------------------------
|
|
150
153
|
|
|
151
154
|
function getLabelsSignature() {
|
|
@@ -247,23 +250,16 @@ function gatewayCall(method, params = {}, opts = {}) {
|
|
|
247
250
|
// -- Gateway Error Log Check ----------------------------------
|
|
248
251
|
|
|
249
252
|
/**
|
|
250
|
-
* Check the gateway error log for
|
|
253
|
+
* Check the gateway error log for the most recent diagnostic lane task error
|
|
251
254
|
* matching a specific session key.
|
|
252
255
|
*
|
|
253
256
|
* Scans the last N bytes of gateway.err.log for diagnostic lane task errors
|
|
254
|
-
* that reference the session key and
|
|
257
|
+
* that reference the session key and returns the newest error line.
|
|
255
258
|
*
|
|
256
259
|
* @param {string} sessionKey - The session key to check
|
|
257
260
|
* @returns {{ found: boolean, error: string|null, timestamp: string|null }}
|
|
258
261
|
*/
|
|
259
|
-
function
|
|
260
|
-
const OVERLOAD_PATTERNS = [
|
|
261
|
-
/529/i,
|
|
262
|
-
/failover\s*error/i,
|
|
263
|
-
/overload/i,
|
|
264
|
-
/temporarily\s+overloaded/i,
|
|
265
|
-
];
|
|
266
|
-
|
|
262
|
+
function getGatewayLaneTaskError(sessionKey) {
|
|
267
263
|
try {
|
|
268
264
|
const logPath = join(HOME_DIR, '.openclaw', 'logs', 'gateway.err.log');
|
|
269
265
|
if (!existsSync(logPath)) return { found: false, error: null, timestamp: null };
|
|
@@ -285,20 +281,15 @@ function check529InGatewayLog(sessionKey) {
|
|
|
285
281
|
if (!line.includes(sessionKey)) continue;
|
|
286
282
|
if (!line.includes('lane task error')) continue;
|
|
287
283
|
|
|
288
|
-
// Extract the error message
|
|
289
284
|
const errorMatch = line.match(/error="([^"]+)"/);
|
|
290
285
|
if (!errorMatch) continue;
|
|
291
286
|
|
|
292
|
-
const
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
error: `FailoverError (529): ${errorMsg}`,
|
|
299
|
-
timestamp: tsMatch ? tsMatch[1] : null,
|
|
300
|
-
};
|
|
301
|
-
}
|
|
287
|
+
const tsMatch = line.match(/^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)/);
|
|
288
|
+
return {
|
|
289
|
+
found: true,
|
|
290
|
+
error: errorMatch[1],
|
|
291
|
+
timestamp: tsMatch ? tsMatch[1] : null,
|
|
292
|
+
};
|
|
302
293
|
}
|
|
303
294
|
|
|
304
295
|
return { found: false, error: null, timestamp: null };
|
|
@@ -307,6 +298,32 @@ function check529InGatewayLog(sessionKey) {
|
|
|
307
298
|
}
|
|
308
299
|
}
|
|
309
300
|
|
|
301
|
+
/**
|
|
302
|
+
* Check the gateway error log for 529/FailoverError/overload errors
|
|
303
|
+
* matching a specific session key.
|
|
304
|
+
*
|
|
305
|
+
* @param {string} sessionKey - The session key to check
|
|
306
|
+
* @returns {{ found: boolean, error: string|null, timestamp: string|null }}
|
|
307
|
+
*/
|
|
308
|
+
function check529InGatewayLog(sessionKey) {
|
|
309
|
+
const OVERLOAD_PATTERNS = [
|
|
310
|
+
/529/i,
|
|
311
|
+
/failover\s*error/i,
|
|
312
|
+
/overload/i,
|
|
313
|
+
/temporarily\s+overloaded/i,
|
|
314
|
+
];
|
|
315
|
+
|
|
316
|
+
const laneError = getGatewayLaneTaskError(sessionKey);
|
|
317
|
+
if (!laneError.found || !laneError.error) return { found: false, error: null, timestamp: null };
|
|
318
|
+
if (!OVERLOAD_PATTERNS.some(p => p.test(laneError.error))) return { found: false, error: null, timestamp: null };
|
|
319
|
+
|
|
320
|
+
return {
|
|
321
|
+
found: true,
|
|
322
|
+
error: `FailoverError (529): ${laneError.error}`,
|
|
323
|
+
timestamp: laneError.timestamp,
|
|
324
|
+
};
|
|
325
|
+
}
|
|
326
|
+
|
|
310
327
|
// -- Sessions Store (Direct Read) -----------------------------
|
|
311
328
|
|
|
312
329
|
/**
|
|
@@ -328,6 +345,104 @@ function readSessionsStore(agent = 'main') {
|
|
|
328
345
|
}
|
|
329
346
|
}
|
|
330
347
|
|
|
348
|
+
function getSessionJsonlPath(agent = 'main', sessionId) {
|
|
349
|
+
if (!sessionId) return null;
|
|
350
|
+
return join(HOME_DIR, '.openclaw', 'agents', agent, 'sessions', `${sessionId}.jsonl`);
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
function inspectSessionActivitySignal(sessionKey, sessionsStore) {
|
|
354
|
+
if (!sessionKey || !sessionsStore?.[sessionKey]) {
|
|
355
|
+
return { found: false, hasActivitySignal: false, messageCount: null, jsonlExists: false, hasTokens: false, updatedAtMs: null };
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
const agent = agentFromSessionKey(sessionKey) || 'main';
|
|
359
|
+
const entry = sessionsStore[sessionKey];
|
|
360
|
+
const jsonlPath = getSessionJsonlPath(agent, entry.sessionId);
|
|
361
|
+
const jsonlExists = jsonlPath ? existsSync(jsonlPath) : false;
|
|
362
|
+
const hasTokens = typeof entry.totalTokens === 'number' && entry.totalTokens > 0;
|
|
363
|
+
let messageCount = null;
|
|
364
|
+
|
|
365
|
+
try {
|
|
366
|
+
const history = gatewayCall('chat.history', { sessionKey }, { timeout: 8000 });
|
|
367
|
+
if (Array.isArray(history?.messages)) {
|
|
368
|
+
messageCount = history.messages.length;
|
|
369
|
+
}
|
|
370
|
+
} catch {}
|
|
371
|
+
|
|
372
|
+
return {
|
|
373
|
+
found: true,
|
|
374
|
+
hasActivitySignal: jsonlExists || hasTokens || (typeof messageCount === 'number' && messageCount > 0),
|
|
375
|
+
messageCount,
|
|
376
|
+
jsonlExists,
|
|
377
|
+
hasTokens,
|
|
378
|
+
updatedAtMs: toTimestampMs(entry.updatedAt),
|
|
379
|
+
};
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
function inspectSessionBootstrapFailure(sessionKey, sessionsStore, spawnedAtMs, startupGraceMs) {
|
|
383
|
+
if (!sessionKey || !sessionsStore?.[sessionKey]) {
|
|
384
|
+
return { shouldResolve: false, reason: null, errorMsg: null };
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
const ageMs = spawnedAtMs ? Date.now() - spawnedAtMs : Infinity;
|
|
388
|
+
if (ageMs < startupGraceMs || ageMs > startupGraceMs * 2) {
|
|
389
|
+
return { shouldResolve: false, reason: null, errorMsg: null };
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
const signal = inspectSessionActivitySignal(sessionKey, sessionsStore);
|
|
393
|
+
if (signal.hasActivitySignal) {
|
|
394
|
+
return { shouldResolve: false, reason: null, errorMsg: null };
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
const laneError = getGatewayLaneTaskError(sessionKey);
|
|
398
|
+
if (laneError.found && laneError.error) {
|
|
399
|
+
return {
|
|
400
|
+
shouldResolve: true,
|
|
401
|
+
reason: `diagnostic lane error: ${laneError.error}`,
|
|
402
|
+
errorMsg: `spawn-failure: ${laneError.error}`,
|
|
403
|
+
};
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
if (signal.messageCount === 0) {
|
|
407
|
+
return {
|
|
408
|
+
shouldResolve: true,
|
|
409
|
+
reason: 'session entered sessions store but never wrote transcript/history',
|
|
410
|
+
errorMsg: 'spawn-failure: session entered sessions store but never wrote transcript/history',
|
|
411
|
+
};
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
if (signal.updatedAtMs !== null && spawnedAtMs && signal.updatedAtMs <= spawnedAtMs + 5000) {
|
|
415
|
+
return {
|
|
416
|
+
shouldResolve: true,
|
|
417
|
+
reason: 'session entered sessions store but never showed any activity',
|
|
418
|
+
errorMsg: 'spawn-failure: session entered sessions store but never showed any activity',
|
|
419
|
+
};
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
return { shouldResolve: false, reason: null, errorMsg: null };
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
function readJsonlTailEntries(sessionId, agent = 'main', maxLines = 200) {
|
|
426
|
+
if (!sessionId) return null;
|
|
427
|
+
try {
|
|
428
|
+
const jsonlPath = join(HOME_DIR, '.openclaw', 'agents', agent, 'sessions', `${sessionId}.jsonl`);
|
|
429
|
+
return readFileSync(jsonlPath, 'utf-8')
|
|
430
|
+
.split('\n')
|
|
431
|
+
.filter(line => line.trim())
|
|
432
|
+
.slice(-maxLines)
|
|
433
|
+
.map(line => {
|
|
434
|
+
try {
|
|
435
|
+
return JSON.parse(line);
|
|
436
|
+
} catch {
|
|
437
|
+
return null;
|
|
438
|
+
}
|
|
439
|
+
})
|
|
440
|
+
.filter(Boolean);
|
|
441
|
+
} catch {
|
|
442
|
+
return null;
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
|
|
331
446
|
/**
|
|
332
447
|
* Auto-detect the originating channel from the most recently active main session.
|
|
333
448
|
* Reads sessions.json, finds sessions active within the last 10 minutes,
|
|
@@ -348,6 +463,17 @@ function inferChatType(key, session) {
|
|
|
348
463
|
return "";
|
|
349
464
|
}
|
|
350
465
|
|
|
466
|
+
function parseOriginTarget(origin) {
|
|
467
|
+
const match = /^([^:]+):(.+)$/.exec(origin || '');
|
|
468
|
+
if (!match) return { channel: null, target: null };
|
|
469
|
+
return { channel: match[1], target: match[2] };
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
function originFromDeliveryTarget(deliverTo, deliverChannel = 'telegram') {
|
|
473
|
+
if (!deliverTo) return null;
|
|
474
|
+
return `${deliverChannel || 'telegram'}:${deliverTo}`;
|
|
475
|
+
}
|
|
476
|
+
|
|
351
477
|
function getActiveOriginFromSessions() {
|
|
352
478
|
const store = readSessionsStore("main");
|
|
353
479
|
if (!store) return null;
|
|
@@ -551,6 +677,65 @@ function disarmWatchdog(label) {
|
|
|
551
677
|
}
|
|
552
678
|
}
|
|
553
679
|
|
|
680
|
+
|
|
681
|
+
function quoteForSingleQuotedShell(value) {
|
|
682
|
+
return String(value).replace(/'/g, "'\"'\"'");
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
/**
|
|
686
|
+
* Schedule a one-shot delivery watcher shell job for a dispatch label.
|
|
687
|
+
* Used both for the initial watcher registration and SIGTERM handoffs.
|
|
688
|
+
*/
|
|
689
|
+
function scheduleDeliveryWatcherJob({
|
|
690
|
+
label,
|
|
691
|
+
deliverTo,
|
|
692
|
+
deliverChannel = 'telegram',
|
|
693
|
+
timeoutSeconds = 300,
|
|
694
|
+
idleThresholdSeconds = 300,
|
|
695
|
+
origin = 'system',
|
|
696
|
+
agentBrand = BRAND,
|
|
697
|
+
nameSuffix = '',
|
|
698
|
+
}) {
|
|
699
|
+
if (!label) throw new Error('label is required');
|
|
700
|
+
if (!deliverTo) throw new Error('deliverTo is required');
|
|
701
|
+
|
|
702
|
+
const schedulerCli = join(__dirname, '..', 'cli.js');
|
|
703
|
+
const watcherPath = join(__dirname, 'watcher.mjs');
|
|
704
|
+
const watcherTimeoutS = Number(timeoutSeconds) + 120;
|
|
705
|
+
const idleThresholdS = Number(idleThresholdSeconds) || 300;
|
|
706
|
+
const sq = quoteForSingleQuotedShell;
|
|
707
|
+
const watcherCmd = `DISPATCH_LABELS_PATH='${sq(LABELS_PATH)}' '${sq(process.execPath)}' '${sq(watcherPath)}' --label '${sq(label)}' --timeout ${watcherTimeoutS} --poll-interval 20 --idle-threshold ${idleThresholdS}`;
|
|
708
|
+
|
|
709
|
+
const nowUtc = new Date().toISOString().replace('T', ' ').slice(0, 19);
|
|
710
|
+
const jobSpec = {
|
|
711
|
+
name: `${agentBrand}-deliver:${label}${nameSuffix}`,
|
|
712
|
+
schedule_kind: 'at',
|
|
713
|
+
schedule_at: nowUtc,
|
|
714
|
+
session_target: 'shell',
|
|
715
|
+
payload_kind: 'shellCommand',
|
|
716
|
+
payload_message: watcherCmd,
|
|
717
|
+
delivery_mode: 'announce-always',
|
|
718
|
+
delivery_channel: deliverChannel,
|
|
719
|
+
delivery_to: deliverTo,
|
|
720
|
+
delivery_guarantee: 'at-least-once',
|
|
721
|
+
ttl_hours: config.deliver_watcher_ttl_hours ?? 48,
|
|
722
|
+
overlap_policy: 'skip',
|
|
723
|
+
run_timeout_ms: Math.max(watcherTimeoutS, 4 * 3600) * 1000
|
|
724
|
+
+ 420 * 1000,
|
|
725
|
+
delete_after_run: 1,
|
|
726
|
+
origin: origin || 'system',
|
|
727
|
+
};
|
|
728
|
+
|
|
729
|
+
const raw = execFileSync(process.execPath, [schedulerCli, '--json', 'jobs', 'add', JSON.stringify(jobSpec)], {
|
|
730
|
+
encoding: 'utf-8',
|
|
731
|
+
timeout: 10000,
|
|
732
|
+
stdio: ['pipe', 'pipe', 'pipe'],
|
|
733
|
+
});
|
|
734
|
+
|
|
735
|
+
const parsed = JSON.parse(raw.trim());
|
|
736
|
+
return parsed?.job || null;
|
|
737
|
+
}
|
|
738
|
+
|
|
554
739
|
// -- Session Helpers ------------------------------------------
|
|
555
740
|
|
|
556
741
|
/** Build a unique session key for a new subagent session. */
|
|
@@ -565,12 +750,19 @@ function makeSessionKey(agentId) {
|
|
|
565
750
|
*
|
|
566
751
|
* Flags:
|
|
567
752
|
* --label <string> Required. Human-readable name
|
|
568
|
-
* --message <string>
|
|
753
|
+
* --message <string> Prompt sent to the agent
|
|
754
|
+
* --message-file <path> Read prompt text from a file (`-` = stdin)
|
|
755
|
+
* --message-env <VAR> Read prompt text from an environment variable
|
|
756
|
+
* --message-stdin Read prompt text from stdin explicitly
|
|
757
|
+
* (stdin is also auto-read when piped and no other message source is set)
|
|
569
758
|
* --agent <string> Agent ID (default: main)
|
|
570
759
|
* --thinking <string> Reasoning level: low|high|xhigh (default: not set)
|
|
571
760
|
* --timeout <seconds> Run timeout in seconds (default: 300)
|
|
572
|
-
* --origin <origin>
|
|
573
|
-
*
|
|
761
|
+
* --origin <origin> Explicit dispatch origin for audit/retries (e.g. "telegram:<chat_id>", "system")
|
|
762
|
+
* If omitted but --deliver-to is explicit, dispatch derives origin from that target.
|
|
763
|
+
* Active-session auto-detect is preserved only as a manual/local fallback when both are absent.
|
|
764
|
+
* --deliver-to <target> Delivery target (e.g. Telegram chat ID). Registers the scheduler watcher for durable final delivery.
|
|
765
|
+
* Chat-triggered callers should pass inbound metadata chat_id here, especially for group chats.
|
|
574
766
|
* Defaults to origin chat ID when --origin is a "telegram:<id>" string.
|
|
575
767
|
* --deliver-channel <ch> Delivery channel for --deliver-to (default: telegram)
|
|
576
768
|
* --delivery-mode <mode> announce|announce-always|none (default: announce)
|
|
@@ -581,18 +773,23 @@ function makeSessionKey(agentId) {
|
|
|
581
773
|
* --model <string> Model override (e.g. anthropic/claude-sonnet-4-6)
|
|
582
774
|
*/
|
|
583
775
|
async function cmdEnqueue(flags) {
|
|
584
|
-
const label
|
|
585
|
-
let message = flags.message;
|
|
776
|
+
const label = flags.label;
|
|
586
777
|
if (!label) die('--label is required', 2);
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
778
|
+
|
|
779
|
+
let message = null;
|
|
780
|
+
try {
|
|
781
|
+
message = await resolveMessageInput({
|
|
782
|
+
message: flags.message,
|
|
783
|
+
messageFile: flags['message-file'],
|
|
784
|
+
messageEnv: flags['message-env'],
|
|
785
|
+
messageStdin: flags['message-stdin'],
|
|
786
|
+
});
|
|
787
|
+
} catch (err) {
|
|
788
|
+
die(err.message, 2);
|
|
789
|
+
}
|
|
790
|
+
if (message === null || message.length === 0) {
|
|
791
|
+
die('--message, --message-file, --message-env, --message-stdin, or piped stdin is required', 2);
|
|
594
792
|
}
|
|
595
|
-
if (!message) die('--message or --message-file is required', 2);
|
|
596
793
|
|
|
597
794
|
const agent = flags.agent || 'main';
|
|
598
795
|
const thinking = flags.thinking || null;
|
|
@@ -605,30 +802,44 @@ async function cmdEnqueue(flags) {
|
|
|
605
802
|
process.stderr.write(`[${BRAND}] WARNING: --timeout not specified, defaulting to 300s. ` +
|
|
606
803
|
`Pass --timeout explicitly (≥1200 for thinking=high tasks) to avoid premature watcher kills.\n`);
|
|
607
804
|
}
|
|
608
|
-
|
|
805
|
+
const explicitOrigin = flags.origin || null;
|
|
806
|
+
const explicitDeliverTo = flags['deliver-to'] || null;
|
|
807
|
+
const explicitDeliverChannel = flags['deliver-channel'] || null;
|
|
808
|
+
let origin = explicitOrigin;
|
|
809
|
+
|
|
810
|
+
// Contract: chat-triggered callers should pass --deliver-to from inbound
|
|
811
|
+
// metadata chat_id. If they omit --origin, derive it from that explicit
|
|
812
|
+
// delivery target so dispatch never falls back to whichever session happened
|
|
813
|
+
// to be active most recently.
|
|
814
|
+
if (!origin && explicitDeliverTo) {
|
|
815
|
+
origin = originFromDeliveryTarget(explicitDeliverTo, explicitDeliverChannel || 'telegram');
|
|
816
|
+
}
|
|
609
817
|
|
|
610
|
-
//
|
|
611
|
-
|
|
818
|
+
// Preserve active-session inference only as a manual/local fallback when the
|
|
819
|
+
// caller truly omitted both origin and delivery target.
|
|
820
|
+
if (!origin && !explicitDeliverTo) {
|
|
612
821
|
origin = getActiveOriginFromSessions();
|
|
613
822
|
if (origin) {
|
|
614
823
|
process.stderr.write(`[${BRAND}] auto-detected origin from active session: ${origin}\n`);
|
|
824
|
+
process.stderr.write(`[${BRAND}] NOTE: active-session origin detection is a manual/local fallback. ` +
|
|
825
|
+
`Chat-triggered callers should pass --deliver-to from inbound metadata chat_id.\n`);
|
|
615
826
|
}
|
|
616
827
|
}
|
|
617
828
|
|
|
618
829
|
// -- Auto-derive deliver-to from origin ---------------------------------
|
|
619
830
|
// If origin is "telegram:<id>", use <id> as the default deliver-to target.
|
|
620
831
|
let defaultDeliverTo = null;
|
|
621
|
-
let defaultDeliverCh = 'telegram';
|
|
832
|
+
let defaultDeliverCh = explicitDeliverChannel || 'telegram';
|
|
622
833
|
if (origin) {
|
|
623
|
-
const
|
|
624
|
-
if (
|
|
625
|
-
defaultDeliverCh
|
|
626
|
-
defaultDeliverTo
|
|
834
|
+
const { channel, target } = parseOriginTarget(origin);
|
|
835
|
+
if (channel && target) {
|
|
836
|
+
if (!explicitDeliverChannel) defaultDeliverCh = channel;
|
|
837
|
+
defaultDeliverTo = target;
|
|
627
838
|
}
|
|
628
839
|
}
|
|
629
840
|
|
|
630
|
-
const deliverTo =
|
|
631
|
-
const deliverChannel =
|
|
841
|
+
const deliverTo = explicitDeliverTo || defaultDeliverTo;
|
|
842
|
+
const deliverChannel = explicitDeliverChannel || defaultDeliverCh || 'telegram';
|
|
632
843
|
const deliverMode = flags['delivery-mode'] || 'announce';
|
|
633
844
|
const mode = flags.mode || 'fresh';
|
|
634
845
|
|
|
@@ -645,6 +856,9 @@ async function cmdEnqueue(flags) {
|
|
|
645
856
|
|
|
646
857
|
// -- Watchdog monitoring flags -----------------------------
|
|
647
858
|
const noMonitorRaw = flags['no-monitor'];
|
|
859
|
+
const noMonitorReason = typeof noMonitorRaw === 'string' && noMonitorRaw.trim()
|
|
860
|
+
? noMonitorRaw.trim()
|
|
861
|
+
: null;
|
|
648
862
|
const noMonitor = !!noMonitorRaw;
|
|
649
863
|
const monitorEnabled = !noMonitor && flags.monitor !== 'false';
|
|
650
864
|
const monitorInterval = flags['monitor-interval'] || config.watchdogIntervalCron || '*/15 * * * *';
|
|
@@ -659,6 +873,7 @@ async function cmdEnqueue(flags) {
|
|
|
659
873
|
"REJECTED: --deliver-to is required for dispatch jobs.\n" +
|
|
660
874
|
"Pass --deliver-to <chat_id> (e.g. --deliver-to -100200000000 for a group, " +
|
|
661
875
|
"or --deliver-to 123456789 for a DM).\n" +
|
|
876
|
+
"Chat-triggered callers should pass inbound metadata chat_id here, especially for group chats.\n" +
|
|
662
877
|
"Alternatively, pass --origin telegram:<chat_id> to auto-derive the delivery target.\n" +
|
|
663
878
|
"Pass --no-monitor \"<reason>\" only if you explicitly want to skip delivery (audit trail required).",
|
|
664
879
|
2
|
|
@@ -769,26 +984,11 @@ async function cmdEnqueue(flags) {
|
|
|
769
984
|
const doneScriptPath = join(__dirname, 'index.mjs');
|
|
770
985
|
parts.push(``);
|
|
771
986
|
parts.push(`---`);
|
|
772
|
-
parts.push(
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
parts.push(` 3. All API calls (e.g. GitHub comment replies) are done`);
|
|
778
|
-
parts.push(` 4. You have verified the work is complete`);
|
|
779
|
-
parts.push(``);
|
|
780
|
-
parts.push(`Call this as your ABSOLUTE FINAL action -- nothing else runs after this:`);
|
|
781
|
-
parts.push(` node '${doneScriptPath}' done --label '${label.replace(/'/g, "'\\''")}' \\`);
|
|
782
|
-
parts.push(` --summary "<what you actually did>" \\`);
|
|
783
|
-
parts.push(` --checklist '{"work_complete":true,"tests_passed":true,"pushed":true}' \\`);
|
|
784
|
-
parts.push(` [--sha "<git commit SHA if applicable>"]`);
|
|
785
|
-
parts.push(``);
|
|
786
|
-
parts.push(`Checklist rules:`);
|
|
787
|
-
parts.push(` - work_complete MUST be true -- you are asserting you have finished ALL assigned work`);
|
|
788
|
-
parts.push(` - If tests failed or push failed, do NOT set tests_passed:true or pushed:true -- instead continue working`);
|
|
789
|
-
parts.push(` - Only include tests_passed/pushed if they apply to your task`);
|
|
790
|
-
parts.push(`If your task involved git commits, --sha is required and must be the actual SHA of your pushed commit. The done script will reject invented or placeholder SHAs.`);
|
|
791
|
-
parts.push(`Do NOT call done while planning, reading files, or mid-task. If you have not yet pushed a commit, you are not done.`);
|
|
987
|
+
parts.push(buildCompletionSignalInstructions({
|
|
988
|
+
label,
|
|
989
|
+
taskPrompt: message,
|
|
990
|
+
doneScriptPath,
|
|
991
|
+
}));
|
|
792
992
|
parts.push(`---`);
|
|
793
993
|
parts.push(``);
|
|
794
994
|
parts.push(`---`);
|
|
@@ -802,15 +1002,16 @@ async function cmdEnqueue(flags) {
|
|
|
802
1002
|
const taskMessage = parts.join('\n');
|
|
803
1003
|
|
|
804
1004
|
// -- Call gateway agent method -------------------------------
|
|
805
|
-
//
|
|
806
|
-
//
|
|
807
|
-
//
|
|
1005
|
+
// Final user delivery belongs to the scheduler watcher below.
|
|
1006
|
+
// Keep the gateway spawn fire-and-forget so raw tool output or internal
|
|
1007
|
+
// done payloads cannot leak directly to the chat ahead of the durable
|
|
1008
|
+
// post-office delivery path.
|
|
808
1009
|
try {
|
|
809
1010
|
const response = gatewayCall('agent', {
|
|
810
1011
|
message: taskMessage,
|
|
811
1012
|
sessionKey,
|
|
812
1013
|
idempotencyKey: idem,
|
|
813
|
-
deliver:
|
|
1014
|
+
deliver: false,
|
|
814
1015
|
lane: 'subagent',
|
|
815
1016
|
timeout: timeoutS,
|
|
816
1017
|
label: label,
|
|
@@ -822,6 +1023,11 @@ async function cmdEnqueue(flags) {
|
|
|
822
1023
|
} : {}),
|
|
823
1024
|
}, { timeout: 15000 });
|
|
824
1025
|
|
|
1026
|
+
const deliveryDisabled = !deliverTo && noMonitor;
|
|
1027
|
+
const deliveryDisabledReason = deliveryDisabled
|
|
1028
|
+
? (noMonitorReason || 'explicit opt-out via --no-monitor')
|
|
1029
|
+
: null;
|
|
1030
|
+
|
|
825
1031
|
// Update ledger
|
|
826
1032
|
setLabel(label, {
|
|
827
1033
|
sessionKey,
|
|
@@ -834,9 +1040,12 @@ async function cmdEnqueue(flags) {
|
|
|
834
1040
|
deliverTo: deliverTo || null,
|
|
835
1041
|
deliverChannel: deliverChannel || null,
|
|
836
1042
|
deliveryMode: deliverMode || null,
|
|
1043
|
+
deliveryDisabled,
|
|
1044
|
+
deliveryDisabledReason,
|
|
837
1045
|
verifyCmd: verifyCmd || null,
|
|
838
1046
|
spawnedAt: new Date().toISOString(),
|
|
839
1047
|
timeoutSeconds: timeoutS,
|
|
1048
|
+
idleThresholdSeconds: parseInt(flags['idle-threshold'] || '300', 10),
|
|
840
1049
|
// Fix 4: Store timeout so cmdDone threshold logic can use it correctly.
|
|
841
1050
|
timeout: timeoutS,
|
|
842
1051
|
status: 'running',
|
|
@@ -882,48 +1091,25 @@ async function cmdEnqueue(flags) {
|
|
|
882
1091
|
// Creates a one-shot shell job that runs watcher.mjs (blocks until session
|
|
883
1092
|
// completes, outputs result). The scheduler's handleDelivery delivers with
|
|
884
1093
|
// retry, alias resolution, and audit trail in scheduler.db.
|
|
885
|
-
//
|
|
1094
|
+
// The watcher is the only final-delivery path for dispatched jobs.
|
|
886
1095
|
const sq = s => String(s).replace(/'/g, "'\\''");
|
|
887
1096
|
let schedulerWatcherOk = false;
|
|
888
1097
|
if (deliverTo && deliverMode !== 'none') {
|
|
889
1098
|
try {
|
|
890
|
-
const
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
name: `${agentBrand}-deliver:${label}`,
|
|
899
|
-
schedule_kind: 'at',
|
|
900
|
-
schedule_at: nowUtc,
|
|
901
|
-
session_target: 'shell',
|
|
902
|
-
payload_kind: 'shellCommand',
|
|
903
|
-
payload_message: watcherCmd,
|
|
904
|
-
delivery_mode: 'announce-always',
|
|
905
|
-
delivery_channel: deliverChannel,
|
|
906
|
-
delivery_to: deliverTo,
|
|
907
|
-
delivery_guarantee: 'at-least-once',
|
|
908
|
-
ttl_hours: config.deliver_watcher_ttl_hours ?? 48, // configurable TTL (deliver_watcher_ttl_hours); default 48h
|
|
909
|
-
overlap_policy: 'skip',
|
|
910
|
-
// Shell ceiling = max(initial timeout, rolling extension cap) + headroom.
|
|
911
|
-
// The watcher can extend its deadline up to MAX_DEADLINE_EXTENSION (4h) on
|
|
912
|
-
// activity (token growth / JSONL mtime). Headroom covers 2*FLAT_WINDOW + slop.
|
|
913
|
-
// Watcher constants: FLAT_WINDOW_MS=180s, MAX_DEADLINE_EXTENSION=4h.
|
|
914
|
-
run_timeout_ms: Math.max(watcherTimeoutS, 4 * 3600) * 1000
|
|
915
|
-
+ 420 * 1000, // +7min headroom (2*FLAT_WINDOW + 1min slop)
|
|
916
|
-
delete_after_run: 1, // auto-delete after watcher completes
|
|
917
|
-
origin: origin || 'system',
|
|
918
|
-
});
|
|
919
|
-
const schedulerCli = join(__dirname, '..', 'cli.js');
|
|
920
|
-
execFileSync(process.execPath, [schedulerCli, 'jobs', 'add', jobSpec], {
|
|
921
|
-
encoding: 'utf-8',
|
|
922
|
-
timeout: 10000,
|
|
923
|
-
stdio: ['pipe', 'pipe', 'pipe'],
|
|
1099
|
+
const watcherJob = scheduleDeliveryWatcherJob({
|
|
1100
|
+
label,
|
|
1101
|
+
deliverTo,
|
|
1102
|
+
deliverChannel,
|
|
1103
|
+
timeoutSeconds: timeoutS,
|
|
1104
|
+
idleThresholdSeconds: flags['idle-threshold'] || '300',
|
|
1105
|
+
origin: origin || 'system',
|
|
1106
|
+
agentBrand,
|
|
924
1107
|
});
|
|
925
1108
|
schedulerWatcherOk = true;
|
|
926
|
-
process.stderr.write(
|
|
1109
|
+
process.stderr.write(
|
|
1110
|
+
`[${agentBrand}] scheduler watcher registered: ${agentBrand}-deliver:${label}` +
|
|
1111
|
+
`${watcherJob?.id ? ` (${watcherJob.id})` : ''}\n`
|
|
1112
|
+
);
|
|
927
1113
|
} catch (err) {
|
|
928
1114
|
process.stderr.write(`[${agentBrand}] scheduler watcher FAILED (gateway fallback active): ${err.message}\n`);
|
|
929
1115
|
}
|
|
@@ -934,7 +1120,7 @@ async function cmdEnqueue(flags) {
|
|
|
934
1120
|
let watchdogJobId = null;
|
|
935
1121
|
if (monitorEnabled && deliverTo) {
|
|
936
1122
|
try {
|
|
937
|
-
const checkCmd = `'${sq(process.execPath)}' '${sq(join(__dirname, 'index.mjs'))}'
|
|
1123
|
+
const checkCmd = `'${sq(process.execPath)}' '${sq(join(__dirname, 'index.mjs'))}' result --label '${sq(label)}'`;
|
|
938
1124
|
const alertChannel = deliverChannel || 'telegram';
|
|
939
1125
|
const alertTarget = deliverTo;
|
|
940
1126
|
const watchdogSpec = JSON.stringify({
|
|
@@ -979,6 +1165,18 @@ async function cmdEnqueue(flags) {
|
|
|
979
1165
|
}
|
|
980
1166
|
}
|
|
981
1167
|
|
|
1168
|
+
const delivery = buildDispatchDeliverySurface({
|
|
1169
|
+
deliverTo,
|
|
1170
|
+
deliverChannel,
|
|
1171
|
+
deliveryMode: deliverMode,
|
|
1172
|
+
deliveryDisabled,
|
|
1173
|
+
deliveryDisabledReason,
|
|
1174
|
+
...(deliverTo ? {
|
|
1175
|
+
scheduler: schedulerWatcherOk,
|
|
1176
|
+
gateway: true,
|
|
1177
|
+
} : {}),
|
|
1178
|
+
});
|
|
1179
|
+
|
|
982
1180
|
out({
|
|
983
1181
|
ok: true,
|
|
984
1182
|
label,
|
|
@@ -987,12 +1185,7 @@ async function cmdEnqueue(flags) {
|
|
|
987
1185
|
mode: isFresh ? 'fresh' : 'reuse',
|
|
988
1186
|
agent,
|
|
989
1187
|
status: 'accepted',
|
|
990
|
-
delivery
|
|
991
|
-
scheduler: schedulerWatcherOk,
|
|
992
|
-
gateway: !!deliverTo,
|
|
993
|
-
target: deliverTo,
|
|
994
|
-
channel: deliverChannel,
|
|
995
|
-
} : null,
|
|
1188
|
+
delivery,
|
|
996
1189
|
watchdog: monitorEnabled ? {
|
|
997
1190
|
enabled: watchdogJobOk,
|
|
998
1191
|
jobId: watchdogJobId,
|
|
@@ -1000,11 +1193,13 @@ async function cmdEnqueue(flags) {
|
|
|
1000
1193
|
timeout: monitorTimeout,
|
|
1001
1194
|
...(monitorEnabled && !deliverTo ? { skipped: true, reason: 'no --deliver-to target' } : {}),
|
|
1002
1195
|
} : null,
|
|
1003
|
-
message:
|
|
1004
|
-
?
|
|
1005
|
-
:
|
|
1006
|
-
? 'Session spawned. Delivery via
|
|
1007
|
-
:
|
|
1196
|
+
message: delivery.status === 'disabled'
|
|
1197
|
+
? `Session spawned. Delivery intentionally disabled${delivery.reason ? ` (${delivery.reason}).` : '.'}`
|
|
1198
|
+
: schedulerWatcherOk
|
|
1199
|
+
? 'Session spawned. Delivery via scheduler (primary) + gateway (secondary).'
|
|
1200
|
+
: deliverTo
|
|
1201
|
+
? 'Session spawned. Delivery via gateway only (scheduler watcher failed).'
|
|
1202
|
+
: 'Session spawned. Delivery target missing or not recorded.',
|
|
1008
1203
|
});
|
|
1009
1204
|
|
|
1010
1205
|
// -- Post-spawn verification (Fix 3) --------------------------------
|
|
@@ -1018,17 +1213,25 @@ async function cmdEnqueue(flags) {
|
|
|
1018
1213
|
for (let spawnPoll = 0; spawnPoll < SPAWN_POLL_MAX; spawnPoll++) {
|
|
1019
1214
|
await sleep(SPAWN_POLL_DELAY_MS);
|
|
1020
1215
|
const spawnStore = readSessionsStore(agent);
|
|
1021
|
-
|
|
1216
|
+
const signal = inspectSessionActivitySignal(sessionKey, spawnStore);
|
|
1217
|
+
if (signal.hasActivitySignal) {
|
|
1022
1218
|
spawnConfirmed = true;
|
|
1023
1219
|
break;
|
|
1024
1220
|
}
|
|
1025
1221
|
}
|
|
1026
1222
|
if (!spawnConfirmed) {
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1223
|
+
const laneError = getGatewayLaneTaskError(sessionKey);
|
|
1224
|
+
const spawnError = laneError.found && laneError.error
|
|
1225
|
+
? `spawn-failure: ${laneError.error}`
|
|
1226
|
+
: `spawn-failure: session ${sessionKey} never produced transcript/history within ` +
|
|
1227
|
+
`${(SPAWN_POLL_MAX * SPAWN_POLL_DELAY_MS) / 1000}s`;
|
|
1228
|
+
process.stderr.write(`[${agentBrand}] WARNING: ${spawnError}\n`);
|
|
1229
|
+
setLabel(label, {
|
|
1230
|
+
status: 'error',
|
|
1231
|
+
error: spawnError,
|
|
1232
|
+
summary: spawnError,
|
|
1233
|
+
});
|
|
1234
|
+
disarmWatchdog(label);
|
|
1032
1235
|
}
|
|
1033
1236
|
} catch (err) {
|
|
1034
1237
|
die(`gateway agent call failed: ${err.message}`);
|
|
@@ -1065,62 +1268,80 @@ function cmdStatus(flags) {
|
|
|
1065
1268
|
const ageMs = Date.now() - spawnedAtMs;
|
|
1066
1269
|
const STARTUP_GRACE_MS = config.startupGraceMs ?? 300_000;
|
|
1067
1270
|
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
//
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1271
|
+
const bootstrapFailure = !entry.lastPing
|
|
1272
|
+
? inspectSessionBootstrapFailure(
|
|
1273
|
+
entry.sessionKey,
|
|
1274
|
+
sessionsStore,
|
|
1275
|
+
spawnedAtMs,
|
|
1276
|
+
STARTUP_GRACE_MS,
|
|
1277
|
+
)
|
|
1278
|
+
: { shouldResolve: false, reason: null, errorMsg: null };
|
|
1279
|
+
if (bootstrapFailure.shouldResolve) {
|
|
1280
|
+
setLabel(label, {
|
|
1281
|
+
status: 'error',
|
|
1282
|
+
error: bootstrapFailure.errorMsg,
|
|
1283
|
+
summary: `Auto-resolved as spawn failure: ${bootstrapFailure.reason}`,
|
|
1284
|
+
});
|
|
1285
|
+
syncAction = `auto-resolved as spawn failure: ${bootstrapFailure.reason}`;
|
|
1286
|
+
disarmWatchdog(label);
|
|
1287
|
+
} else {
|
|
1288
|
+
// -- Heartbeat-based liveness guard ----------------------------------
|
|
1289
|
+
// The watcher process writes lastPing every 60s while the session is live.
|
|
1290
|
+
// If the ping is fresh, the watcher is alive and working -- defer auto-resolve
|
|
1291
|
+
// to avoid killing sessions during slow tool calls, docker builds, etc.
|
|
1292
|
+
//
|
|
1293
|
+
// PING_STALE_MS: 3x the 60s ping interval -- if we haven't heard from the
|
|
1294
|
+
// watcher in 3 min, it's probably dead; fall through to check.
|
|
1295
|
+
// hardCeilingMs: job timeout * 1.5 -- absolute max regardless of ping age.
|
|
1296
|
+
// Catches zombie watchers (watcher alive but session is stuck).
|
|
1297
|
+
// idleThresholdMs: max(job timeout, 10 min) -- replaces the old hardcoded 10-min
|
|
1298
|
+
// threshold so longer jobs aren't killed at exactly 10 min.
|
|
1299
|
+
const PING_STALE_MS = 3 * 60 * 1000;
|
|
1300
|
+
const idleThresholdMs = Math.max((entry.timeoutSeconds || 600) * 1000, 10 * 60 * 1000);
|
|
1301
|
+
// hardCeilingMs must be >= idleThresholdMs to avoid the ceiling undercutting the
|
|
1302
|
+
// idle floor (e.g. timeoutSeconds=300 -> ceiling=7.5 min < idle=10 min would force
|
|
1303
|
+
// zombie-guard threshold for sessions that should still use idleThresholdMs).
|
|
1304
|
+
const hardCeilingMs = Math.max((entry.timeoutSeconds || 600) * 1000 * 1.5, idleThresholdMs * 1.5);
|
|
1305
|
+
|
|
1306
|
+
let check;
|
|
1307
|
+
if (ageMs < STARTUP_GRACE_MS) {
|
|
1308
|
+
// Within startup grace -- never auto-resolve
|
|
1094
1309
|
check = { shouldResolve: false };
|
|
1310
|
+
} else if (entry.lastPing) {
|
|
1311
|
+
const pingAgeMs = Date.now() - new Date(entry.lastPing).getTime();
|
|
1312
|
+
if (pingAgeMs < PING_STALE_MS && ageMs < hardCeilingMs) {
|
|
1313
|
+
// Watcher alive and within job ceiling -- defer auto-resolve
|
|
1314
|
+
check = { shouldResolve: false };
|
|
1315
|
+
} else {
|
|
1316
|
+
// Ping stale OR past hard ceiling: fall through to session store check
|
|
1317
|
+
const thresh = ageMs >= hardCeilingMs ? 2 * 60 * 1000 : idleThresholdMs;
|
|
1318
|
+
check = checkSessionDone(entry.sessionKey, sessionsStore, thresh, true, spawnedAtMs);
|
|
1319
|
+
}
|
|
1095
1320
|
} else {
|
|
1096
|
-
//
|
|
1321
|
+
// No lastPing -- backward compat (sessions dispatched before heartbeat feature).
|
|
1322
|
+
// Use idleThresholdMs (job-aware) instead of the old hardcoded 10 min.
|
|
1097
1323
|
const thresh = ageMs >= hardCeilingMs ? 2 * 60 * 1000 : idleThresholdMs;
|
|
1098
1324
|
check = checkSessionDone(entry.sessionKey, sessionsStore, thresh, true, spawnedAtMs);
|
|
1099
1325
|
}
|
|
1100
|
-
} else {
|
|
1101
|
-
// No lastPing -- backward compat (sessions dispatched before heartbeat feature).
|
|
1102
|
-
// Use idleThresholdMs (job-aware) instead of the old hardcoded 10 min.
|
|
1103
|
-
const thresh = ageMs >= hardCeilingMs ? 2 * 60 * 1000 : idleThresholdMs;
|
|
1104
|
-
check = checkSessionDone(entry.sessionKey, sessionsStore, thresh, true, spawnedAtMs);
|
|
1105
|
-
}
|
|
1106
1326
|
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1327
|
+
if (check.shouldResolve) {
|
|
1328
|
+
if (check.is529) {
|
|
1329
|
+
setLabel(label, {
|
|
1330
|
+
status: 'error',
|
|
1331
|
+
error: check.errorMsg || `529/overload: ${check.reason}`,
|
|
1332
|
+
summary: `Auto-resolved as error: ${check.reason}`,
|
|
1333
|
+
});
|
|
1334
|
+
syncAction = `auto-resolved as 529 error: ${check.reason}`;
|
|
1335
|
+
} else {
|
|
1336
|
+
setLabel(label, {
|
|
1337
|
+
status: 'interrupted',
|
|
1338
|
+
summary: `Auto-resolved: session went idle without calling done. Work may be incomplete. (${check.reason})`,
|
|
1339
|
+
});
|
|
1340
|
+
syncAction = `auto-resolved as interrupted: ${check.reason}`;
|
|
1341
|
+
}
|
|
1342
|
+
// Disarm watchdog when session is auto-resolved
|
|
1343
|
+
disarmWatchdog(label);
|
|
1121
1344
|
}
|
|
1122
|
-
// Disarm watchdog when session is auto-resolved
|
|
1123
|
-
disarmWatchdog(label);
|
|
1124
1345
|
}
|
|
1125
1346
|
}
|
|
1126
1347
|
|
|
@@ -1128,6 +1349,9 @@ function cmdStatus(flags) {
|
|
|
1128
1349
|
if (entry.sessionKey && sessionsStore) {
|
|
1129
1350
|
const sessionEntry = sessionsStore[entry.sessionKey];
|
|
1130
1351
|
if (sessionEntry) {
|
|
1352
|
+
if (sessionEntry.sessionId && entry.sessionId !== sessionEntry.sessionId) {
|
|
1353
|
+
setLabel(label, { sessionId: sessionEntry.sessionId });
|
|
1354
|
+
}
|
|
1131
1355
|
liveness = {
|
|
1132
1356
|
updatedAt: sessionEntry.updatedAt,
|
|
1133
1357
|
ageMs: sessionEntry.updatedAt
|
|
@@ -1159,6 +1383,7 @@ function cmdStatus(flags) {
|
|
|
1159
1383
|
updatedAt: current.updatedAt,
|
|
1160
1384
|
summary: current.summary || null,
|
|
1161
1385
|
completion: current.completion || null,
|
|
1386
|
+
delivery: buildDispatchDeliverySurface(current),
|
|
1162
1387
|
error: current.error || null,
|
|
1163
1388
|
liveness,
|
|
1164
1389
|
...(syncAction ? { syncAction } : {}),
|
|
@@ -1192,7 +1417,7 @@ function hasActiveWatcher(label) {
|
|
|
1192
1417
|
r.status = 'running'
|
|
1193
1418
|
OR (r.status = 'pending' AND r.started_at > datetime('now','-5 minutes'))
|
|
1194
1419
|
)
|
|
1195
|
-
`).get(`%-deliver:${label}
|
|
1420
|
+
`).get(`%-deliver:${label}%`);
|
|
1196
1421
|
return (row?.c || 0) > 0;
|
|
1197
1422
|
} catch {
|
|
1198
1423
|
return false;
|
|
@@ -1346,6 +1571,28 @@ function cmdSync(flags) {
|
|
|
1346
1571
|
const syncStore = getSyncStore(entry);
|
|
1347
1572
|
const spawnedAtMs = entry.spawnedAt ? new Date(entry.spawnedAt).getTime() : 0;
|
|
1348
1573
|
const elapsedMs = Date.now() - spawnedAtMs;
|
|
1574
|
+
const STARTUP_GRACE_MS_SYNC = config.startupGraceMs ?? 300_000;
|
|
1575
|
+
|
|
1576
|
+
const bootstrapFailure = !entry.lastPing
|
|
1577
|
+
? inspectSessionBootstrapFailure(
|
|
1578
|
+
entry.sessionKey,
|
|
1579
|
+
syncStore,
|
|
1580
|
+
spawnedAtMs,
|
|
1581
|
+
STARTUP_GRACE_MS_SYNC,
|
|
1582
|
+
)
|
|
1583
|
+
: { shouldResolve: false, reason: null, errorMsg: null };
|
|
1584
|
+
if (bootstrapFailure.shouldResolve) {
|
|
1585
|
+
changes.push({ label: name, from: 'running', to: 'error', reason: bootstrapFailure.reason });
|
|
1586
|
+
if (!dryRun) {
|
|
1587
|
+
setLabel(name, {
|
|
1588
|
+
status: 'error',
|
|
1589
|
+
error: bootstrapFailure.errorMsg,
|
|
1590
|
+
summary: `Synced as spawn failure: ${bootstrapFailure.reason}`,
|
|
1591
|
+
});
|
|
1592
|
+
disarmWatchdog(name);
|
|
1593
|
+
}
|
|
1594
|
+
continue;
|
|
1595
|
+
}
|
|
1349
1596
|
|
|
1350
1597
|
// -- Heartbeat-based liveness guard (mirrors cmdStatus logic) ---------
|
|
1351
1598
|
// Skip auto-resolve when the watcher's lastPing heartbeat is fresh.
|
|
@@ -1412,32 +1659,62 @@ function cmdResult(flags) {
|
|
|
1412
1659
|
return;
|
|
1413
1660
|
}
|
|
1414
1661
|
|
|
1415
|
-
//
|
|
1662
|
+
// Conservative transcript recovery:
|
|
1663
|
+
// - lastReply is ONLY populated from a terminal JSONL-scoped assistant reply
|
|
1664
|
+
// - diagnosticReply captures the last meaningful assistant text for timeout reporting
|
|
1416
1665
|
let lastReply = null;
|
|
1666
|
+
let diagnosticReply = null;
|
|
1667
|
+
let recoverySource = null;
|
|
1668
|
+
let recoverySessionId = entry.sessionId || null;
|
|
1669
|
+
const resultAgent = entry.agent || agentFromSessionKey(entry.sessionKey) || 'main';
|
|
1670
|
+
const resultStore = entry.sessionKey ? readSessionsStore(resultAgent) : null;
|
|
1671
|
+
const resultSessionEntry = entry.sessionKey && resultStore ? resultStore[entry.sessionKey] : null;
|
|
1672
|
+
|
|
1673
|
+
if (resultSessionEntry?.sessionId) {
|
|
1674
|
+
recoverySessionId = resultSessionEntry.sessionId;
|
|
1675
|
+
if (entry.sessionId !== recoverySessionId) {
|
|
1676
|
+
setLabel(label, { sessionId: recoverySessionId });
|
|
1677
|
+
}
|
|
1678
|
+
}
|
|
1679
|
+
|
|
1680
|
+
if (recoverySessionId) {
|
|
1681
|
+
const jsonlEntries = readJsonlTailEntries(recoverySessionId, resultAgent, 200);
|
|
1682
|
+
const terminalReply = extractTerminalAssistantReplyFromEntries(jsonlEntries);
|
|
1683
|
+
const jsonlDiagnostic = extractLastMeaningfulAssistantReplyFromEntries(jsonlEntries);
|
|
1684
|
+
|
|
1685
|
+
if (terminalReply) {
|
|
1686
|
+
lastReply = terminalReply;
|
|
1687
|
+
recoverySource = 'jsonl-terminal';
|
|
1688
|
+
}
|
|
1689
|
+
if (jsonlDiagnostic) {
|
|
1690
|
+
diagnosticReply = jsonlDiagnostic;
|
|
1691
|
+
if (!recoverySource) recoverySource = 'jsonl-diagnostic';
|
|
1692
|
+
}
|
|
1693
|
+
}
|
|
1694
|
+
|
|
1417
1695
|
if (entry.sessionKey) {
|
|
1418
1696
|
try {
|
|
1419
1697
|
const result = gatewayCall('chat.history', {
|
|
1420
1698
|
sessionKey: entry.sessionKey,
|
|
1421
1699
|
}, { timeout: 10000 });
|
|
1422
1700
|
|
|
1423
|
-
if (result?.messages?.length) {
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
}
|
|
1701
|
+
if (result?.messages?.length && !diagnosticReply) {
|
|
1702
|
+
diagnosticReply = extractLastMeaningfulAssistantReplyFromEntries(result.messages);
|
|
1703
|
+
if (diagnosticReply && !recoverySource) recoverySource = 'history-diagnostic';
|
|
1704
|
+
}
|
|
1705
|
+
|
|
1706
|
+
if (!lastReply && result?.messages?.length) {
|
|
1707
|
+
const historyTerminal = extractTerminalAssistantReplyFromEntries(result.messages);
|
|
1708
|
+
if (historyTerminal) {
|
|
1709
|
+
lastReply = historyTerminal;
|
|
1710
|
+
recoverySource = 'history-terminal';
|
|
1434
1711
|
}
|
|
1435
1712
|
}
|
|
1436
1713
|
} catch {}
|
|
1437
1714
|
}
|
|
1438
1715
|
|
|
1439
1716
|
// -- Watchdog cleanup: disable watchdog job when result is available --
|
|
1440
|
-
if (lastReply && entry.watchdogJobId) {
|
|
1717
|
+
if ((lastReply || hasCompletionSignal(entry.completion)) && entry.watchdogJobId) {
|
|
1441
1718
|
disarmWatchdog(label);
|
|
1442
1719
|
}
|
|
1443
1720
|
|
|
@@ -1449,11 +1726,64 @@ function cmdResult(flags) {
|
|
|
1449
1726
|
spawnedAt: entry.spawnedAt,
|
|
1450
1727
|
summary: entry.summary || (lastReply ? lastReply.slice(0, 500) : null),
|
|
1451
1728
|
completion: entry.completion || null,
|
|
1729
|
+
delivery: buildDispatchDeliverySurface(entry),
|
|
1452
1730
|
lastReply: lastReply || null,
|
|
1731
|
+
diagnosticReply: diagnosticReply || lastReply || null,
|
|
1732
|
+
recovery: recoverySource || recoverySessionId ? {
|
|
1733
|
+
source: recoverySource || null,
|
|
1734
|
+
sessionId: recoverySessionId || null,
|
|
1735
|
+
} : null,
|
|
1453
1736
|
error: entry.error || null,
|
|
1454
1737
|
});
|
|
1455
1738
|
}
|
|
1456
1739
|
|
|
1740
|
+
|
|
1741
|
+
function cmdWatcherHandoff(flags) {
|
|
1742
|
+
const label = flags.label;
|
|
1743
|
+
const reason = flags.reason || null;
|
|
1744
|
+
if (!label) die('--label is required', 2);
|
|
1745
|
+
|
|
1746
|
+
const entry = getLabel(label);
|
|
1747
|
+
if (!entry) {
|
|
1748
|
+
out({ ok: false, scheduled: false, label, message: 'No session found for this label' });
|
|
1749
|
+
return;
|
|
1750
|
+
}
|
|
1751
|
+
|
|
1752
|
+
if (entry.status && entry.status !== 'running') {
|
|
1753
|
+
out({ ok: true, scheduled: false, label, reason: 'label already terminal', status: entry.status });
|
|
1754
|
+
return;
|
|
1755
|
+
}
|
|
1756
|
+
|
|
1757
|
+
if (!entry.deliverTo || entry.deliveryMode === 'none') {
|
|
1758
|
+
out({ ok: true, scheduled: false, label, reason: 'delivery disabled for this label' });
|
|
1759
|
+
return;
|
|
1760
|
+
}
|
|
1761
|
+
|
|
1762
|
+
const agentBrand = config.agents?.[entry.agent || 'main']?.name
|
|
1763
|
+
|| (entry.agent && entry.agent !== 'main' ? entry.agent : null)
|
|
1764
|
+
|| config.name
|
|
1765
|
+
|| BRAND;
|
|
1766
|
+
|
|
1767
|
+
const watcherJob = scheduleDeliveryWatcherJob({
|
|
1768
|
+
label,
|
|
1769
|
+
deliverTo: entry.deliverTo,
|
|
1770
|
+
deliverChannel: entry.deliverChannel || 'telegram',
|
|
1771
|
+
timeoutSeconds: Number(entry.timeoutSeconds ?? entry.timeout) || 300,
|
|
1772
|
+
idleThresholdSeconds: Number(entry.idleThresholdSeconds) || 300,
|
|
1773
|
+
origin: entry.origin || 'system',
|
|
1774
|
+
agentBrand,
|
|
1775
|
+
nameSuffix: `:handoff:${Date.now()}`,
|
|
1776
|
+
});
|
|
1777
|
+
|
|
1778
|
+
out({
|
|
1779
|
+
ok: true,
|
|
1780
|
+
scheduled: true,
|
|
1781
|
+
label,
|
|
1782
|
+
jobId: watcherJob?.id || null,
|
|
1783
|
+
reason,
|
|
1784
|
+
});
|
|
1785
|
+
}
|
|
1786
|
+
|
|
1457
1787
|
/**
|
|
1458
1788
|
* done -- agent-side completion signal (push-based).
|
|
1459
1789
|
* Called by the subagent itself as its LAST action when fully complete.
|
|
@@ -1518,15 +1848,15 @@ async function cmdDone(flags) {
|
|
|
1518
1848
|
}
|
|
1519
1849
|
}
|
|
1520
1850
|
|
|
1521
|
-
//
|
|
1522
|
-
//
|
|
1523
|
-
//
|
|
1851
|
+
// Persist a first-class completion payload with deterministic delivery text
|
|
1852
|
+
// so the watcher/post-office path never depends solely on transcript recovery
|
|
1853
|
+
// or on whatever raw blob the model chose to print at the end.
|
|
1524
1854
|
const completion = buildTerminalCompletionPayload({
|
|
1525
1855
|
summary: rawSummary,
|
|
1526
1856
|
checklist,
|
|
1527
1857
|
sha,
|
|
1528
1858
|
});
|
|
1529
|
-
const summary = completion.summary ||
|
|
1859
|
+
const summary = completion.summary || null;
|
|
1530
1860
|
|
|
1531
1861
|
const existing = getLabel(label);
|
|
1532
1862
|
|
|
@@ -1659,6 +1989,7 @@ async function cmdDone(flags) {
|
|
|
1659
1989
|
duration_ms: 0,
|
|
1660
1990
|
session_key: null,
|
|
1661
1991
|
summary,
|
|
1992
|
+
completion,
|
|
1662
1993
|
deliverTo,
|
|
1663
1994
|
deliveryChannel,
|
|
1664
1995
|
}).catch(() => {});
|
|
@@ -1690,6 +2021,8 @@ async function cmdDone(flags) {
|
|
|
1690
2021
|
status: 'ok',
|
|
1691
2022
|
duration_ms: Date.now() - spawnedAtMs,
|
|
1692
2023
|
session_key: existing.sessionKey || null,
|
|
2024
|
+
summary,
|
|
2025
|
+
completion,
|
|
1693
2026
|
}).catch(() => {});
|
|
1694
2027
|
|
|
1695
2028
|
out({ ok: true, label, status: 'done', summary, completion, message: 'Label marked done via agent signal.' });
|
|
@@ -1699,16 +2032,31 @@ async function cmdDone(flags) {
|
|
|
1699
2032
|
* send / steer -- send a message into a running session.
|
|
1700
2033
|
*
|
|
1701
2034
|
* Flags:
|
|
1702
|
-
* --label <string>
|
|
1703
|
-
* --message <string>
|
|
1704
|
-
* --
|
|
2035
|
+
* --label <string> Required (unless --session-key)
|
|
2036
|
+
* --message <string> Message to send
|
|
2037
|
+
* --message-file <path> Read message text from a file (`-` = stdin)
|
|
2038
|
+
* --message-env <VAR> Read message text from an environment variable
|
|
2039
|
+
* --message-stdin Read message text from stdin explicitly
|
|
2040
|
+
* (stdin is also auto-read when piped and no other message source is set)
|
|
2041
|
+
* --session-key <key> Optional. Direct session key (bypasses label lookup)
|
|
1705
2042
|
*/
|
|
1706
2043
|
async function cmdSend(flags) {
|
|
1707
|
-
const label
|
|
1708
|
-
const message = flags.message;
|
|
2044
|
+
const label = flags.label;
|
|
1709
2045
|
const directKey = flags['session-key'];
|
|
2046
|
+
let message = null;
|
|
1710
2047
|
|
|
1711
|
-
|
|
2048
|
+
try {
|
|
2049
|
+
message = await resolveMessageInput({
|
|
2050
|
+
message: flags.message,
|
|
2051
|
+
messageFile: flags['message-file'],
|
|
2052
|
+
messageEnv: flags['message-env'],
|
|
2053
|
+
messageStdin: flags['message-stdin'],
|
|
2054
|
+
});
|
|
2055
|
+
} catch (err) {
|
|
2056
|
+
die(err.message, 2);
|
|
2057
|
+
}
|
|
2058
|
+
|
|
2059
|
+
if (message === null || message.length === 0) die('--message, --message-file, --message-env, --message-stdin, or piped stdin is required', 2);
|
|
1712
2060
|
if (!label && !directKey) die('--label or --session-key is required', 2);
|
|
1713
2061
|
|
|
1714
2062
|
let sessionKey = directKey;
|
|
@@ -1807,6 +2155,7 @@ function cmdList(flags) {
|
|
|
1807
2155
|
let entries = Object.entries(labels).map(([name, data]) => ({
|
|
1808
2156
|
label: name,
|
|
1809
2157
|
...data,
|
|
2158
|
+
delivery: buildDispatchDeliverySurface(data),
|
|
1810
2159
|
}));
|
|
1811
2160
|
|
|
1812
2161
|
if (filterStatus) {
|
|
@@ -1833,13 +2182,15 @@ ${BRAND} -- sub-agent dispatch CLI (native gateway API)
|
|
|
1833
2182
|
Usage: openclaw-scheduler <subcommand> [flags]
|
|
1834
2183
|
|
|
1835
2184
|
Subcommands:
|
|
1836
|
-
enqueue --label <l> --message <m>|--message-file <f
|
|
1837
|
-
[--timeout <s>] [--mode fresh|reuse] [--model <m>]
|
|
1838
|
-
[--origin <o>] (
|
|
2185
|
+
enqueue --label <l> [--message <m>|--message-file <f>|--message-env <VAR>|--message-stdin]
|
|
2186
|
+
[--agent <a>] [--thinking <t>] [--timeout <s>] [--mode fresh|reuse] [--model <m>]
|
|
2187
|
+
[--origin <o>] (recommended explicit value, e.g. "telegram:<chat_id>" or "system")
|
|
1839
2188
|
[--deliver-to <id>] [--deliver-channel <ch>] [--delivery-mode <m>]
|
|
1840
|
-
(--deliver-to
|
|
2189
|
+
(--deliver-to should come from inbound metadata chat_id; explicit --deliver-to becomes origin when --origin is omitted)
|
|
2190
|
+
(active-session auto-detect is preserved only as a manual/local fallback)
|
|
1841
2191
|
[--no-monitor] [--monitor-interval <cron>] [--monitor-timeout <min>]
|
|
1842
2192
|
[--verify-cmd <shell_cmd>]
|
|
2193
|
+
(stdin is auto-read when piped and no explicit message source is set)
|
|
1843
2194
|
|
|
1844
2195
|
status --label <l>
|
|
1845
2196
|
|
|
@@ -1847,9 +2198,13 @@ Subcommands:
|
|
|
1847
2198
|
|
|
1848
2199
|
result --label <l>
|
|
1849
2200
|
|
|
1850
|
-
|
|
2201
|
+
watcher-handoff --label <l> [--reason <text>]
|
|
2202
|
+
|
|
2203
|
+
send --label <l> [--message <m>|--message-file <f>|--message-env <VAR>|--message-stdin]
|
|
2204
|
+
[--session-key <k>]
|
|
1851
2205
|
|
|
1852
|
-
steer --label <l> --message <m
|
|
2206
|
+
steer --label <l> [--message <m>|--message-file <f>|--message-env <VAR>|--message-stdin]
|
|
2207
|
+
(alias for send)
|
|
1853
2208
|
|
|
1854
2209
|
heartbeat --label <l> OR --session-key <k>
|
|
1855
2210
|
|
|
@@ -1871,6 +2226,7 @@ switch (subcommand) {
|
|
|
1871
2226
|
case 'status': cmdStatus(flags); break;
|
|
1872
2227
|
case 'stuck': await cmdStuck(flags); break;
|
|
1873
2228
|
case 'result': cmdResult(flags); break;
|
|
2229
|
+
case 'watcher-handoff': cmdWatcherHandoff(flags); break;
|
|
1874
2230
|
case 'send': await cmdSend(flags); break;
|
|
1875
2231
|
case 'steer': await cmdSend(flags); break;
|
|
1876
2232
|
case 'heartbeat': cmdHeartbeat(flags); break;
|