openclaw-scheduler 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +302 -0
- package/BEST-PRACTICES.md +506 -0
- package/CHANGELOG.md +82 -0
- package/CODE_OF_CONDUCT.md +22 -0
- package/CONTEXT.md +26 -0
- package/CONTRIBUTING.md +73 -0
- package/IMPLEMENTATION_SPEC.md +170 -0
- package/INSTALL-ADDITIONAL-HOST.md +333 -0
- package/INSTALL-LINUX.md +419 -0
- package/INSTALL-WINDOWS.md +305 -0
- package/INSTALL.md +364 -0
- package/JOB-QUICK-REF.md +222 -0
- package/LICENSE +21 -0
- package/QUICK-START.md +256 -0
- package/README.md +2170 -0
- package/SECURITY.md +34 -0
- package/UNINSTALL.md +129 -0
- package/UPGRADING.md +436 -0
- package/agents.js +67 -0
- package/approval.js +107 -0
- package/backup.js +390 -0
- package/bin/openclaw-scheduler.js +138 -0
- package/cli.js +1083 -0
- package/db.js +122 -0
- package/dispatch/529-recovery.mjs +204 -0
- package/dispatch/README.md +372 -0
- package/dispatch/config.example.json +24 -0
- package/dispatch/deliver-watcher.sh +57 -0
- package/dispatch/hooks.mjs +171 -0
- package/dispatch/index.mjs +1836 -0
- package/dispatch/watcher.mjs +1396 -0
- package/dispatch-queue.js +112 -0
- package/dispatcher-approvals.js +96 -0
- package/dispatcher-delivery.js +43 -0
- package/dispatcher-maintenance.js +242 -0
- package/dispatcher-shell.js +29 -0
- package/dispatcher-strategies.js +1280 -0
- package/dispatcher-utils.js +81 -0
- package/dispatcher.js +855 -0
- package/docs/adr-schedule-ownership.md +73 -0
- package/docs/gateway-contract.md +904 -0
- package/docs/plans/2026-03-09-fix-typescript-types.md +91 -0
- package/docs/plans/2026-03-09-test-coverage-gaps.md +83 -0
- package/docs/plans/2026-03-10-dispatcher-refactor.md +801 -0
- package/docs/trust-architecture.md +266 -0
- package/gateway.js +473 -0
- package/idempotency.js +119 -0
- package/index.d.ts +864 -0
- package/index.js +17 -0
- package/jobs.js +1224 -0
- package/messages.js +357 -0
- package/migrate-consolidate.js +694 -0
- package/migrate.js +125 -0
- package/package.json +130 -0
- package/paths.js +79 -0
- package/prompt-context.js +94 -0
- package/retrieval.js +176 -0
- package/runs.js +270 -0
- package/scheduler-schema.js +101 -0
- package/schema.sql +480 -0
- package/scripts/dispatch-cli-utils.mjs +65 -0
- package/scripts/inbox-consumer.mjs +288 -0
- package/scripts/stuck-detector.sh +18 -0
- package/scripts/stuck-run-detector.mjs +333 -0
- package/scripts/telegram-webhook-check.mjs +238 -0
- package/setup.mjs +724 -0
- package/shell-result.js +214 -0
- package/task-tracker.js +300 -0
- package/team-adapter.js +335 -0
- package/v02-runtime.js +599 -0
package/shell-result.js
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import { writeFileSync } from 'fs';
|
|
2
|
+
import { join } from 'path';
|
|
3
|
+
import { getResolvedDbPath } from './db.js';
|
|
4
|
+
import { ensureArtifactsDir, resolveArtifactsDir } from './paths.js';
|
|
5
|
+
|
|
6
|
+
export const DEFAULT_STORE_LIMIT = 64 * 1024;
|
|
7
|
+
export const DEFAULT_EXCERPT_LIMIT = 2000;
|
|
8
|
+
export const DEFAULT_SUMMARY_LIMIT = 5000;
|
|
9
|
+
export const DEFAULT_OFFLOAD_THRESHOLD = 64 * 1024;
|
|
10
|
+
|
|
11
|
+
/**
 * Coerce an arbitrary value to a string.
 * `null` and `undefined` become the empty string, strings pass through
 * untouched, and anything else is converted with `String()`.
 */
function toText(value) {
  if (value === null || value === undefined) {
    return '';
  }
  if (typeof value === 'string') {
    return value;
  }
  return String(value);
}
|
|
15
|
+
|
|
16
|
+
/** Size in bytes of `value` once coerced with toText() and encoded as UTF-8. */
function textBytes(value) {
  const asString = toText(value);
  return Buffer.byteLength(asString, 'utf8');
}
|
|
19
|
+
|
|
20
|
+
// Truncate to a byte limit (UTF-8). The input is coerced with toText() and
// trimmed first. Slices at a character (code point) boundary that does not
// exceed the byte budget, so multi-byte characters are never split.
//
// Returns { text, truncated, bytes } where `bytes` is the UTF-8 size of the
// trimmed input (before truncation), not of the returned text.
//
// When truncation happens, "\n...[truncated]" is appended and its size is
// reserved out of the budget. If the limit is too small to hold even that
// marker, the marker is omitted so the returned text still fits the limit.
function truncateText(value, limitBytes) {
  const text = toText(value).trim();
  if (!text) return { text: '', truncated: false, bytes: 0 };
  const bytes = textBytes(text);
  if (bytes <= limitBytes) return { text, truncated: false, bytes };

  const suffix = '\n...[truncated]';
  const suffixBytes = Buffer.byteLength(suffix, 'utf8');
  // Appending the marker to a budget smaller than the marker itself would
  // overshoot the limit, so in that case spend the whole budget on content.
  const dropSuffix = limitBytes < suffixBytes;
  const target = Math.max(0, dropSuffix ? limitBytes : limitBytes - suffixBytes);

  // Walk code points until the next one would exceed the byte budget.
  let usedBytes = 0;
  let cutIndex = 0;
  for (const char of text) {
    const charBytes = Buffer.byteLength(char, 'utf8');
    if (usedBytes + charBytes > target) break;
    usedBytes += charBytes;
    // char.length is 2 for astral code points (surrogate pairs), 1 otherwise.
    cutIndex += char.length;
  }

  return {
    text: text.slice(0, cutIndex) + (dropSuffix ? '' : suffix),
    truncated: true,
    bytes,
  };
}
|
|
45
|
+
|
|
46
|
+
/**
 * Pick a human-readable failure message for a normalized shell result.
 * Precedence: none when status is 'ok', then timeout, numeric exit code,
 * terminating signal, the raw error's own message, and finally a generic
 * fallback.
 *
 * @param {{status: string, timedOut: boolean, exitCode: ?number, signal: ?string, rawError: ?Error}} result
 * @param {number} timeoutMs - Only interpolated into the timeout message.
 * @returns {string|null}
 */
function deriveErrorMessage(result, timeoutMs) {
  if (result.status === 'ok') {
    return null;
  }
  if (result.timedOut) {
    return `Shell command timed out after ${timeoutMs}ms`;
  }

  const { exitCode, signal, rawError } = result;
  if (typeof exitCode === 'number') {
    return `Shell exited with code ${exitCode}`;
  }
  if (signal) {
    return `Shell terminated by signal ${signal}`;
  }

  const rawMessage = rawError?.message;
  return rawMessage || 'Shell command failed';
}
|
|
53
|
+
|
|
54
|
+
/**
 * Best-effort write of one output stream to `<artifactsDir>/runs/<runId>/<kind>.txt`.
 *
 * @param {string} kind - Used as the file's base name (callers in this file pass 'stdout' / 'stderr').
 * @param {?string} runId
 * @param {string} text - Content to write; whitespace-only content is skipped.
 * @param {?string} artifactsDir
 * @returns {string|null} The written file path, or null when a required
 *   argument is missing/blank or the write failed (the failure is reported on
 *   stderr rather than thrown).
 */
function writeOutputArtifact(kind, runId, text, artifactsDir) {
  if (!artifactsDir) return null;
  if (!runId) return null;
  if (text.trim() === '') return null;

  try {
    const runDir = ensureArtifactsDir(join(artifactsDir, 'runs', runId));
    const target = join(runDir, `${kind}.txt`);
    writeFileSync(target, text, 'utf8');
    return target;
  } catch (err) {
    process.stderr.write(`[shell-result] writeOutputArtifact failed for ${kind} (run ${runId}): ${err.message}\n`);
    return null;
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Render one stream as a text block: a "<label>:" header followed by the
 * excerpt (when the excerpt has text), then an "offloaded" note (when an
 * artifact path is given). Lines are joined with "\n"; returns '' when
 * neither part applies.
 *
 * @param {string} label
 * @param {{text: string}} excerpt
 * @param {?string} artifactPath
 * @param {number} bytes - Reported inside the offload note.
 * @returns {string}
 */
function formatOutputBlock(label, excerpt, artifactPath, bytes) {
  const excerptLines = excerpt.text ? [`${label}:`, excerpt.text] : [];
  const offloadLines = artifactPath
    ? [`[${label} offloaded: ${artifactPath} (${bytes} bytes)]`]
    : [];
  return [...excerptLines, ...offloadLines].join('\n');
}
|
|
78
|
+
|
|
79
|
+
/**
 * Normalize the raw outcome of a shell command into a single record.
 *
 * For each stream the function measures its UTF-8 size, writes it to an
 * artifact file when it is larger than `offloadThreshold`, and produces a
 * stored copy (capped at `storeLimit`, or at `excerptLimit` once offloaded)
 * plus a short excerpt (capped at `excerptLimit`).
 *
 * The status is 'timeout' when the error looks like a timeout (code
 * 'ETIMEDOUT', `killed === true`, or a timeout-like message), 'error' for
 * any other error, and 'ok' when there is no error. `exitCode` is only set
 * when `error.code` is an integer.
 *
 * @param {{stdout?: *, stderr?: *, error?: ?object}} raw
 * @param {object} [options]
 * @param {?string} [options.runId=null] - Passed to the artifact writer.
 * @param {number} [options.timeoutMs=300000] - Only used in the timeout message.
 * @param {number} [options.storeLimit]
 * @param {number} [options.excerptLimit]
 * @param {number} [options.summaryLimit]
 * @param {number} [options.offloadThreshold]
 * @param {?string} [options.artifactsDir] - Defaults to the directory resolved from the DB path.
 * @returns {object} Status fields, stored stream text, artifact paths, byte
 *   counts, `summary`, `deliveryText`, `errorMessage` and a snake_case
 *   `contextSummary.shell_result` mirror.
 */
export function normalizeShellResult(
  {
    stdout = '',
    stderr = '',
    error = null,
  },
  {
    runId = null,
    timeoutMs = 300000,
    storeLimit = DEFAULT_STORE_LIMIT,
    excerptLimit = DEFAULT_EXCERPT_LIMIT,
    summaryLimit = DEFAULT_SUMMARY_LIMIT,
    offloadThreshold = DEFAULT_OFFLOAD_THRESHOLD,
    artifactsDir = resolveArtifactsDir({ dbPath: getResolvedDbPath() }),
  } = {}
) {
  // Same pipeline for both streams: measure, maybe offload, then cap.
  const captureStream = (kind, raw) => {
    const text = toText(raw);
    const bytes = textBytes(text);
    const path = bytes > offloadThreshold
      ? writeOutputArtifact(kind, runId, text, artifactsDir)
      : null;
    // Once the full text lives in an artifact, only an excerpt-sized copy is kept inline.
    const storeCap = Math.min(storeLimit, path ? excerptLimit : storeLimit);
    return {
      bytes,
      path,
      stored: truncateText(text, storeCap),
      excerpt: truncateText(text, excerptLimit),
    };
  };

  const out = captureStream('stdout', stdout);
  const err = captureStream('stderr', stderr);

  const exitCode = Number.isInteger(error?.code) ? error.code : null;
  const signal = error?.signal || null;

  const message = error?.message || '';
  const timeoutPatterns = [/timed out/i, /exceeded absolute timeout/i, /idle.*timeout/i];
  const timedOut = Boolean(
    error && (
      error.code === 'ETIMEDOUT'
      || error.killed === true
      || timeoutPatterns.some((pattern) => pattern.test(message))
    )
  );

  let status = 'ok';
  if (timedOut) {
    status = 'timeout';
  } else if (error) {
    status = 'error';
  }

  const errorMessage = deriveErrorMessage(
    { status, timedOut, exitCode, signal, rawError: error },
    timeoutMs
  );

  const blocks = [];
  const stdoutBlock = formatOutputBlock('stdout', out.excerpt, out.path, out.bytes);
  const stderrBlock = formatOutputBlock('stderr', err.excerpt, err.path, err.bytes);
  if (stdoutBlock) blocks.push(stdoutBlock);
  if (stderrBlock) blocks.push(stderrBlock);
  // With no output at all, the error message becomes the preview.
  if (blocks.length === 0 && errorMessage) blocks.push(errorMessage);
  const previewText = blocks.join('\n\n').trim() || '(no output)';

  return {
    status,
    exitCode,
    signal,
    timedOut,
    stdout: out.stored.text,
    stderr: err.stored.text,
    stdoutPath: out.path,
    stderrPath: err.path,
    stdoutBytes: out.bytes,
    stderrBytes: err.bytes,
    stdoutTruncated: out.stored.truncated,
    stderrTruncated: err.stored.truncated,
    summary: truncateText(previewText, summaryLimit).text,
    deliveryText: previewText,
    errorMessage,
    contextSummary: {
      shell_result: {
        exit_code: exitCode,
        signal,
        timed_out: timedOut,
        error_message: errorMessage,
        stdout_excerpt: out.excerpt.text,
        stderr_excerpt: err.excerpt.text,
        stdout_truncated: out.stored.truncated || out.excerpt.truncated,
        stderr_truncated: err.stored.truncated || err.excerpt.truncated,
        stdout_path: out.path,
        stderr_path: err.path,
        stdout_bytes: out.bytes,
        stderr_bytes: err.bytes,
      },
    },
  };
}
|
|
166
|
+
|
|
167
|
+
/**
 * Rebuild a shell-result view from a stored run row.
 *
 * Prefers the dedicated `shell_*` fields on the run when any of them is
 * populated. Otherwise falls back to the `shell_result` object inside the
 * JSON-encoded `context_summary`.
 *
 * @param {?object} run
 * @returns {object|null} `{ exitCode, signal, timedOut, stdout, stderr,
 *   stdoutPath, stderrPath, stdoutBytes, stderrBytes, errorMessage }`, or
 *   null when the run is missing, carries no shell data, or its
 *   `context_summary` is not valid JSON.
 */
export function extractShellResultFromRun(run) {
  if (!run) return null;

  const isNonEmptyString = (value) => typeof value === 'string' && value.length > 0;
  const hasDirectFields =
    [run.shell_exit_code, run.shell_signal, run.shell_timed_out].some((value) => value != null)
    || isNonEmptyString(run.shell_stdout)
    || isNonEmptyString(run.shell_stderr)
    || typeof run.shell_stdout_path === 'string'
    || typeof run.shell_stderr_path === 'string';

  if (hasDirectFields) {
    const stdout = run.shell_stdout || '';
    const stderr = run.shell_stderr || '';
    return {
      exitCode: run.shell_exit_code ?? null,
      signal: run.shell_signal ?? null,
      timedOut: Boolean(run.shell_timed_out),
      stdout,
      stderr,
      stdoutPath: run.shell_stdout_path || null,
      stderrPath: run.shell_stderr_path || null,
      // Byte counts are recomputed from the stored text only when the row has none.
      stdoutBytes: run.shell_stdout_bytes ?? textBytes(stdout),
      stderrBytes: run.shell_stderr_bytes ?? textBytes(stderr),
      errorMessage: run.error_message || null,
    };
  }

  if (!run.context_summary) return null;

  let shell;
  try {
    shell = JSON.parse(run.context_summary)?.shell_result;
  } catch {
    return null;
  }
  if (!shell) return null;

  return {
    exitCode: shell.exit_code ?? null,
    signal: shell.signal ?? null,
    timedOut: Boolean(shell.timed_out),
    stdout: shell.stdout_excerpt || '',
    stderr: shell.stderr_excerpt || '',
    stdoutPath: shell.stdout_path || null,
    stderrPath: shell.stderr_path || null,
    stdoutBytes: shell.stdout_bytes ?? 0,
    stderrBytes: shell.stderr_bytes ?? 0,
    errorMessage: shell.error_message || null,
  };
}
|
package/task-tracker.js
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
// Task Tracker -- dead-man's-switch monitoring for sub-agent teams
|
|
2
|
+
import { getDb } from './db.js';
|
|
3
|
+
import { randomUUID } from 'crypto';
|
|
4
|
+
|
|
5
|
+
// -- Helpers -------------------------------------------------
|
|
6
|
+
/**
 * Current UTC time formatted as "YYYY-MM-DD HH:MM:SS": the ISO-8601 string
 * with the "T" separator replaced by a space and the ".mmmZ" tail removed.
 */
function sqliteNow() {
  const iso = new Date().toISOString();
  const spaced = iso.replace('T', ' ');
  return spaced.replace(/\.\d{3}Z$/, '');
}
|
|
9
|
+
|
|
10
|
+
/**
 * Parse a timestamp string such as "YYYY-MM-DD HH:MM:SS" into a Date.
 *
 * A space separator is converted to "T". A value that carries no zone
 * designator is interpreted as UTC (a "Z" is appended); a value that already
 * ends in "Z" or a numeric offset such as "+02:00" is parsed as written.
 *
 * @param {?string} s
 * @returns {Date|null} The parsed date, or null when the input is empty, not
 *   a string, or does not parse to a valid date. Callers in this file rely on
 *   a falsy result to detect unusable timestamps, so an Invalid Date is never
 *   returned.
 */
function parseSqliteDate(s) {
  if (typeof s !== 'string' || s === '') return null;
  const normalized = s.includes('T') ? s : s.replace(' ', 'T');
  // Appending "Z" after an explicit zone would make the string unparseable.
  const hasZone = /(?:Z|[+-]\d{2}:?\d{2})$/.test(normalized);
  const parsed = new Date(hasZone ? normalized : `${normalized}Z`);
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}
|
|
15
|
+
|
|
16
|
+
// -- Create a new tracked task group -------------------------
|
|
17
|
+
/**
 * Create a tracked task group plus one 'pending' agent row per expected agent.
 *
 * @param {object} opts
 * @param {string} opts.name - Human label e.g. "v5-agent-team"
 * @param {string[]} opts.expectedAgents - Agent labels; must be a non-empty array
 * @param {number} [opts.timeoutS=600] - Timeout in seconds
 * @param {string} [opts.createdBy='main'] - Who spawned the task group
 * @param {string} [opts.deliveryChannel] - Where to send updates (stored as NULL when falsy)
 * @param {string} [opts.deliveryTo] - Target for updates (stored as NULL when falsy)
 * @returns {{ id: string, name: string, status: string, created_at: string, created_by: string, agents: Array<{agent_label: string, status: string}> }}
 * @throws {Error} When expectedAgents is not a non-empty array.
 */
export function createTaskGroup({ name, expectedAgents, timeoutS = 600, createdBy = 'main', deliveryChannel, deliveryTo }) {
  const hasAgents = Array.isArray(expectedAgents) && expectedAgents.length > 0;
  if (!hasAgents) {
    throw new Error('expectedAgents must be a non-empty array');
  }

  const db = getDb();
  const id = randomUUID();
  const now = sqliteNow();

  const insertTracker = db.prepare(`
    INSERT INTO task_tracker (id, name, created_at, created_by, expected_agents, timeout_s, status, delivery_channel, delivery_to)
    VALUES (?, ?, ?, ?, ?, ?, 'active', ?, ?)
  `);
  insertTracker.run(
    id,
    name,
    now,
    createdBy,
    JSON.stringify(expectedAgents),
    timeoutS,
    deliveryChannel || null,
    deliveryTo || null
  );

  const insertAgent = db.prepare(`
    INSERT INTO task_tracker_agents (id, tracker_id, agent_label, status)
    VALUES (?, ?, ?, 'pending')
  `);

  const agents = [];
  for (const agent_label of expectedAgents) {
    insertAgent.run(randomUUID(), id, agent_label);
    agents.push({ agent_label, status: 'pending' });
  }

  return { id, name, status: 'active', created_at: now, created_by: createdBy, agents };
}
|
|
54
|
+
|
|
55
|
+
// -- Get task group by id ------------------------------------
|
|
56
|
+
/**
 * Fetch a single task_tracker row by its id.
 * @param {string} id
 * @returns {object|undefined} Whatever the statement's `get()` yields for that id.
 */
export function getTaskGroup(id) {
  const lookup = getDb().prepare('SELECT * FROM task_tracker WHERE id = ?');
  return lookup.get(id);
}
|
|
64
|
+
|
|
65
|
+
// -- List active task groups ---------------------------------
|
|
66
|
+
/**
 * List every task_tracker row whose status is 'active', newest first
 * (ordered by created_at descending).
 * @returns {object[]}
 */
export function listActiveTaskGroups() {
  const query = getDb().prepare("SELECT * FROM task_tracker WHERE status = 'active' ORDER BY created_at DESC");
  return query.all();
}
|
|
73
|
+
|
|
74
|
+
// -- Agent reports it started --------------------------------
|
|
75
|
+
/**
 * Mark an agent as 'running', stamping both started_at and last_heartbeat
 * with the current time. An existing session_key is kept unless a new one
 * is supplied.
 *
 * @param {string} trackerId
 * @param {string} agentLabel
 * @param {string} [sessionKey] - Optional OC session key for auto-correlation
 */
export function agentStarted(trackerId, agentLabel, sessionKey) {
  const db = getDb();
  const timestamp = sqliteNow();
  const markRunning = db.prepare(`
    UPDATE task_tracker_agents
    SET status = 'running', started_at = ?, last_heartbeat = ?, session_key = COALESCE(?, session_key)
    WHERE tracker_id = ? AND agent_label = ?
  `);
  // A falsy key is bound as NULL so COALESCE leaves the stored key untouched.
  markRunning.run(timestamp, timestamp, sessionKey || null, trackerId, agentLabel);
}
|
|
89
|
+
|
|
90
|
+
// -- Register session key (orchestrator sets this after spawning) --
|
|
91
|
+
/**
 * Link an OpenClaw session key to a tracker agent.
 * The dispatcher uses this for auto-correlation -- sub-agents don't
 * need to actively heartbeat; the dispatcher detects them via sessions_list.
 *
 * Also refreshes last_heartbeat, promotes a 'pending' agent to 'running',
 * and fills started_at when it was still NULL. Other statuses and an
 * existing started_at are left as they are.
 *
 * @param {string} trackerId
 * @param {string} agentLabel
 * @param {string} sessionKey - e.g. "agent:main:subagent:abc-123"
 */
export function registerAgentSession(trackerId, agentLabel, sessionKey) {
  const db = getDb();
  const timestamp = sqliteNow();
  const linkSession = db.prepare(`
    UPDATE task_tracker_agents
    SET session_key = ?, last_heartbeat = ?,
        status = CASE WHEN status = 'pending' THEN 'running' ELSE status END,
        started_at = CASE WHEN started_at IS NULL THEN ? ELSE started_at END
    WHERE tracker_id = ? AND agent_label = ?
  `);
  linkSession.run(sessionKey, timestamp, timestamp, trackerId, agentLabel);
}
|
|
110
|
+
|
|
111
|
+
// -- Touch heartbeat (called by auto-correlation) ------------
|
|
112
|
+
/**
 * Refresh an agent's last_heartbeat to the current time. As a side effect a
 * 'pending' agent becomes 'running' and a NULL started_at is filled in.
 *
 * @param {string} trackerId
 * @param {string} agentLabel
 */
export function touchAgentHeartbeat(trackerId, agentLabel) {
  const db = getDb();
  const timestamp = sqliteNow();
  const touch = db.prepare(`
    UPDATE task_tracker_agents
    SET last_heartbeat = ?,
        status = CASE WHEN status = 'pending' THEN 'running' ELSE status END,
        started_at = CASE WHEN started_at IS NULL THEN ? ELSE started_at END
    WHERE tracker_id = ? AND agent_label = ?
  `);
  touch.run(timestamp, timestamp, trackerId, agentLabel);
}
|
|
127
|
+
|
|
128
|
+
// -- Agent reports completion --------------------------------
|
|
129
|
+
/**
 * Record that an agent finished successfully: status becomes 'completed',
 * finished_at is set to the current time, and the exit message is stored
 * (NULL when none is given).
 *
 * @param {string} trackerId
 * @param {string} agentLabel
 * @param {string} [exitMessage]
 */
export function agentCompleted(trackerId, agentLabel, exitMessage) {
  const db = getDb();
  const finishedAt = sqliteNow();
  const markCompleted = db.prepare(`
    UPDATE task_tracker_agents
    SET status = 'completed', finished_at = ?, exit_message = ?
    WHERE tracker_id = ? AND agent_label = ?
  `);
  markCompleted.run(finishedAt, exitMessage || null, trackerId, agentLabel);
}
|
|
143
|
+
|
|
144
|
+
// -- Agent reports failure -----------------------------------
|
|
145
|
+
/**
 * Record that an agent failed: status becomes 'failed', finished_at is set
 * to the current time, and the error text is stored (NULL when none is given).
 *
 * @param {string} trackerId
 * @param {string} agentLabel
 * @param {string} [error]
 */
export function agentFailed(trackerId, agentLabel, error) {
  const db = getDb();
  const finishedAt = sqliteNow();
  const markFailed = db.prepare(`
    UPDATE task_tracker_agents
    SET status = 'failed', finished_at = ?, error = ?
    WHERE tracker_id = ? AND agent_label = ?
  `);
  markFailed.run(finishedAt, error || null, trackerId, agentLabel);
}
|
|
159
|
+
|
|
160
|
+
// -- Check for dead agents (timeout exceeded) ----------------
|
|
161
|
+
/**
 * Dead-man's-switch sweep.
 *
 * Selects agents that are still 'pending' or 'running' in an 'active'
 * tracker whose age (now - created_at) has reached timeout_s, and that have
 * either never sent a heartbeat or whose last heartbeat is older than the
 * grace window. The grace window is the smaller of timeout_s and 300 seconds.
 *
 * Every selected agent is marked 'dead', then group completion is
 * re-evaluated once per affected tracker.
 *
 * @returns {Array<{tracker_id: string, agent_label: string, agent_id: string}>}
 *   The rows selected by the sweep (as they were before being marked dead).
 */
export function checkDeadAgents() {
  const db = getDb();
  const now = sqliteNow();

  const findExpired = db.prepare(`
    SELECT a.id as agent_id, a.tracker_id, a.agent_label, a.status as agent_status,
           t.timeout_s, t.created_at as tracker_created_at
    FROM task_tracker_agents a
    JOIN task_tracker t ON a.tracker_id = t.id
    WHERE a.status IN ('pending', 'running')
      AND t.status = 'active'
      AND (julianday(?) - julianday(t.created_at)) * 86400 >= t.timeout_s
      AND (a.last_heartbeat IS NULL
           OR (julianday(?) - julianday(a.last_heartbeat)) * 86400 > CASE WHEN t.timeout_s < 300 THEN t.timeout_s ELSE 300 END)
  `);
  const expired = findExpired.all(now, now);

  const markDead = db.prepare(`
    UPDATE task_tracker_agents
    SET status = 'dead', finished_at = ?, error = 'Timed out (dead-man switch)'
    WHERE id = ?
  `);
  expired.forEach(({ agent_id }) => {
    markDead.run(now, agent_id);
  });

  // One completion check per tracker, in first-seen order.
  const affectedTrackers = new Set(expired.map((row) => row.tracker_id));
  affectedTrackers.forEach((trackerId) => {
    checkGroupCompletion(trackerId);
  });

  return expired;
}
|
|
204
|
+
|
|
205
|
+
// -- Check if all agents in a group are done -----------------
|
|
206
|
+
/**
 * Close out a tracker once every one of its agents has reached a terminal
 * state ('completed', 'failed' or 'dead').
 *
 * The tracker becomes 'completed' when all agents completed and 'failed'
 * when any agent failed or died. A one-line-per-agent summary is stored
 * alongside completed_at.
 *
 * @param {string} trackerId
 * @returns {object|null} The tracker row merged with its new status,
 *   completed_at and summary; or null when the tracker is missing, is not
 *   'active', has no agents, or still has non-terminal agents.
 */
export function checkGroupCompletion(trackerId) {
  const db = getDb();
  const now = sqliteNow();

  const tracker = db.prepare('SELECT * FROM task_tracker WHERE id = ?').get(trackerId);
  if (!tracker) return null;
  if (tracker.status !== 'active') return null;

  const agents = db.prepare('SELECT * FROM task_tracker_agents WHERE tracker_id = ?').all(trackerId);
  if (agents.length === 0) return null;

  const isTerminal = ({ status }) => status === 'completed' || status === 'failed' || status === 'dead';
  if (agents.some((agent) => !isTerminal(agent))) return null;

  const unsuccessful = agents.filter(({ status }) => status === 'failed' || status === 'dead');
  const groupStatus = unsuccessful.length > 0 ? 'failed' : 'completed';

  const describe = ({ agent_label: label, status, exit_message, error }) => {
    switch (status) {
      case 'completed':
        return `[ok] ${label}: ${exit_message || 'done'}`;
      case 'failed':
        return `[FAILED] ${label}: ${error || 'failed'}`;
      case 'dead':
        return `[DEAD] ${label}: timed out`;
      default:
        return `[${status}] ${label}`;
    }
  };
  const summary = agents.map(describe).join('\n');

  const closeTracker = db.prepare(`
    UPDATE task_tracker
    SET status = ?, completed_at = ?, summary = ?
    WHERE id = ?
  `);
  closeTracker.run(groupStatus, now, summary, trackerId);

  return { ...tracker, status: groupStatus, completed_at: now, summary };
}
|
|
248
|
+
|
|
249
|
+
// -- Get status summary for a task group ---------------------
|
|
250
|
+
/**
 * Build a status snapshot for a task group.
 *
 * `elapsed` is whole seconds since the tracker's created_at (the current time
 * is used when created_at cannot be parsed) and `remaining_timeout` is
 * timeout_s minus elapsed, floored at 0. Each agent's `duration` is
 * finished - started when both are set, now - started while it is still
 * running, and null when it never started.
 *
 * @param {string} trackerId
 * @returns {{ id: string, name: string, status: string, agents: Array<{label: string, status: string, session_key?: string, last_heartbeat?: string, duration: number|null, exit_message?: string, error?: string}>, elapsed: number, remaining_timeout: number, summary?: string, delivery_channel: *, delivery_to: * }|null}
 *   null when no tracker has that id.
 */
export function getTaskGroupStatus(trackerId) {
  const db = getDb();

  const tracker = db.prepare('SELECT * FROM task_tracker WHERE id = ?').get(trackerId);
  if (!tracker) return null;

  const agentRows = db
    .prepare('SELECT * FROM task_tracker_agents WHERE tracker_id = ? ORDER BY agent_label')
    .all(trackerId);

  const now = new Date();
  const wholeSeconds = (from, to) => Math.floor((to - from) / 1000);

  const createdAt = parseSqliteDate(tracker.created_at) || new Date();
  const elapsedS = wholeSeconds(createdAt, now);
  const remainingTimeout = Math.max(0, tracker.timeout_s - elapsedS);

  const describeAgent = (row) => {
    let duration = null;
    if (row.started_at) {
      const began = parseSqliteDate(row.started_at);
      if (row.finished_at) {
        const ended = parseSqliteDate(row.finished_at);
        if (began && ended) duration = wholeSeconds(began, ended);
      } else if (began) {
        // Still running: measure against the current time.
        duration = wholeSeconds(began, now);
      }
    }

    return {
      label: row.agent_label,
      status: row.status,
      session_key: row.session_key || undefined,
      last_heartbeat: row.last_heartbeat || undefined,
      duration,
      exit_message: row.exit_message || undefined,
      error: row.error || undefined,
    };
  };

  return {
    id: tracker.id,
    name: tracker.name,
    status: tracker.status,
    agents: agentRows.map(describeAgent),
    elapsed: elapsedS,
    remaining_timeout: remainingTimeout,
    summary: tracker.summary || undefined,
    delivery_channel: tracker.delivery_channel,
    delivery_to: tracker.delivery_to,
  };
}
|