openclaw-scheduler 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/README.md +16 -6
- package/cli.js +13 -4
- package/dispatch/README.md +18 -3
- package/dispatch/completion.mjs +1312 -34
- package/dispatch/hooks.mjs +17 -5
- package/dispatch/index.mjs +600 -226
- package/dispatch/message-input.mjs +67 -0
- package/dispatch/watcher.mjs +381 -43
- package/dispatcher-strategies.js +203 -30
- package/dispatcher.js +6 -1
- package/gateway.js +71 -8
- package/index.d.ts +1 -0
- package/package.json +3 -1
- package/scripts/dispatch-cli-utils.mjs +53 -0
- package/scripts/inbox-watcher-guardrail.mjs +506 -0
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { execFileSync } from 'child_process';
|
|
4
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
|
5
|
+
import { dirname, join } from 'path';
|
|
6
|
+
import { homedir } from 'os';
|
|
7
|
+
import { pathToFileURL } from 'url';
|
|
8
|
+
import { getDb } from '../db.js';
|
|
9
|
+
import { resolveSchedulerHome } from '../paths.js';
|
|
10
|
+
|
|
11
|
+
/**
 * Turn a raw argv list into an option map.
 *
 * Only tokens starting with `--` are treated as options. An option takes the
 * following token as its value when that token is non-empty and is not itself
 * an option; otherwise the option is stored as the boolean `true`. Every other
 * token is ignored.
 *
 * @param {string[]} argv - Command-line tokens (typically `process.argv.slice(2)`).
 * @returns {Object<string, string|true>} Option name (without the dashes) to value.
 */
function parseArgs(argv) {
  const parsed = {};
  let cursor = 0;
  while (cursor < argv.length) {
    const token = argv[cursor];
    cursor += 1;
    if (!token.startsWith('--')) continue;

    const name = token.slice(2);
    // After the increment above, `cursor` points at the potential value token.
    const candidate = argv[cursor];
    const hasValue = Boolean(candidate) && !candidate.startsWith('--');
    parsed[name] = hasValue ? candidate : true;
    if (hasValue) cursor += 1;
  }
  return parsed;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Parse a base-10 integer option, falling back when it is missing or out of range.
 *
 * The value is stringified before parsing, so `null`/`undefined` and
 * non-numeric input (including the boolean `true` of a bare flag) yield the fallback.
 *
 * @param {*} value - Raw option value.
 * @param {number} fallback - Returned when `value` is not an acceptable integer.
 * @param {{allowZero?: boolean}} [options] - `allowZero` lowers the minimum from 1 to 0.
 * @returns {number} The parsed integer or `fallback`.
 */
function parsePositiveInt(value, fallback, { allowZero = false } = {}) {
  const candidate = Number.parseInt(String(value ?? ''), 10);
  const minimum = allowZero ? 0 : 1;
  return Number.isInteger(candidate) && candidate >= minimum ? candidate : fallback;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Return the first argument that is a string with non-whitespace content.
 *
 * @param {...*} values - Candidates in priority order; non-strings are skipped.
 * @returns {string} The trimmed winner, or `''` when no candidate qualifies.
 */
function firstNonEmpty(...values) {
  const winner = values.find(
    (candidate) => typeof candidate === 'string' && candidate.trim() !== '',
  );
  return winner === undefined ? '' : winner.trim();
}
|
|
43
|
+
|
|
44
|
+
/**
 * Convert a timestamp string to epoch milliseconds, treating zone-less values as UTC.
 *
 * Accepts both the `YYYY-MM-DD HH:MM:SS` form (space separator) and ISO-8601
 * strings. A trailing `Z` or an explicit `+HH:MM` / `-HH:MM` offset is
 * honoured; anything without a zone designator gets `Z` appended.
 *
 * @param {*} value - Timestamp text. Non-strings and blank strings are rejected
 *   instead of throwing, so callers can feed it values read from a JSON state file.
 * @returns {number|null} Epoch milliseconds, or `null` when the input cannot be parsed.
 */
function sqliteTimestampToMs(value) {
  if (typeof value !== 'string') return null;
  const trimmed = value.trim();
  if (!trimmed) return null;

  const normalized = trimmed.includes('T') ? trimmed : trimmed.replace(' ', 'T');
  // Appending `Z` to a string that already carries an offset makes it unparseable.
  const hasZone = /(?:Z|[+-]\d{2}:\d{2})$/.test(normalized);
  const ms = new Date(hasZone ? normalized : `${normalized}Z`).getTime();
  return Number.isNaN(ms) ? null : ms;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Format an epoch-millisecond instant as an ISO-8601 UTC string.
 *
 * @param {number} [nowMs=Date.now()] - Instant to format; defaults to the current time.
 * @returns {string} The result of `Date.prototype.toISOString` for that instant.
 */
function isoNow(nowMs = Date.now()) {
  const instant = new Date(nowMs);
  return instant.toISOString();
}
|
|
54
|
+
|
|
55
|
+
/**
 * Render a duration in seconds as a short human-readable string.
 *
 * Output uses the two most significant units once past a minute:
 * `42s`, `17m`, `3h 5m`, `2d 4h`. Non-finite, zero and negative input is `0s`.
 *
 * @param {number} seconds - Duration in seconds.
 * @returns {string} Compact duration text.
 */
function formatDuration(seconds) {
  const isPositive = Number.isFinite(seconds) && seconds > 0;
  if (!isPositive) return '0s';
  if (seconds < 60) return `${seconds}s`;

  const totalMinutes = Math.floor(seconds / 60);
  if (totalMinutes < 60) return `${totalMinutes}m`;

  const totalHours = Math.floor(totalMinutes / 60);
  if (totalHours >= 24) {
    return `${Math.floor(totalHours / 24)}d ${totalHours % 24}h`;
  }
  return `${totalHours}h ${totalMinutes % 60}m`;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Read and parse a JSON file, returning a fallback on any failure.
 *
 * Both read errors (e.g. missing file) and parse errors are swallowed on
 * purpose: callers use this for optional state/config files.
 *
 * @param {string} path - File to read as UTF-8.
 * @param {*} fallback - Value returned when the file cannot be read or parsed.
 * @returns {*} The parsed JSON value, or `fallback`.
 */
function loadJson(path, fallback) {
  let parsed;
  try {
    const raw = readFileSync(path, 'utf8');
    parsed = JSON.parse(raw);
  } catch {
    return fallback;
  }
  return parsed;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Write a value to disk as pretty-printed JSON (2-space indent, trailing newline).
 *
 * The parent directory is created first, including any missing ancestors.
 *
 * @param {string} path - Destination file.
 * @param {*} value - Value passed to `JSON.stringify`.
 * @returns {void}
 */
function saveJson(path, value) {
  const parentDir = dirname(path);
  mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(value, null, 2);
  writeFileSync(path, `${serialized}\n`);
}
|
|
77
|
+
|
|
78
|
+
/**
 * Pull the service fields the guardrail uses out of `launchctl print` text.
 *
 * Each field is read from the first `key = value` line matching its key.
 * Missing string fields become `''`; numeric fields become `null` when the
 * line is missing or its value does not start with an integer.
 *
 * @param {string} text - Raw `launchctl print` output.
 * @returns {{exists: true, state: string, pid: number|null, runs: number|null,
 *   lastExitCode: number|null, lastTerminatingSignal: string, raw: string}}
 */
export function parseLaunchctlPrint(text) {
  // First capture group of the first match, trimmed; '' when the key is absent.
  const capture = (pattern) => {
    const found = text.match(pattern);
    return found?.[1]?.trim() || '';
  };
  // Leading base-10 integer of the captured text, or null when there is none.
  const captureInt = (pattern) => {
    const numeric = Number.parseInt(capture(pattern), 10);
    return Number.isFinite(numeric) ? numeric : null;
  };

  const state = capture(/^\s*state\s*=\s*(.+)$/m);
  const pid = captureInt(/^\s*pid\s*=\s*(.+)$/m);
  const runs = captureInt(/^\s*runs\s*=\s*(.+)$/m);
  const lastExitCode = captureInt(/^\s*last exit code\s*=\s*(.+)$/m);
  const lastTerminatingSignal = capture(/^\s*last terminating signal\s*=\s*(.+)$/m);

  return {
    exists: true,
    state,
    pid,
    runs,
    lastExitCode,
    lastTerminatingSignal,
    raw: text,
  };
}
|
|
95
|
+
|
|
96
|
+
/**
 * Query launchd for a GUI-domain service by running `launchctl print`.
 *
 * @param {string} label - LaunchAgent label.
 * @param {number|string} uid - User id used to build the `gui/<uid>/<label>` target.
 * @returns {object} On success: `service` plus the fields from
 *   `parseLaunchctlPrint`. On any failure (including the command not being
 *   available): a record with `exists: false`, `state: 'missing'`, null
 *   counters, the captured stdout as `raw`, and `error` / `exitCode` details.
 */
function inspectLaunchctl(label, uid) {
  const service = `gui/${uid}/${label}`;
  const spawnOptions = { encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] };
  try {
    const printed = execFileSync('launchctl', ['print', service], spawnOptions);
    const parsed = parseLaunchctlPrint(printed);
    return { service, ...parsed };
  } catch (failure) {
    const stdoutText = String(failure.stdout || '');
    const stderrText = String(failure.stderr || '').trim();
    return {
      service,
      exists: false,
      state: 'missing',
      pid: null,
      runs: null,
      lastExitCode: null,
      lastTerminatingSignal: '',
      raw: stdoutText,
      // Prefer what launchctl printed on stderr; fall back to the spawn error text.
      error: firstNonEmpty(stderrText, failure.message),
      exitCode: failure.status ?? 1,
    };
  }
}
|
|
116
|
+
|
|
117
|
+
/**
 * Inspect a LaunchAgent plist: does it exist, and does the script it launches exist?
 *
 * The plist is converted to JSON with `plutil`; the script path is taken from
 * `ProgramArguments[1]`.
 *
 * @param {string} plistPath - Path of the plist file.
 * @returns {object} Always contains `exists`, `plistPath`, `programArguments`,
 *   `scriptPath` and `scriptExists`. Adds `label` on success, or `error` when
 *   the file is missing (`'plist missing'`) or could not be converted/parsed.
 */
function inspectPlist(plistPath) {
  // Fresh "nothing known" fields for both failure shapes (new array each call).
  const blankFields = () => ({
    plistPath,
    programArguments: [],
    scriptPath: '',
    scriptExists: false,
  });

  if (!existsSync(plistPath)) {
    return { exists: false, ...blankFields(), error: 'plist missing' };
  }

  try {
    const converted = execFileSync(
      'plutil',
      ['-convert', 'json', '-o', '-', plistPath],
      { encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] },
    );
    const plist = JSON.parse(converted);
    const programArguments = Array.isArray(plist.ProgramArguments) ? plist.ProgramArguments : [];
    const secondArg = programArguments[1];
    const scriptPath = typeof secondArg === 'string' ? secondArg : '';
    const scriptExists = scriptPath ? existsSync(scriptPath) : false;
    const label = typeof plist.Label === 'string' ? plist.Label : '';
    return { exists: true, plistPath, programArguments, scriptPath, scriptExists, label };
  } catch (failure) {
    const stderrText = String(failure.stderr || '').trim();
    // The file is present but unusable, so `exists` stays true and `error` explains why.
    return { exists: true, ...blankFields(), error: firstNonEmpty(stderrText, failure.message) };
  }
}
|
|
153
|
+
|
|
154
|
+
/**
 * Summarise the pending inbox messages addressed to an agent (or to `broadcast`).
 *
 * @param {object} db - Database handle exposing `prepare(sql).get(...params)`.
 * @param {string} agentId - Agent whose inbox is inspected.
 * @param {number} nowMs - Reference "now" in epoch milliseconds for age computation.
 * @returns {{pendingCount: number, oldestCreatedAt: string|null,
 *   newestCreatedAt: string|null, oldestAgeSec: number, newestAgeSec: number}}
 *   Ages are whole seconds, clamped at 0, and 0 when there is no usable timestamp.
 */
function inspectQueue(db, agentId, nowMs) {
  const statement = db.prepare(`
    SELECT COUNT(*) AS pending_count,
           MIN(created_at) AS oldest_created_at,
           MAX(created_at) AS newest_created_at
    FROM messages
    WHERE status = 'pending'
      AND (to_agent = ? OR to_agent = 'broadcast')
  `);
  const summary = statement.get(agentId);

  // Whole seconds elapsed since `createdMs`, never negative; 0 for a falsy timestamp.
  const ageInSeconds = (createdMs) => {
    if (!createdMs) return 0;
    return Math.max(0, Math.floor((nowMs - createdMs) / 1000));
  };

  const oldestCreatedAt = summary?.oldest_created_at || null;
  const newestCreatedAt = summary?.newest_created_at || null;
  const oldestMs = sqliteTimestampToMs(oldestCreatedAt);
  const newestMs = sqliteTimestampToMs(newestCreatedAt);

  return {
    pendingCount: summary?.pending_count ?? 0,
    oldestCreatedAt,
    newestCreatedAt,
    oldestAgeSec: ageInSeconds(oldestMs),
    newestAgeSec: ageInSeconds(newestMs),
  };
}
|
|
178
|
+
|
|
179
|
+
/**
 * Evaluate a snapshot of the inbox watcher and decide whether it is healthy.
 *
 * Checks, in order: the LaunchAgent plist (only the first applicable plist
 * problem is reported), the launchd service state, a crash loop (the `runs`
 * counter grew by at least `crashLoopRunsThreshold` since a previous
 * observation taken no more than `crashLoopWindowSec` ago), and the pending
 * queue size / age.
 *
 * @param {object} params
 * @param {object} [params.launchctl] - Service info (`exists`, `state`, `pid`, `runs`, `error`).
 * @param {object} [params.plist] - Plist info (`exists`, `error`, `scriptPath`, `scriptExists`, `plistPath`).
 * @param {object} [params.queue] - Queue info (`pendingCount`, `oldestAgeSec`).
 * @param {object|null} [params.previousObservation=null] - Earlier observation
 *   providing `observedAt` and `launchctl.runs` for crash-loop detection.
 * @param {number} [params.nowMs=Date.now()] - Reference time in epoch milliseconds.
 * @param {number} [params.ageThresholdSec=600] - Oldest-message age that counts as stale.
 * @param {number} [params.countThreshold=10] - Pending count that counts as piling up.
 * @param {number} [params.crashLoopWindowSec=900] - Max age of the previous observation.
 * @param {number} [params.crashLoopRunsThreshold=3] - Minimum `runs` increase to flag.
 * @returns {{status: 'OK'|'ALERT', issues: Array<{code: string, detail: string}>,
 *   issueCodes: string[], shouldKickstart: boolean}} `shouldKickstart` is always
 *   a boolean: true only when the service and its script exist and at least one
 *   restart-fixable issue was found.
 */
export function evaluateInboxWatcherHealth({
  launchctl,
  plist,
  queue,
  previousObservation = null,
  nowMs = Date.now(),
  ageThresholdSec = 600,
  countThreshold = 10,
  crashLoopWindowSec = 900,
  crashLoopRunsThreshold = 3,
}) {
  const issues = [];

  if (!plist?.exists) {
    issues.push({ code: 'plist_missing', detail: plist?.plistPath || 'LaunchAgent plist missing' });
  } else if (plist?.error) {
    issues.push({ code: 'plist_unreadable', detail: plist.error });
  } else if (!plist?.scriptPath) {
    issues.push({ code: 'plist_script_path_missing', detail: 'ProgramArguments[1] missing' });
  } else if (!plist?.scriptExists) {
    issues.push({ code: 'plist_script_missing', detail: plist.scriptPath });
  }

  if (!launchctl?.exists) {
    issues.push({ code: 'watcher_missing', detail: launchctl?.error || 'launchctl could not find service' });
  } else if (launchctl.state !== 'running' || !launchctl.pid) {
    issues.push({
      code: 'watcher_not_running',
      detail: `state=${launchctl.state || 'unknown'} pid=${launchctl.pid ?? 'none'}`,
    });
  }

  const previousRuns = previousObservation?.launchctl?.runs;
  // Fall back to the built-in Date parser when the helper cannot read the value;
  // an unusable timestamp ends up as NaN and disables the crash-loop check below.
  const previousObservedAtMs = sqliteTimestampToMs(previousObservation?.observedAt)
    || new Date(previousObservation?.observedAt || '').getTime();
  if (
    launchctl?.exists
    && Number.isInteger(launchctl.runs)
    && Number.isInteger(previousRuns)
    && Number.isFinite(previousObservedAtMs)
  ) {
    const runsDelta = launchctl.runs - previousRuns;
    const ageSec = Math.max(0, Math.floor((nowMs - previousObservedAtMs) / 1000));
    if (runsDelta >= crashLoopRunsThreshold && ageSec <= crashLoopWindowSec) {
      issues.push({ code: 'watcher_crash_loop', detail: `runs+${runsDelta} in ${ageSec}s` });
    }
  }

  if ((queue?.pendingCount || 0) >= countThreshold) {
    issues.push({ code: 'queue_piling', detail: `pending=${queue.pendingCount}` });
  }
  if ((queue?.oldestAgeSec || 0) >= ageThresholdSec) {
    issues.push({ code: 'queue_stale', detail: `oldest=${queue.oldestAgeSec}s` });
  }

  const issueCodes = [...new Set(issues.map((issue) => issue.code))];
  // Only these issues can plausibly be fixed by restarting the service.
  const restartFixable = ['watcher_not_running', 'watcher_crash_loop', 'queue_piling', 'queue_stale'];
  // Coerce explicitly so partial inputs never leak `undefined` into the result.
  const canRestart = Boolean(launchctl?.exists && plist?.scriptExists);
  const shouldKickstart = canRestart && issueCodes.some((code) => restartFixable.includes(code));

  return {
    status: issueCodes.length > 0 ? 'ALERT' : 'OK',
    issues,
    issueCodes,
    shouldKickstart,
  };
}
|
|
245
|
+
|
|
246
|
+
// Human-readable summary for each issue code the health evaluation can emit.
const ISSUE_CODE_SUMMARIES = new Map([
  ['plist_missing', 'LaunchAgent plist missing'],
  ['plist_unreadable', 'LaunchAgent plist unreadable'],
  ['plist_script_path_missing', 'LaunchAgent ProgramArguments missing script path'],
  ['plist_script_missing', 'LaunchAgent script path does not exist'],
  ['watcher_missing', 'launchctl cannot find com.openclaw.inbox-watcher'],
  ['watcher_not_running', 'inbox watcher is not running'],
  ['watcher_crash_loop', 'inbox watcher appears to be crash-looping'],
  ['queue_piling', 'pending inbox queue is piling up'],
  ['queue_stale', 'pending inbox messages are too old'],
]);

/**
 * Translate an issue code into a short sentence fragment for alerts.
 *
 * @param {string} code - Issue code.
 * @returns {string} The matching description, or `code` itself when it is not recognised.
 */
function summarizeIssueCode(code) {
  const summary = ISSUE_CODE_SUMMARIES.get(code);
  return summary === undefined ? code : summary;
}
|
|
260
|
+
|
|
261
|
+
/**
 * Compose the single-line alert text sent to the operator.
 *
 * The diagnostic "Action" hint targets the launchd service recorded in the
 * snapshots and the monitored agent, so it stays accurate when the label or
 * agent differ from the defaults.
 *
 * @param {object} params
 * @param {string} params.finalStatus - `'ALERT'` selects the "needs attention"
 *   headline; any other value selects the "auto-recovered" headline.
 * @param {string[]} params.detectedIssueCodes - Issue codes to summarise.
 * @param {object} params.preSnapshot - Snapshot taken before any self-heal
 *   (reads `queue`, `plist.scriptExists`, `plist.scriptPath`, `launchctl.service`).
 * @param {object} params.postSnapshot - Snapshot taken afterwards
 *   (reads `queue` and `launchctl.{service,state,pid,runs}`).
 * @param {object|null} params.actionTaken - Kickstart result (`ok`, `error`), or
 *   a falsy value when no self-heal was attempted.
 * @param {{ageThresholdSec: number, countThreshold: number}} params.thresholds
 * @param {string} [params.agentId='main'] - Agent named in the sqlite3 hint.
 * @param {string} [params.hostLabel='rh-bot'] - Host name shown in the headline.
 * @returns {string} The alert sentences joined with single spaces.
 */
function buildAlertMessage({
  finalStatus,
  detectedIssueCodes,
  preSnapshot,
  postSnapshot,
  actionTaken,
  thresholds,
  agentId = 'main',
  hostLabel = 'rh-bot',
}) {
  const icon = finalStatus === 'ALERT' ? '🚨' : '⚠️';
  const headline = finalStatus === 'ALERT'
    ? `${icon} Inbox watcher delivery path needs attention on ${hostLabel}.`
    : `${icon} Inbox watcher delivery path glitched on ${hostLabel} but auto-recovered.`;

  const issues = detectedIssueCodes.map(summarizeIssueCode).join('; ');
  const preQueue = `pending=${preSnapshot.queue.pendingCount}, oldest=${formatDuration(preSnapshot.queue.oldestAgeSec)}`;
  const postQueue = `pending=${postSnapshot.queue.pendingCount}, oldest=${formatDuration(postSnapshot.queue.oldestAgeSec)}`;
  const watcherState = `state=${postSnapshot.launchctl.state || 'missing'}, pid=${postSnapshot.launchctl.pid ?? 'none'}, runs=${postSnapshot.launchctl.runs ?? 'unknown'}`;
  const kickstart = actionTaken
    ? `Self-heal: launchctl kickstart ${actionTaken.ok ? 'succeeded' : `failed (${actionTaken.error || 'unknown error'})`}.`
    : 'Self-heal: not attempted.';
  const configHint = preSnapshot.plist.scriptExists
    ? ''
    : ` LaunchAgent script path: ${preSnapshot.plist.scriptPath || 'missing'}.`;

  // Prefer the service target that was actually inspected; only fall back to the
  // default label (and the uid lookup) when the snapshots do not carry one.
  const service = postSnapshot.launchctl.service
    || preSnapshot.launchctl?.service
    || `gui/${process.getuid()}/com.openclaw.inbox-watcher`;
  // Double any single quote so the suggested SQL stays well-formed.
  const sqlAgent = String(agentId).replaceAll("'", "''");

  return [
    headline,
    `Detected: ${issues}.`,
    `Queue before check: ${preQueue}. Queue after check: ${postQueue}.`,
    `Watcher after check: ${watcherState}.`,
    kickstart,
    `Triggers: queue oldest >= ${formatDuration(thresholds.ageThresholdSec)} or pending >= ${thresholds.countThreshold}, plus watcher state / crash-loop checks.${configHint}`,
    `Action: launchctl print ${service}; tail -n 80 ~/.openclaw/logs/inbox-watcher.log; sqlite3 ~/.openclaw/scheduler/scheduler.db "select count(*), min(created_at) from messages where status='pending' and (to_agent='${sqlAgent}' or to_agent='broadcast');"`,
  ].join(' ');
}
|
|
295
|
+
|
|
296
|
+
/**
 * Decide whether an alert should be sent, applying a per-key cooldown.
 *
 * The alert key combines the final status with the sorted issue codes. An
 * alert is suppressed only when the previous alert had the same key and was
 * sent less than `alertCooldownSec` seconds before `nowMs`.
 *
 * @param {object} params
 * @param {string} params.finalStatus - `'OK'` never notifies.
 * @param {string[]} params.detectedIssueCodes - Issue codes (not mutated).
 * @param {object} [params.previousState] - Persisted state with `lastAlertAt` / `lastAlertKey`.
 * @param {number} params.nowMs - Current time in epoch milliseconds.
 * @param {number} params.alertCooldownSec - Cooldown length in seconds.
 * @returns {{notify: boolean, key: string, suppressed: boolean}}
 */
function decideNotification({ finalStatus, detectedIssueCodes, previousState, nowMs, alertCooldownSec }) {
  if (finalStatus === 'OK') {
    return { notify: false, key: 'OK', suppressed: false };
  }

  const sortedCodes = [...detectedIssueCodes].sort();
  const key = `${finalStatus}:${sortedCodes.join(',')}`;

  const priorAlertAt = previousState?.lastAlertAt;
  const lastAlertAtMs = sqliteTimestampToMs(priorAlertAt) || new Date(priorAlertAt || '').getTime();
  const cooldownMs = alertCooldownSec * 1000;
  const isRepeat = Number.isFinite(lastAlertAtMs)
    && nowMs - lastAlertAtMs < cooldownMs
    && previousState?.lastAlertKey === key;

  return { notify: !isRepeat, key, suppressed: isRepeat };
}
|
|
311
|
+
|
|
312
|
+
/**
 * Send a plain-text message through the Telegram Bot API `sendMessage` method.
 *
 * The request is aborted after `timeoutMs` so a stalled connection cannot hang
 * the caller indefinitely; the resulting rejection propagates like any other
 * send failure.
 *
 * @param {object} params
 * @param {string} params.botToken - Bot token, embedded in the request URL.
 * @param {string|number} params.chatId - Destination chat id.
 * @param {string} params.text - Message body.
 * @param {number} [params.timeoutMs=15000] - Abort the request after this many milliseconds.
 * @returns {Promise<*>} The `result` field of the API response.
 * @throws {Error} When the HTTP status is not OK or the API reports `ok: false`
 *   (message is the API `description` when present), or when the request
 *   fails or times out.
 */
async function sendTelegramMessage({ botToken, chatId, text, timeoutMs = 15000 }) {
  const response = await fetch(`https://api.telegram.org/bot${botToken}/sendMessage`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ chat_id: chatId, text }),
    signal: AbortSignal.timeout(timeoutMs),
  });
  // A non-JSON body (e.g. a proxy error page) is treated as an empty payload so
  // the status-based error below is still raised.
  const payload = await response.json().catch(() => ({}));
  if (!response.ok || payload.ok === false) {
    throw new Error(payload.description || `Telegram send failed (${response.status})`);
  }
  return payload.result;
}
|
|
324
|
+
|
|
325
|
+
/**
 * Find the Telegram bot token to use for alerts.
 *
 * The `TELEGRAM_BOT_TOKEN` environment variable wins when set to a non-empty
 * value; otherwise `channels.telegram.botToken` is read from the JSON config file.
 *
 * @param {string} openclawConfigPath - Path of the JSON config file.
 * @returns {string} The token, or `''` when none is configured.
 */
function resolveBotToken(openclawConfigPath) {
  const fromEnv = process.env.TELEGRAM_BOT_TOKEN;
  if (fromEnv) return fromEnv;

  const telegram = loadJson(openclawConfigPath, {})?.channels?.telegram;
  return telegram?.botToken || '';
}
|
|
330
|
+
|
|
331
|
+
/**
 * Ask launchd to restart a service via `launchctl kickstart -k`.
 *
 * Never throws: a failing command is reported in the returned record.
 *
 * @param {string} service - launchd service target, e.g. `gui/<uid>/<label>`.
 * @returns {{attempted: true, ok: boolean, action: 'kickstart', error?: string,
 *   exitCode?: number}} `error` and `exitCode` are present only on failure.
 */
function kickstartWatcher(service) {
  const outcome = { attempted: true, ok: true, action: 'kickstart' };
  try {
    execFileSync('launchctl', ['kickstart', '-k', service], {
      encoding: 'utf8',
      stdio: ['ignore', 'pipe', 'pipe'],
    });
  } catch (failure) {
    const stderrText = String(failure.stderr || '').trim();
    return {
      ...outcome,
      ok: false,
      // Prefer launchctl's own stderr; fall back to the spawn error text.
      error: firstNonEmpty(stderrText, failure.message),
      exitCode: failure.status ?? 1,
    };
  }
  return outcome;
}
|
|
345
|
+
|
|
346
|
+
/**
 * Pause asynchronously.
 *
 * @param {number} ms - Delay in milliseconds, passed to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
349
|
+
|
|
350
|
+
/**
 * Gather one observation of the watcher: launchd state, plist state and queue depth.
 *
 * @param {object} params
 * @param {string} params.label - LaunchAgent label to inspect.
 * @param {string} params.plistPath - Path of the LaunchAgent plist.
 * @param {string} params.agentId - Agent whose pending messages are counted.
 * @param {number} params.nowMs - Observation time in epoch milliseconds.
 * @returns {Promise<{observedAt: string, launchctl: object, plist: object, queue: object}>}
 */
async function collectSnapshot({ label, plistPath, agentId, nowMs }) {
  const db = getDb();
  const observedAt = isoNow(nowMs);
  // The service is looked up in the current user's GUI domain.
  const launchctl = inspectLaunchctl(label, process.getuid());
  const plist = inspectPlist(plistPath);
  const queue = inspectQueue(db, agentId, nowMs);
  return { observedAt, launchctl, plist, queue };
}
|
|
359
|
+
|
|
360
|
+
/**
 * CLI entry point: observe the inbox watcher, try one self-heal, alert, persist state.
 *
 * Flow:
 *   1. Resolve options from `--flags`, then environment variables, then defaults.
 *   2. Snapshot the watcher and evaluate it against the previous run's state.
 *   3. If the evaluation recommends it (and this is not a dry run), kickstart
 *      the service, wait `recheck-delay-ms`, then snapshot and evaluate again.
 *   4. Send a Telegram alert unless it is suppressed by the cooldown or this is
 *      a dry run.
 *   5. Write the state file and print a one-line status (plus JSON with `--json`).
 *
 * Exits the process with code 1 when the final status is `ALERT`, otherwise 0.
 *
 * @returns {Promise<void>} Does not return normally; the process exits.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  const schedulerHome = resolveSchedulerHome(process.env);
  // Default the database location before the first getDb() call
  // (presumably getDb reads SCHEDULER_DB — confirm in ../db.js).
  if (!process.env.SCHEDULER_DB) {
    process.env.SCHEDULER_DB = join(schedulerHome, 'scheduler.db');
  }
  const homeDir = firstNonEmpty(process.env.HOME, homedir());
  const openclawConfigPath = join(homeDir, '.openclaw', 'openclaw.json');
  const plistPath = firstNonEmpty(args['plist-path'], join(homeDir, 'Library', 'LaunchAgents', 'com.openclaw.inbox-watcher.plist'));
  const stateFile = firstNonEmpty(args['state-file'], join(schedulerHome, 'state', 'inbox-watcher-guardrail.json'));
  const label = firstNonEmpty(args.label, 'com.openclaw.inbox-watcher');
  const agentId = firstNonEmpty(args.agent, process.env.INBOX_AGENT, 'main');
  // NOTE(review): the default alert target is a hard-coded chat id. Deployments
  // should set --alert-target or INBOX_GUARDRAIL_ALERT_TARGET explicitly.
  const alertTarget = firstNonEmpty(args['alert-target'], process.env.INBOX_GUARDRAIL_ALERT_TARGET, '484946046');
  const ageThresholdSec = parsePositiveInt(args['queue-age-threshold-sec'] || process.env.INBOX_GUARDRAIL_QUEUE_AGE_THRESHOLD_SEC, 600);
  const countThreshold = parsePositiveInt(args['queue-count-threshold'] || process.env.INBOX_GUARDRAIL_QUEUE_COUNT_THRESHOLD, 10);
  const alertCooldownSec = parsePositiveInt(args['alert-cooldown-sec'] || process.env.INBOX_GUARDRAIL_ALERT_COOLDOWN_SEC, 21600);
  const crashLoopWindowSec = parsePositiveInt(args['crash-loop-window-sec'] || process.env.INBOX_GUARDRAIL_CRASH_LOOP_WINDOW_SEC, 900);
  const crashLoopRunsThreshold = parsePositiveInt(args['crash-loop-runs-threshold'] || process.env.INBOX_GUARDRAIL_CRASH_LOOP_RUNS_THRESHOLD, 3);
  const recheckDelayMs = parsePositiveInt(args['recheck-delay-ms'] || process.env.INBOX_GUARDRAIL_RECHECK_DELAY_MS, 5000, { allowZero: true });
  const dryRun = Boolean(args['dry-run']);
  const jsonMode = Boolean(args.json);

  // A state file containing valid JSON that is not an object (e.g. `null`, a
  // number or an array) is treated like a missing file instead of crashing later.
  const loadedState = loadJson(stateFile, {});
  const previousState = loadedState !== null && typeof loadedState === 'object' && !Array.isArray(loadedState)
    ? loadedState
    : {};
  const nowMs = Date.now();
  const thresholds = { ageThresholdSec, countThreshold, crashLoopWindowSec, crashLoopRunsThreshold };

  const preSnapshot = await collectSnapshot({ label, plistPath, agentId, nowMs });
  const preEval = evaluateInboxWatcherHealth({
    launchctl: preSnapshot.launchctl,
    plist: preSnapshot.plist,
    queue: preSnapshot.queue,
    previousObservation: previousState,
    nowMs,
    ...thresholds,
  });

  let actionTaken = null;
  let postSnapshot = preSnapshot;
  let finalStatus = 'OK';
  let finalEval = preEval;
  let detectedIssueCodes = preEval.issueCodes;

  if (preEval.status === 'ALERT' && preEval.shouldKickstart && !dryRun) {
    actionTaken = kickstartWatcher(preSnapshot.launchctl.service);
    if (recheckDelayMs > 0) await sleep(recheckDelayMs);
    postSnapshot = await collectSnapshot({ label, plistPath, agentId, nowMs: Date.now() });
    finalEval = evaluateInboxWatcherHealth({
      launchctl: postSnapshot.launchctl,
      plist: postSnapshot.plist,
      queue: postSnapshot.queue,
      previousObservation: previousState,
      nowMs: Date.now(),
      ...thresholds,
    });
    // Report everything seen before or after the restart, even if it recovered.
    detectedIssueCodes = [...new Set([...preEval.issueCodes, ...finalEval.issueCodes])];
    finalStatus = finalEval.status === 'OK' ? 'RECOVERED' : 'ALERT';
  } else if (preEval.status === 'ALERT') {
    finalStatus = 'ALERT';
  }

  const alertDecision = decideNotification({
    finalStatus,
    detectedIssueCodes,
    previousState,
    nowMs: Date.now(),
    alertCooldownSec,
  });

  let alertError = '';
  let alertSent = false;
  if (!dryRun && alertDecision.notify) {
    const botToken = resolveBotToken(openclawConfigPath);
    const text = buildAlertMessage({
      finalStatus,
      detectedIssueCodes,
      preSnapshot,
      postSnapshot,
      actionTaken,
      thresholds,
      // Identifies the monitored agent to the message builder.
      agentId,
    });

    if (!botToken) {
      alertError = 'missing telegram bot token';
    } else {
      try {
        await sendTelegramMessage({ botToken, chatId: alertTarget, text });
        alertSent = true;
      } catch (err) {
        // A failed alert must not abort the run; it is recorded in state and output.
        alertError = err.message;
      }
    }
  }

  const nextState = {
    observedAt: postSnapshot.observedAt,
    launchctl: {
      state: postSnapshot.launchctl.state,
      pid: postSnapshot.launchctl.pid,
      runs: postSnapshot.launchctl.runs,
      lastExitCode: postSnapshot.launchctl.lastExitCode,
      lastTerminatingSignal: postSnapshot.launchctl.lastTerminatingSignal,
    },
    queue: postSnapshot.queue,
    lastStatus: finalStatus,
    lastIssueCodes: detectedIssueCodes,
    // Keep the previous alert bookkeeping unless a new alert actually went out,
    // so the cooldown is measured from the last delivered alert.
    lastAlertAt: alertSent ? isoNow() : previousState.lastAlertAt || null,
    lastAlertKey: alertSent ? alertDecision.key : previousState.lastAlertKey || null,
    lastAlertSent: alertSent,
    lastAlertError: alertError || null,
    lastActionTaken: actionTaken,
  };
  saveJson(stateFile, nextState);

  const result = {
    status: finalStatus,
    pre: preSnapshot,
    post: postSnapshot,
    evaluation: finalEval,
    detectedIssueCodes,
    thresholds,
    actionTaken,
    alert: {
      notify: alertDecision.notify,
      suppressed: alertDecision.suppressed,
      key: alertDecision.key,
      sent: alertSent,
      error: alertError || null,
      target: alertTarget,
    },
    stateFile,
    dryRun,
  };

  // In a dry run no send is attempted, so report it as skipped, not failed.
  let alertSummary = 'not-needed';
  if (alertSent) {
    alertSummary = 'sent';
  } else if (alertDecision.suppressed) {
    alertSummary = 'suppressed';
  } else if (alertDecision.notify && dryRun) {
    alertSummary = 'skipped:dry-run';
  } else if (alertDecision.notify) {
    alertSummary = `failed:${alertError || 'unknown'}`;
  }

  const queueSummary = `pending=${postSnapshot.queue.pendingCount} oldest=${formatDuration(postSnapshot.queue.oldestAgeSec)}`;
  const watcherSummary = `state=${postSnapshot.launchctl.state || 'missing'} pid=${postSnapshot.launchctl.pid ?? 'none'} runs=${postSnapshot.launchctl.runs ?? 'unknown'}`;
  process.stdout.write(`STATUS ${finalStatus} | queue ${queueSummary} | watcher ${watcherSummary} | alert ${alertSummary}\n`);
  if (jsonMode) {
    process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
  }

  if (finalStatus === 'ALERT') process.exit(1);
  process.exit(0);
}
|
|
503
|
+
|
|
504
|
+
// Run the CLI only when this file is the script Node was started with, so the
// module can be imported for its exports without side effects. The argv[1]
// check matters because `pathToFileURL` throws when it is handed `undefined`
// (e.g. when the module is loaded from `node -e` or a REPL).
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  await main();
}
|