@controlflow-ai/daemon 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +54 -6
  2. package/package.json +3 -1
  3. package/src/agent-avatar.ts +30 -0
  4. package/src/agent-key.ts +28 -0
  5. package/src/agent-permissions.ts +359 -0
  6. package/src/agent-runtime.ts +795 -28
  7. package/src/agent-workspace.ts +183 -0
  8. package/src/app.ts +1970 -79
  9. package/src/args.ts +54 -7
  10. package/src/cli.ts +873 -14
  11. package/src/client.ts +472 -10
  12. package/src/coco.ts +9 -40
  13. package/src/codex.ts +33 -5
  14. package/src/config.ts +28 -4
  15. package/src/console.ts +230 -20
  16. package/src/daemon-client.ts +116 -3
  17. package/src/daemon.ts +936 -98
  18. package/src/db.ts +3128 -122
  19. package/src/delivery-ws.ts +269 -0
  20. package/src/format.ts +4 -1
  21. package/src/lark/cli.ts +3 -3
  22. package/src/lark/event-router.ts +60 -4
  23. package/src/lark/inbound-events.ts +156 -3
  24. package/src/lark/server-integration.ts +659 -111
  25. package/src/lark/ws-daemon.ts +136 -10
  26. package/src/local-api.ts +545 -15
  27. package/src/local-auth.ts +33 -1
  28. package/src/message-attachments.ts +71 -0
  29. package/src/messaging-cli.ts +741 -0
  30. package/src/messaging-status.ts +669 -0
  31. package/src/migrations/024_agents_model.ts +10 -0
  32. package/src/migrations/025_room_archive.ts +44 -0
  33. package/src/migrations/026_project_archive.ts +44 -0
  34. package/src/migrations/027_agent_permission_profiles.ts +16 -0
  35. package/src/migrations/028_lark_websocket_restart_state.ts +16 -0
  36. package/src/migrations/029_held_message_drafts.ts +32 -0
  37. package/src/migrations/030_agent_room_read_state.ts +25 -0
  38. package/src/migrations/031_room_tasks.ts +29 -0
  39. package/src/migrations/032_room_reminders.ts +29 -0
  40. package/src/migrations/033_room_saved_messages.ts +25 -0
  41. package/src/migrations/034_agent_activity_events.ts +27 -0
  42. package/src/migrations/035_agent_avatars.ts +17 -0
  43. package/src/migrations/036_project_agent_defaults.ts +21 -0
  44. package/src/migrations/037_message_attachments.ts +36 -0
  45. package/src/migrations/038_agent_activity_room_scope.ts +64 -0
  46. package/src/migrations/039_message_attachments_path.ts +34 -0
  47. package/src/migrations/040_message_attachments_file_schema.ts +80 -0
  48. package/src/migrations/041_room_system_events.ts +30 -0
  49. package/src/migrations/042_message_attachment_file_kind.ts +52 -0
  50. package/src/migrations/043_room_mode_skill_registry.ts +92 -0
  51. package/src/migrations/044_workflow_runtime.ts +69 -0
  52. package/src/migrations/045_skill_repository_ownership.ts +64 -0
  53. package/src/migrations.ts +69 -1
  54. package/src/neeko.ts +40 -4
  55. package/src/runtime-env.ts +179 -0
  56. package/src/runtime-registry.ts +83 -13
  57. package/src/server.ts +244 -4
  58. package/src/token-file.ts +13 -6
  59. package/src/types.ts +362 -0
  60. package/src/workflow-runtime.ts +275 -0
  61. package/src/web.ts +0 -904
@@ -0,0 +1,669 @@
1
+ import type { DeliveryBacklogSummary } from './db.js';
2
+ export type { DeliveryBacklogSummary } from './db.js';
3
+
4
/**
 * Statistics for one daemon connection's delivery websocket.
 *
 * The `last_*_at` fields are timestamp strings or `null`; this module parses
 * `last_pong_at`, `last_ping_at` and `last_open_at` when looking for stale
 * heartbeats.
 */
export interface DeliveryConnectionStats {
  connection_id: string;
  computer_id: string | null;
  open_sockets: number;

  // Lifecycle and heartbeat timestamps.
  last_open_at: string | null;
  last_close_at: string | null;
  last_ping_at: string | null;
  last_pong_at: string | null;

  // Details of the most recent close, when one was recorded.
  last_close_code: number | null;
  last_close_reason: string | null;

  pending_agents: { agent: string; pending: number }[];
  backlog: DeliveryBacklogSummary[];
}
17
+
18
/**
 * Delivery websocket state across all daemon connections: the per-connection
 * statistics plus aggregate counters that the diagnostics in this module read.
 */
export interface DeliveryWebSocketSummary {
  connections: DeliveryConnectionStats[];
  totals: {
    connections: number;
    open_sockets: number;
    // Delivery counters; any non-zero value among the first three is reported
    // as a non-empty backlog by the diagnostics builder.
    pending_deliveries: number;
    claimed_deliveries: number;
    processing_completed_deliveries: number;
    expired_active_deliveries: number;
  };
}
29
+
30
/** One diagnostic finding: a stable code, a severity level and readable text. */
export interface MessagingDiagnostic {
  /** Identifier such as `lark_error` or `delivery_ws_open`. */
  code: string;
  level: 'ok' | 'warn';
  message: string;
  /** Optional remediation text attached to warnings. */
  hint?: string;
}
36
+
37
/** Roll-up of a diagnostics list: overall level, every code, and the warning count. */
export interface MessagingStatusSummary {
  level: 'ok' | 'warn';
  codes: string[];
  warn_count: number;
}
42
+
43
/**
 * Status snapshot for a single Lark bot.
 *
 * Only `app_id` and `ws_state` are required; every other field is optional and
 * the consumers in this module treat a missing value as "unknown".
 */
export interface LarkMessagingStatus {
  // Identity and binding.
  app_id: string;
  agent?: string | null;
  label?: string | null;
  bot_open_id_known?: boolean;
  authorized_user_count?: number;

  // Local paths.
  runtime_home?: string;
  config_path?: string;
  db_path?: string | null;

  // Websocket state.
  ws_state: string | null;
  ws_last_connect_at?: string | null;
  ws_next_connect_at?: string | null;
  ws_reconnect_attempts?: number | null;
  ws_last_event_at?: string | null;
  ws_last_event_type?: string | null;
  ws_last_error?: string | null;

  // Restart bookkeeping.
  restart_count?: number;
  last_restart_at?: string | null;
  last_restart_reason?: string | null;

  // Event counters.
  event_count?: number;
  message_event_count?: number;
  provider_event_count?: number;
  provider_message_event_count?: number;
  last_provider_event_at?: string | null;
  last_provider_message_event_at?: string | null;

  // Stored-event counters and latest stored rows. The row shapes are opaque
  // here; this module only reads `received_at` and `parse_ok` from them.
  db_event_count?: number;
  db_provider_event_count?: number;
  db_probe_event_count?: number;
  db_parse_error_count?: number;
  db_last_event?: unknown;
  db_last_probe_event?: unknown;

  // Most recent ingest/delivery outcome and error.
  last_ingest_status?: string | null;
  last_delivery_notify?: unknown;
  last_error?: string | null;
}
78
+
79
/**
 * Full messaging status: the raw Lark bot and delivery websocket inputs
 * together with the diagnostics derived from them and their summary.
 */
export interface MessagingStatus {
  summary: MessagingStatusSummary;
  diagnostics: MessagingDiagnostic[];
  lark: {
    bots: LarkMessagingStatus[];
  };
  delivery_websocket: DeliveryWebSocketSummary;
}
85
+
86
/** Health verdict for one domain (Lark provider, delivery websocket or backlog). */
export interface MessagingHealthDomain {
  level: 'ok' | 'warn';
  /** Codes of the warnings that contributed to this domain. */
  codes: string[];
  message: string;
  hint?: string;
  /** Action identifier such as `repair_lark_websocket`, when one applies. */
  recommended_action?: string;
  /** Free-form counters and identifiers rendered into health log lines. */
  details?: Record<string, unknown>;
}
94
+
95
/** Domain-by-domain health view derived from a `MessagingStatus`. */
export interface MessagingHealth {
  level: 'ok' | 'warn';
  domains: {
    lark_provider: MessagingHealthDomain;
    delivery_websocket: MessagingHealthDomain;
    delivery_backlog: MessagingHealthDomain;
  };
  summary: MessagingStatusSummary;
}
104
+
105
/** A rendered health log message paired with the key used to throttle repeats. */
export interface MessagingHealthLogLine {
  throttle_key: string;
  message: string;
}
109
+
110
/** Union of the domain keys declared on `MessagingHealth['domains']`. */
type MessagingHealthDomainName = keyof MessagingHealth['domains'];
111
+
112
/**
 * Loosely typed view of a Lark bot status used while building diagnostics.
 * Every field is `unknown` so each read has to go through a validating helper.
 */
interface LarkStatusForDiagnostics {
  // Identity and binding.
  app_id?: unknown;
  ws_state?: unknown;
  agent?: unknown;
  bot_open_id_known?: unknown;
  authorized_user_count?: unknown;

  // Event counters.
  event_count?: unknown;
  provider_event_count?: unknown;
  db_event_count?: unknown;
  db_provider_event_count?: unknown;
  db_parse_error_count?: unknown;
  db_last_event?: unknown;

  // Errors.
  last_error?: unknown;
  ws_last_error?: unknown;

  // Restart bookkeeping.
  restart_count?: unknown;
  last_restart_at?: unknown;
  last_restart_reason?: unknown;
}
130
+
131
// Fallback thresholds in milliseconds, used when the matching PAL_* environment
// variable is unset or invalid.

/** Ten minutes. */
const DEFAULT_LARK_EVENT_STALE_AFTER_MS = 1000 * 60 * 10;

/** Two minutes. */
const DEFAULT_DELIVERY_WS_PONG_STALE_AFTER_MS = 1000 * 60 * 2;

/** One minute. */
const DEFAULT_LARK_RESTART_EFFECT_GRACE_MS = 1000 * 60;
134
+
135
/**
 * Reads an untyped value as a number.
 *
 * @returns The value when it is a finite number, otherwise `0` (non-numbers,
 *   `NaN` and the infinities all collapse to `0`).
 */
function numberField(value: unknown): number {
  if (typeof value !== 'number') return 0;
  return Number.isFinite(value) ? value : 0;
}
138
+
139
/**
 * Stored provider-event count for a bot: `db_provider_event_count` when it is
 * present (not null/undefined), otherwise `db_event_count`. The chosen value is
 * normalized with `numberField`.
 */
function providerDbEventCount(bot: Pick<LarkMessagingStatus, 'db_event_count' | 'db_provider_event_count'> | { db_event_count?: unknown; db_provider_event_count?: unknown }): number {
  const preferred = bot.db_provider_event_count ?? bot.db_event_count;
  return numberField(preferred);
}
142
+
143
/**
 * Reads an untyped value as a non-blank string.
 *
 * @returns The original (untrimmed) string when it contains at least one
 *   non-whitespace character, otherwise `null`.
 */
function stringField(value: unknown): string | null {
  if (typeof value !== 'string') return null;
  return value.trim().length > 0 ? value : null;
}
146
+
147
/**
 * Age in milliseconds after which the latest stored Lark event counts as stale.
 * Reads `PAL_LARK_EVENT_STALE_AFTER_MS`; the default applies unless the
 * variable parses to a finite number greater than zero.
 */
function larkEventStaleAfterMs(): number {
  const configured = process.env.PAL_LARK_EVENT_STALE_AFTER_MS;
  if (configured) {
    const ms = Number(configured);
    if (Number.isFinite(ms) && ms > 0) return ms;
  }
  return DEFAULT_LARK_EVENT_STALE_AFTER_MS;
}
153
+
154
/**
 * Age in milliseconds after which a delivery websocket heartbeat counts as stale.
 * Reads `PAL_DELIVERY_WS_PONG_STALE_AFTER_MS`; the default applies unless the
 * variable parses to a finite number greater than zero.
 */
function deliveryWsPongStaleAfterMs(): number {
  const configured = process.env.PAL_DELIVERY_WS_PONG_STALE_AFTER_MS;
  if (configured) {
    const ms = Number(configured);
    if (Number.isFinite(ms) && ms > 0) return ms;
  }
  return DEFAULT_DELIVERY_WS_PONG_STALE_AFTER_MS;
}
160
+
161
/**
 * Grace period in milliseconds that must pass after a Lark websocket restart
 * before the restart can be reported as ineffective.
 * Reads `PAL_LARK_RESTART_EFFECT_GRACE_MS`; unlike the stale thresholds, a
 * parsed value of zero is accepted. Empty, non-finite or negative values fall
 * back to the default.
 */
function larkRestartEffectGraceMs(): number {
  const configured = process.env.PAL_LARK_RESTART_EFFECT_GRACE_MS;
  if (configured) {
    const ms = Number(configured);
    if (Number.isFinite(ms) && ms >= 0) return ms;
  }
  return DEFAULT_LARK_RESTART_EFFECT_GRACE_MS;
}
167
+
168
/**
 * Parses a timestamp value into epoch milliseconds.
 *
 * A string of exactly the form `YYYY-MM-DD HH:MM:SS` is rewritten to
 * `YYYY-MM-DDTHH:MM:SSZ` (i.e. read as UTC) before parsing; any other
 * non-blank string is handed to `Date.parse` unchanged.
 *
 * @returns Epoch milliseconds, or `null` for non-strings, blank strings and
 *   strings `Date.parse` cannot read.
 */
function parseTimestamp(value: unknown): number | null {
  const text = stringField(value);
  if (text === null) return null;

  let candidate = text;
  if (/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$/.test(text)) {
    candidate = `${text.replace(' ', 'T')}Z`;
  }

  const epochMs = Date.parse(candidate);
  if (!Number.isFinite(epochMs)) return null;
  return epochMs;
}
177
+
178
/**
 * Extracts and parses the `received_at` property of an opaque event record.
 *
 * @returns Epoch milliseconds, or `null` when the value is not an object or
 *   its `received_at` cannot be parsed.
 */
function parseInboundReceivedAt(value: unknown): number | null {
  if (typeof value !== 'object' || value === null) return null;
  const { received_at: receivedAt } = value as { received_at?: unknown };
  return parseTimestamp(receivedAt);
}
182
+
183
/**
 * Reads the `parse_ok` flag of an opaque event record.
 *
 * @returns `true` for `1`/`true`, `false` for `0`/`false`, and `null` when the
 *   value is not an object or the flag holds anything else.
 */
function inboundParseOk(value: unknown): boolean | null {
  if (typeof value !== 'object' || value === null) return null;
  switch ((value as { parse_ok?: unknown }).parse_ok) {
    case 1:
    case true:
      return true;
    case 0:
    case false:
      return false;
    default:
      return null;
  }
}
190
+
191
/**
 * Returns the raw `received_at` string of the bot's `db_last_event`, or `null`
 * when the event is missing, not an object, or has a non-string/blank value.
 */
function latestEventReceivedAt(bot: LarkMessagingStatus): string | null {
  const event = bot.db_last_event;
  if (typeof event !== 'object' || event === null) return null;
  const receivedAt = (event as { received_at?: unknown }).received_at;
  if (typeof receivedAt !== 'string') return null;
  return receivedAt.trim() ? receivedAt : null;
}
196
+
197
/**
 * Returns the raw `received_at` string of the bot's `db_last_probe_event`, or
 * `null` when the event is missing, not an object, or has a non-string/blank
 * value.
 */
function latestProbeEventReceivedAt(bot: LarkMessagingStatus): string | null {
  const probe = bot.db_last_probe_event;
  if (typeof probe !== 'object' || probe === null) return null;
  const receivedAt = (probe as { received_at?: unknown }).received_at;
  if (typeof receivedAt !== 'string') return null;
  return receivedAt.trim() ? receivedAt : null;
}
202
+
203
/**
 * Selects the warn-level diagnostics whose code is in `codes`, keeping the
 * order in which they appear in `diagnostics`.
 */
function warningDiagnostics(diagnostics: MessagingDiagnostic[], codes: string[]): MessagingDiagnostic[] {
  const wanted = new Set(codes);
  const matches: MessagingDiagnostic[] = [];
  for (const entry of diagnostics) {
    if (entry.level !== 'warn') continue;
    if (wanted.has(entry.code)) matches.push(entry);
  }
  return matches;
}
207
+
208
/**
 * Returns a sorted copy of `diagnostics`, ordered by each code's position in
 * `priority`. Codes missing from `priority` sort after all listed codes. The
 * input array is not modified.
 */
function prioritizedDiagnostics(diagnostics: MessagingDiagnostic[], priority: string[]): MessagingDiagnostic[] {
  const rank = new Map<string, number>();
  // Later duplicates overwrite earlier ones, matching Map construction from entries.
  priority.forEach((code, index) => rank.set(code, index));
  const rankOf = (entry: MessagingDiagnostic): number => rank.get(entry.code) ?? Number.MAX_SAFE_INTEGER;
  return diagnostics.slice().sort((left, right) => rankOf(left) - rankOf(right));
}
212
+
213
/** Returns the first non-empty `hint` found in `diagnostics`, or `undefined`. */
function firstHint(diagnostics: MessagingDiagnostic[]): string | undefined {
  for (const entry of diagnostics) {
    if (entry.hint) return entry.hint;
  }
  return undefined;
}
216
+
217
/**
 * Chooses the hint shown for the Lark provider health domain.
 *
 * - When the bot is both missing events since restart and stale, but no
 *   restart has been judged ineffective, a dedicated "not reaching this
 *   server" hint is returned.
 * - Otherwise the first diagnostic hint is used, with the cluster-mode note
 *   appended when `lark_restart_ineffective` is present.
 */
function larkProviderHint(codes: string[], diagnostics: MessagingDiagnostic[]): string | undefined {
  const clusterHint = 'Feishu long-connection delivery is cluster-mode, so if another client for the same app is online, fresh events may be delivered there instead of this Pal server.';
  const has = (code: string): boolean => codes.includes(code);
  const restartIneffective = has('lark_restart_ineffective');

  if (!restartIneffective && has('lark_no_events_since_restart') && has('lark_events_stale')) {
    return `Pal is connected to Lark but fresh Feishu messages are not reaching this server; inspect Feishu long-connection event subscription and other consumers of this app stream. ${clusterHint}`;
  }

  const hint = firstHint(diagnostics);
  if (!hint) return hint;
  return restartIneffective ? `${hint} ${clusterHint}` : hint;
}
230
+
231
/**
 * Lists connections that have open sockets but a stale heartbeat.
 *
 * For each connection the reference time is the last pong, falling back to the
 * last ping and then the last open when no pong timestamp parses. A connection
 * is stale when that reference is older than the configured threshold;
 * connections with no parseable reference are not reported.
 */
function staleDeliveryHeartbeatConnections(summary: DeliveryWebSocketSummary): DeliveryConnectionStats[] {
  if (summary.totals.open_sockets <= 0) return [];

  const now = Date.now();
  const staleAfterMs = deliveryWsPongStaleAfterMs();
  const stale: DeliveryConnectionStats[] = [];

  for (const connection of summary.connections) {
    if (connection.open_sockets <= 0) continue;
    const referenceAt = parseTimestamp(connection.last_pong_at)
      ?? parseTimestamp(connection.last_ping_at)
      ?? parseTimestamp(connection.last_open_at);
    if (referenceAt === null) continue;
    if (now - referenceAt > staleAfterMs) stale.push(connection);
  }
  return stale;
}
243
+
244
/**
 * Returns the action of the first rule (in rule order) whose code appears in
 * `codes`, or `undefined` when no rule matches.
 */
function recommendedAction(codes: string[], rules: Array<[string, string]>): string | undefined {
  const present = new Set(codes);
  for (const [code, action] of rules) {
    if (present.has(code)) return action;
  }
  return undefined;
}
248
+
249
/**
 * Special-case recommendation: when both `lark_no_events_since_restart` and
 * `lark_events_stale` are present, point at the Feishu event subscription.
 * Returns `undefined` for any other combination.
 */
function recommendLarkStaleNoIngressAction(codes: string[]): string | undefined {
  const required = ['lark_no_events_since_restart', 'lark_events_stale'];
  const allPresent = required.every((code) => codes.includes(code));
  return allPresent ? 'inspect_feishu_event_subscription' : undefined;
}
256
+
257
/**
 * Diagnostic code -> recommended action for the Lark provider domain.
 * Order matters: the first rule whose code is present wins.
 */
const LARK_PROVIDER_ACTION_RULES: [string, string][] = [
  // Connection-level failures.
  ['lark_error', 'repair_lark_websocket'],
  ['lark_ws_not_connected', 'repair_lark_websocket'],
  // Connected, but provider events are not arriving.
  ['lark_restart_ineffective', 'inspect_feishu_event_subscription'],
  ['lark_probe_only_since_restart', 'inspect_feishu_event_subscription'],
  ['lark_no_events_since_restart', 'send_real_feishu_message_then_watch_lark'],
  ['lark_events_stale', 'repair_lark_websocket'],
  // Configuration gaps.
  ['lark_no_bots', 'configure_lark_bot'],
  ['lark_no_bound_agent', 'bind_lark_bot_agent'],
  ['lark_no_authorized_users', 'authorize_lark_user'],
  ['lark_bot_open_id_unknown', 'refresh_lark_binding'],
  // Stored-event quality.
  ['lark_raw_event_parse_errors', 'repair_lark_event_parse'],
];
270
+
271
/**
 * Recommends an action for a set of Lark provider diagnostic codes.
 * The stale-and-no-ingress special case is checked first; otherwise the
 * ordered `LARK_PROVIDER_ACTION_RULES` table decides.
 */
export function recommendLarkProviderAction(codes: string[]): string | undefined {
  const staleNoIngress = recommendLarkStaleNoIngressAction(codes);
  if (staleNoIngress !== undefined && staleNoIngress !== null) return staleNoIngress;
  return recommendedAction(codes, LARK_PROVIDER_ACTION_RULES);
}
274
+
275
/** Creates a fresh summary with no connections and every total set to zero. */
export function emptyDeliveryWebSocketSummary(): DeliveryWebSocketSummary {
  const totals: DeliveryWebSocketSummary['totals'] = {
    connections: 0,
    open_sockets: 0,
    pending_deliveries: 0,
    claimed_deliveries: 0,
    processing_completed_deliveries: 0,
    expired_active_deliveries: 0,
  };
  return { connections: [], totals };
}
288
+
289
/**
 * Assembles the full messaging status: derives diagnostics from the inputs,
 * summarizes them, and returns both alongside the untouched inputs.
 */
export function buildMessagingStatus(input: { larkBots: LarkMessagingStatus[]; deliveryWebSocket: DeliveryWebSocketSummary }): MessagingStatus {
  const { larkBots, deliveryWebSocket } = input;
  const diagnostics = buildMessagingDiagnostics({ larkBots, deliveryWebSocket });
  const summary = summarizeMessagingDiagnostics(diagnostics);
  return {
    summary,
    diagnostics,
    lark: { bots: larkBots },
    delivery_websocket: deliveryWebSocket,
  };
}
301
+
302
/**
 * Builds the diagnostics for one Lark bot status entry.
 *
 * The entry is untyped: a `null`, `undefined` or primitive entry is treated as
 * an empty status (reported as app `unknown` with a disconnected websocket)
 * instead of throwing on property access.
 */
function collectLarkBotDiagnostics(
  rawBot: unknown,
  timing: { now: number; staleAfterMs: number; restartGraceMs: number },
): MessagingDiagnostic[] {
  const { now, staleAfterMs, restartGraceMs } = timing;
  const diagnostics: MessagingDiagnostic[] = [];
  const bot = (rawBot !== null && typeof rawBot === 'object' ? rawBot : {}) as LarkStatusForDiagnostics;

  const appId = stringField(bot.app_id) ?? 'unknown';
  const wsState = stringField(bot.ws_state);
  const boundAgent = stringField(bot.agent);
  // Optional checks only fire when the status actually carries the field.
  const hasAgentField = Object.prototype.hasOwnProperty.call(bot, 'agent');
  const botOpenIdKnown = bot.bot_open_id_known === true;
  const hasBotOpenIdKnownField = typeof bot.bot_open_id_known === 'boolean';
  const hasAuthorizedUserCountField = typeof bot.authorized_user_count === 'number';
  const authorizedUserCount = numberField(bot.authorized_user_count);
  const totalEventCount = numberField(bot.event_count);
  const hasProviderEventCountField = typeof bot.provider_event_count === 'number';
  // Prefer the provider-only counter; fall back to the total when it is absent.
  const eventCount = numberField(hasProviderEventCountField ? bot.provider_event_count : bot.event_count);
  const dbEventCount = providerDbEventCount(bot);
  const dbParseErrorCount = numberField(bot.db_parse_error_count);
  const hasDbLastEvent = Boolean(bot.db_last_event);
  const dbLastEventAt = parseInboundReceivedAt(bot.db_last_event);
  const latestParseOk = inboundParseOk(bot.db_last_event);
  const lastEventAgeMs = dbLastEventAt === null ? null : now - dbLastEventAt;
  const latestEventIsStale = lastEventAgeMs !== null && lastEventAgeMs > staleAfterMs;
  const lastError = stringField(bot.last_error) ?? stringField(bot.ws_last_error);
  const restartCount = numberField(bot.restart_count);
  const lastRestartAt = parseTimestamp(bot.last_restart_at);
  const lastRestartReason = stringField(bot.last_restart_reason);

  // Exactly one connection-state diagnostic is emitted per bot.
  if (lastError) {
    diagnostics.push({
      code: 'lark_error',
      level: 'warn',
      message: `Lark bot ${appId} reports error: ${lastError}`,
      hint: 'Restart this Lark websocket after fixing the reported error.',
    });
  } else if (wsState !== 'connected') {
    diagnostics.push({
      code: 'lark_ws_not_connected',
      level: 'warn',
      message: `Lark bot ${appId} websocket state is ${wsState ?? 'unknown'}.`,
      hint: 'Run messaging repair-lark; if it cannot reconnect, check the app credentials and network path.',
    });
  } else if (eventCount === 0 && dbEventCount > 0 && hasDbLastEvent) {
    diagnostics.push({
      code: 'lark_no_events_since_restart',
      level: 'warn',
      message: `Lark bot ${appId} is connected, but this server process has not received provider events since restart.`,
      hint: 'Send a fresh Feishu message; if this stays at zero, inspect Feishu event subscription/long-connection configuration and stop other clients for the same app stream.',
    });
  } else {
    diagnostics.push({ code: 'lark_ws_connected', level: 'ok', message: `Lark bot ${appId} websocket is connected.` });
  }

  if (hasProviderEventCountField && eventCount === 0 && totalEventCount > 0) {
    diagnostics.push({
      code: 'lark_probe_only_since_restart',
      level: 'warn',
      message: `Lark bot ${appId} has only received local probe events since restart, not Feishu provider events.`,
      hint: 'Pal local ingestion and delivery probing can pass while Feishu provider delivery is still broken; use watch-lark with a real Feishu message.',
    });
  }
  if (hasAgentField && !boundAgent) {
    diagnostics.push({
      code: 'lark_no_bound_agent',
      level: 'warn',
      message: `Lark bot ${appId} is not bound to an agent.`,
      hint: 'Bind this Lark bot to an agent before expecting inbound Feishu messages to create agent work.',
    });
  }
  if (hasAuthorizedUserCountField && authorizedUserCount === 0) {
    diagnostics.push({
      code: 'lark_no_authorized_users',
      level: 'warn',
      message: `Lark bot ${appId} has no authorized sender union IDs configured.`,
      hint: 'Add at least one authorized Feishu user with lark-users add; otherwise inbound messages are stored but skipped.',
    });
  }
  if (hasBotOpenIdKnownField && !botOpenIdKnown) {
    diagnostics.push({
      code: 'lark_bot_open_id_unknown',
      level: 'warn',
      message: `Lark bot ${appId} does not have a known bot open_id in local config.`,
      hint: 'Refresh the Lark binding so mention routing and outbound identity metadata do not depend on inference from future events.',
    });
  }
  if (wsState === 'connected' && lastEventAgeMs !== null && lastEventAgeMs > staleAfterMs) {
    diagnostics.push({
      code: 'lark_events_stale',
      level: 'warn',
      message: `Lark bot ${appId} has not stored an inbound event for ${Math.round(lastEventAgeMs / 1000)}s.`,
      hint: 'If a new Feishu message was just sent, Pal is not receiving provider events yet; repair the websocket, then check Feishu app event delivery.',
    });
  }
  // A restart only counts as ineffective once the grace period has elapsed.
  if (
    wsState === 'connected'
    && eventCount === 0
    && dbEventCount > 0
    && restartCount > 0
    && lastRestartAt !== null
    && now - lastRestartAt >= restartGraceMs
  ) {
    diagnostics.push({
      code: 'lark_restart_ineffective',
      level: 'warn',
      message: `Lark bot ${appId} has not received provider events after ${restartCount} websocket restart(s).`,
      hint: lastRestartReason === 'auto_stale'
        ? 'Automatic websocket repair has already run without restoring Feishu events; inspect Feishu long-connection event subscription and other consumers of this app stream.'
        : 'Manual websocket repair did not restore Feishu events; inspect Feishu long-connection event subscription and other consumers of this app stream.',
    });
  }
  // Parse failures are only surfaced while the latest event is still fresh.
  if (latestParseOk === false && !latestEventIsStale) {
    diagnostics.push({
      code: 'lark_raw_event_parse_errors',
      level: 'warn',
      message: `Lark bot ${appId} latest raw inbound event lacks a parseable provider event id (${dbParseErrorCount} total parse failure(s)).`,
      hint: 'This is historical raw-event quality; prioritize live event freshness unless new provider events are arriving with parse_ok=0.',
    });
  }
  return diagnostics;
}

/** Builds the diagnostics for the delivery websocket and the delivery backlog. */
function collectDeliveryDiagnostics(deliveryWebSocket: DeliveryWebSocketSummary): MessagingDiagnostic[] {
  const diagnostics: MessagingDiagnostic[] = [];
  const totals = deliveryWebSocket.totals;

  if (totals.connections === 0) {
    diagnostics.push({
      code: 'delivery_no_connections',
      level: 'warn',
      message: 'No active daemon connections are registered.',
      hint: 'Start or reconnect the daemon for this computer before expecting agent replies.',
    });
  } else if (totals.open_sockets === 0) {
    diagnostics.push({
      code: 'delivery_ws_no_open_sockets',
      level: 'warn',
      message: 'Daemon connections exist, but no delivery websocket is open.',
      hint: 'Restart the daemon or check /api/daemon/ws connectivity and connection credentials.',
    });
  } else {
    diagnostics.push({ code: 'delivery_ws_open', level: 'ok', message: `${totals.open_sockets} delivery websocket socket(s) are open.` });
  }

  const staleHeartbeatConnections = staleDeliveryHeartbeatConnections(deliveryWebSocket);
  if (staleHeartbeatConnections.length > 0) {
    diagnostics.push({
      code: 'delivery_ws_heartbeat_stale',
      level: 'warn',
      message: `${staleHeartbeatConnections.length} open delivery websocket connection(s) have stale or missing pong heartbeats.`,
      hint: 'The socket may be half-open. Restart the daemon or inspect websocket ping/pong logs and network path.',
    });
  }

  if (totals.pending_deliveries > 0 || totals.claimed_deliveries > 0 || totals.processing_completed_deliveries > 0) {
    diagnostics.push({
      code: 'delivery_backlog_non_empty',
      level: 'warn',
      message: `Delivery backlog is non-empty: pending=${totals.pending_deliveries}, claimed=${totals.claimed_deliveries}, processing_completed=${totals.processing_completed_deliveries}.`,
      hint: 'A message has reached the delivery queue; inspect daemon logs and runtime execution for the blocked deliveries.',
    });
  } else {
    diagnostics.push({ code: 'delivery_backlog_empty', level: 'ok', message: 'Delivery backlog is empty.' });
  }
  if (totals.expired_active_deliveries > 0) {
    diagnostics.push({
      code: 'delivery_expired_active',
      level: 'warn',
      message: `${totals.expired_active_deliveries} claimed/processing delivery lease(s) are expired.`,
      hint: 'Wait for retry or restart the affected daemon if the active run is gone.',
    });
  }
  return diagnostics;
}

/**
 * Derives the ordered list of messaging diagnostics.
 *
 * Output order: the `lark_no_bots` warning (only when no bots are given), then
 * each bot's diagnostics in input order, then the delivery websocket and
 * backlog diagnostics.
 *
 * @param input.larkBots Untyped bot status entries; each field is validated
 *   before use, and non-object entries are handled as empty statuses.
 * @param input.deliveryWebSocket Delivery websocket summary.
 * @returns Both `ok` and `warn` diagnostics.
 */
export function buildMessagingDiagnostics(input: { larkBots: unknown[]; deliveryWebSocket: DeliveryWebSocketSummary }): MessagingDiagnostic[] {
  const diagnostics: MessagingDiagnostic[] = [];
  // Thresholds and the clock are read once so every bot is judged identically.
  const timing = {
    staleAfterMs: larkEventStaleAfterMs(),
    now: Date.now(),
    restartGraceMs: larkRestartEffectGraceMs(),
  };

  if (input.larkBots.length === 0) {
    diagnostics.push({
      code: 'lark_no_bots',
      level: 'warn',
      message: 'No Lark bots are active on this server.',
      hint: 'Configure a Lark bot and reload the server integration before expecting Feishu messages.',
    });
  }
  for (const bot of input.larkBots) {
    diagnostics.push(...collectLarkBotDiagnostics(bot, timing));
  }
  diagnostics.push(...collectDeliveryDiagnostics(input.deliveryWebSocket));
  return diagnostics;
}
476
+
477
/**
 * Decides whether a Lark bot's websocket should be restarted.
 *
 * A restart is wanted when any of the following holds:
 * - `last_error` is set;
 * - the websocket state is not `connected`;
 * - no events were counted by this process although stored events exist;
 * - the latest stored event is older than the stale threshold.
 *
 * @param options.now Clock override in epoch milliseconds (defaults to `Date.now()`).
 * @param options.staleAfterMs Threshold override (defaults to the configured value).
 */
export function larkBotNeedsRestart(bot: LarkMessagingStatus, options: { now?: number; staleAfterMs?: number } = {}): boolean {
  // NOTE(review): only `last_error` is consulted here, whereas
  // buildMessagingDiagnostics also falls back to `ws_last_error` — confirm
  // that the difference is intentional.
  if (stringField(bot.last_error) !== null) return true;
  if (stringField(bot.ws_state) !== 'connected') return true;

  const receivedSinceStart = numberField(bot.provider_event_count ?? bot.event_count);
  const storedEvents = providerDbEventCount(bot);
  if (receivedSinceStart === 0 && storedEvents > 0 && Boolean(bot.db_last_event)) return true;

  const lastStoredAt = parseInboundReceivedAt(bot.db_last_event);
  if (lastStoredAt === null) return false;
  const threshold = options.staleAfterMs ?? larkEventStaleAfterMs();
  const reference = options.now ?? Date.now();
  return reference - lastStoredAt > threshold;
}
493
+
494
/**
 * Returns the distinct app ids of the bots that need a restart, in first-seen
 * order. Bots without a non-blank `app_id` are ignored.
 */
export function selectRestartableLarkBotAppIds(bots: LarkMessagingStatus[], options: { now?: number; staleAfterMs?: number } = {}): string[] {
  const selected = new Set<string>();
  bots.forEach((bot) => {
    const appId = stringField(bot.app_id);
    if (appId !== null && larkBotNeedsRestart(bot, options)) {
      selected.add(appId);
    }
  });
  return [...selected];
}
503
+
504
/**
 * Summarizes a diagnostics list: the level is `warn` when at least one
 * warning exists, `codes` lists every diagnostic code in order (ok and warn
 * alike), and `warn_count` is the number of warnings.
 */
export function summarizeMessagingDiagnostics(diagnostics: MessagingDiagnostic[]): MessagingStatusSummary {
  const codes: string[] = [];
  let warnCount = 0;
  for (const entry of diagnostics) {
    codes.push(entry.code);
    if (entry.level === 'warn') warnCount += 1;
  }
  return {
    level: warnCount > 0 ? 'warn' : 'ok',
    codes,
    warn_count: warnCount,
  };
}
512
+
513
/**
 * Picks the most recent timestamp string from `values`.
 *
 * Candidates are compared by parsed instant, not by string order, so mixed
 * `YYYY-MM-DD HH:MM:SS` and ISO `...T...Z` values rank correctly. Strings that
 * cannot be parsed rank below every parseable one and are ordered
 * lexicographically among themselves. Empty, null and undefined entries are
 * skipped.
 */
function latestTimestampText(values: ReadonlyArray<string | null | undefined>): string | null {
  let best: string | null = null;
  let bestTime: number | null = null;
  for (const value of values) {
    if (!value) continue;
    const time = parseTimestamp(value);
    let newer: boolean;
    if (best === null) {
      newer = true;
    } else if (time !== null && bestTime !== null) {
      newer = time > bestTime;
    } else if (time !== null) {
      newer = true;
    } else if (bestTime !== null) {
      newer = false;
    } else {
      newer = value > best;
    }
    if (newer) {
      best = value;
      bestTime = time;
    }
  }
  return best;
}

/**
 * Groups a messaging status into three health domains (Lark provider,
 * delivery websocket, delivery backlog), each with its warning codes, a
 * message, an optional hint, a recommended action and detail counters.
 *
 * The overall `level` and `summary` are copied from `status.summary`.
 */
export function buildMessagingHealth(status: MessagingStatus): MessagingHealth {
  // This list both selects the Lark warnings and fixes their reporting order.
  const larkWarningPriority = [
    'lark_error',
    'lark_ws_not_connected',
    'lark_restart_ineffective',
    'lark_probe_only_since_restart',
    'lark_no_events_since_restart',
    'lark_events_stale',
    'lark_no_bots',
    'lark_no_bound_agent',
    'lark_no_authorized_users',
    'lark_bot_open_id_unknown',
    'lark_raw_event_parse_errors',
  ];
  const larkWarnings = prioritizedDiagnostics(
    warningDiagnostics(status.diagnostics, larkWarningPriority),
    larkWarningPriority,
  );
  const deliveryWsWarnings = warningDiagnostics(status.diagnostics, [
    'delivery_no_connections',
    'delivery_ws_no_open_sockets',
    'delivery_ws_heartbeat_stale',
  ]);
  const backlogWarnings = warningDiagnostics(status.diagnostics, [
    'delivery_backlog_non_empty',
    'delivery_expired_active',
  ]);

  const bots = status.lark.bots;
  const totals = status.delivery_websocket.totals;
  const staleHeartbeatConnections = staleDeliveryHeartbeatConnections(status.delivery_websocket);

  // Aggregate counters across bots; a missing counter falls back to its sibling, then 0.
  const providerEvents = bots.reduce((sum, bot) => sum + (bot.provider_event_count ?? bot.event_count ?? 0), 0);
  const totalEvents = bots.reduce((sum, bot) => sum + (bot.event_count ?? bot.provider_event_count ?? 0), 0);
  const dbProviderEvents = bots.reduce((sum, bot) => sum + providerDbEventCount(bot), 0);
  const dbProbeEvents = bots.reduce((sum, bot) => sum + (bot.db_probe_event_count ?? 0), 0);
  const restartCount = bots.reduce((sum, bot) => sum + (bot.restart_count ?? 0), 0);

  const latestProviderEvent = latestTimestampText(bots.map((bot) => latestEventReceivedAt(bot)));
  const latestProbeEvent = latestTimestampText(bots.map((bot) => latestProbeEventReceivedAt(bot)));
  const latestRestartAt = latestTimestampText(bots.map((bot) => bot.last_restart_at));

  const larkCodes = larkWarnings.map((diagnostic) => diagnostic.code);
  const deliveryWsCodes = deliveryWsWarnings.map((diagnostic) => diagnostic.code);
  const backlogCodes = backlogWarnings.map((diagnostic) => diagnostic.code);

  return {
    level: status.summary.level,
    domains: {
      lark_provider: {
        level: larkWarnings.length > 0 ? 'warn' : 'ok',
        codes: larkCodes,
        message: larkWarnings.length > 0
          ? 'Lark provider ingress is not healthy.'
          : 'Lark provider ingress has no active diagnostics.',
        hint: larkProviderHint(larkCodes, larkWarnings),
        recommended_action: recommendLarkProviderAction(larkCodes),
        // Key order is kept stable because it is the order used in log lines.
        details: {
          bots: bots.length,
          provider_events: providerEvents,
          total_events: totalEvents,
          db_provider_events: dbProviderEvents,
          db_probe_events: dbProbeEvents,
          latest_provider_event_at: latestProviderEvent,
          latest_probe_event_at: latestProbeEvent,
          restart_count: restartCount,
          latest_restart_at: latestRestartAt,
        },
      },
      delivery_websocket: {
        level: deliveryWsWarnings.length > 0 ? 'warn' : 'ok',
        codes: deliveryWsCodes,
        message: deliveryWsWarnings.length > 0
          ? 'Server-to-daemon delivery websocket is not healthy.'
          : 'Server-to-daemon delivery websocket is healthy.',
        hint: firstHint(deliveryWsWarnings),
        recommended_action: recommendedAction(deliveryWsCodes, [
          ['delivery_no_connections', 'start_or_reconnect_daemon'],
          ['delivery_ws_no_open_sockets', 'restart_daemon_websocket'],
          ['delivery_ws_heartbeat_stale', 'restart_daemon_websocket'],
        ]),
        details: {
          connections: totals.connections,
          open_sockets: totals.open_sockets,
          stale_heartbeat_connections: staleHeartbeatConnections.length,
          stale_heartbeat_connection_ids: staleHeartbeatConnections.map((connection) => connection.connection_id),
          stale_heartbeat_computers: staleHeartbeatConnections.map((connection) => connection.computer_id ?? '-'),
        },
      },
      delivery_backlog: {
        level: backlogWarnings.length > 0 ? 'warn' : 'ok',
        codes: backlogCodes,
        message: backlogWarnings.length > 0
          ? 'Delivery backlog has pending or active work.'
          : 'Delivery backlog is empty.',
        hint: firstHint(backlogWarnings),
        recommended_action: recommendedAction(backlogCodes, [
          ['delivery_backlog_non_empty', 'inspect_daemon_runtime'],
          ['delivery_expired_active', 'restart_affected_daemon'],
        ]),
        details: {
          pending: totals.pending_deliveries,
          claimed: totals.claimed_deliveries,
          processing_completed: totals.processing_completed_deliveries,
          expired_active: totals.expired_active_deliveries,
        },
      },
    },
    summary: status.summary,
  };
}
638
+
639
/**
 * Returns the health log line for the Lark provider domain, i.e. the first
 * line whose throttle key starts with `lark_provider:`, or `null` when that
 * domain produced no line.
 */
export function buildLarkProviderHealthLogLine(status: MessagingStatus): MessagingHealthLogLine | null {
  for (const line of buildMessagingHealthLogLines(status)) {
    if (line.throttle_key.startsWith('lark_provider:')) return line;
  }
  return null;
}
642
+
643
/**
 * Renders health details as `key=value` tokens for a log line.
 *
 * - `null`/`undefined` values and empty arrays render as `-`, so a token never
 *   ends in a dangling `=`.
 * - Arrays render as their elements joined by `,`.
 * - Other objects render as JSON instead of `[object Object]`.
 * - Everything else uses its string form.
 *
 * @returns One token per entry in insertion order; `[]` when `details` is undefined.
 */
function formatHealthDetails(details: Record<string, unknown> | undefined): string[] {
  if (!details) return [];
  const render = (value: unknown): string => {
    if (value === null || value === undefined) return '-';
    if (Array.isArray(value)) {
      return value.length > 0 ? value.map(render).join(',') : '-';
    }
    if (typeof value === 'object') {
      try {
        return JSON.stringify(value) ?? '-';
      } catch {
        // Circular or otherwise unserializable objects fall back to String().
        return String(value);
      }
    }
    return String(value);
  };
  return Object.entries(details).map(([key, value]) => `${key}=${render(value)}`);
}
647
+
648
/**
 * Renders one warn-level health domain as a log line.
 *
 * The message is space-joined in this order: domain message, `domain=`,
 * `codes=`, `action=`, the detail tokens, then `hint=` when a hint exists.
 * The throttle key combines the domain name, its codes and its recommended
 * action.
 *
 * @returns `null` when the domain level is not `warn`.
 */
function buildHealthLogLine(domainName: MessagingHealthDomainName, domain: MessagingHealthDomain): MessagingHealthLogLine | null {
  if (domain.level !== 'warn') return null;

  const joinedCodes = domain.codes.join(',');
  const segments: string[] = [domain.message, `domain=${domainName}`];
  segments.push(`codes=${domain.codes.length > 0 ? joinedCodes : '-'}`);
  segments.push(`action=${domain.recommended_action ?? '-'}`);
  segments.push(...formatHealthDetails(domain.details));
  if (domain.hint) segments.push(`hint=${domain.hint}`);

  return {
    throttle_key: `${domainName}:${joinedCodes}:${domain.recommended_action ?? ''}`,
    message: segments.join(' '),
  };
}
663
+
664
/**
 * Builds one log line per unhealthy domain, in the order the domains are
 * declared on the health object. Healthy domains yield no line.
 */
export function buildMessagingHealthLogLines(status: MessagingStatus): MessagingHealthLogLine[] {
  const { domains } = buildMessagingHealth(status);
  const lines: MessagingHealthLogLine[] = [];
  for (const name of Object.keys(domains) as MessagingHealthDomainName[]) {
    const line = buildHealthLogLine(name, domains[name]);
    if (line) lines.push(line);
  }
  return lines;
}
@@ -0,0 +1,10 @@
1
+ import type { Database } from 'bun:sqlite';
2
+
3
/** Migration sequence number. */
export const version = 24;

/** Migration name. */
export const name = 'agents_model';
5
+
6
/**
 * Applies the migration: adds a nullable `model` TEXT column to the `agents`
 * table by executing a single `ALTER TABLE` statement.
 */
export function up(db: Database): void {
  const addModelColumn = `
    ALTER TABLE agents ADD COLUMN model TEXT NULL;
  `;
  db.exec(addModelColumn);
}