multiagents 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,214 @@
1
+ // ============================================================================
2
+ // multiagents — Team Progress & Status
3
+ // ============================================================================
4
+ // Computes aggregate team health, per-agent status, and formats for display.
5
+ // ============================================================================
6
+
7
+ import type { Slot, Message, Session } from "../shared/types.ts";
8
+ import type { BrokerClient } from "../shared/broker-client.ts";
9
+ import {
10
+ STUCK_THRESHOLD_MS,
11
+ SLOW_THRESHOLD_MS,
12
+ } from "../shared/constants.ts";
13
+ import { formatDuration, formatTime, safeJsonParse } from "../shared/utils.ts";
14
+
15
+ /** Health state for a single agent, as returned by assessHealth(): "crashed" = slot status is "disconnected"; "stuck" / "slow" = time since last activity exceeds STUCK_THRESHOLD_MS / SLOW_THRESHOLD_MS; "healthy" = everything else, including paused agents. */
16
+ export type HealthState = "healthy" | "slow" | "stuck" | "crashed";
17
+
18
+ /** Status for a single agent in the team, as built by getTeamStatus() from one slot. */
19
+ export interface AgentStatus {
20
+ slot_id: number; // id of the slot this entry describes
21
+ name: string; // slot display name; getTeamStatus() falls back to `Agent #<id>` when unset
22
+ role: string; // slot role; getTeamStatus() falls back to "unassigned" when unset
23
+ agent_type: string; // copied from the slot's agent_type
24
+ health: HealthState; // result of assessHealth() for the slot
25
+ status: string; // "paused" when the slot is paused, otherwise the slot's own status
26
+ paused: boolean; // copied from the slot's paused flag
27
+ last_activity: string; // display text: formatted duration since last_connected + " ago", or "never"
28
+ summary: string; // snapshot last_summary, else last_status, else ""
29
+ }
30
+
31
+ /** Issue detected in the team. getTeamStatus() emits one per agent whose health is not "healthy". */
32
+ export interface Issue {
33
+ severity: "info" | "warning" | "critical"; // getTeamStatus() maps slow -> info, stuck -> warning, crashed -> critical
34
+ slot_id?: number; // slot the issue refers to; optional in the type
35
+ message: string; // human-readable description, printed under "Issues:" by formatTeamStatusForDisplay()
36
+ }
37
+
38
+ /** Full team status snapshot, as returned by getTeamStatus() and rendered by formatTeamStatusForDisplay(). */
39
+ export interface TeamStatus {
40
+ session_id: string; // id the snapshot was requested for
41
+ session_name: string; // session.name as returned by the broker client
42
+ overall: string; // getTeamStatus() sets one of "paused" | "degraded" | "issues" | "slow" | "healthy"
43
+ elapsed: number; // Date.now() minus session.created_at at snapshot time
44
+ agents: AgentStatus[]; // one entry per slot returned by listSlots()
45
+ issues: Issue[]; // entries for agents whose health is crashed, stuck or slow
46
+ recent_messages: Message[]; // getMessageLog() result requested with limit 10
47
+ }
48
+
49
/**
 * Assess the health of a single slot based on timing thresholds.
 *
 * Classification, in order:
 * 1. `status === "disconnected"` → `"crashed"` (checked before `paused`).
 * 2. `paused` → `"healthy"`; a paused agent is idle on purpose.
 * 3. Otherwise the time since the last activity is compared against
 *    `STUCK_THRESHOLD_MS` and then `SLOW_THRESHOLD_MS`.
 *
 * The last-activity time is the numeric `updated_at` of `context_snapshot`
 * when present, else `slot.last_connected`, else `0`.
 *
 * The snapshot is read through `safeJsonParse` (as `getTeamStatus` does) so a
 * malformed snapshot cannot throw and abort the whole team status; a missing
 * or non-numeric `updated_at` is ignored instead of producing NaN arithmetic.
 *
 * @param slot - Slot to classify.
 * @returns The health state for the slot.
 */
export function assessHealth(slot: Slot): HealthState {
  if (slot.status === "disconnected") {
    return "crashed";
  }

  if (slot.paused) {
    return "healthy"; // Paused agents are intentionally idle
  }

  // Determine last activity time — prefer updated_at from context_snapshot.
  // `?.` guards against a snapshot whose JSON value is literally `null`.
  const snapshot = safeJsonParse<{ updated_at?: unknown }>(slot.context_snapshot, {});
  const updatedAt = snapshot?.updated_at;
  const lastActivity =
    typeof updatedAt === "number" && Number.isFinite(updatedAt)
      ? updatedAt
      : slot.last_connected ?? 0;
  const elapsed = Date.now() - lastActivity;

  if (elapsed > STUCK_THRESHOLD_MS) {
    return "stuck";
  }
  if (elapsed > SLOW_THRESHOLD_MS) {
    return "slow";
  }
  return "healthy";
}
74
+
75
/**
 * Aggregate team status from all slots in a session.
 *
 * Fetches the session, its slots and the 10 most recent log entries in
 * parallel, then derives:
 * - one `AgentStatus` per slot (health via `assessHealth`),
 * - one `Issue` per agent that is crashed (critical), stuck (warning) or
 *   slow (info),
 * - an overall label: `"paused"` when the session is paused, otherwise the
 *   worst agent health mapped to `"degraded"` (crashed), `"issues"` (stuck),
 *   `"slow"` or `"healthy"`.
 *
 * @param sessionId - Session to summarise.
 * @param brokerClient - Client used for all three lookups.
 * @returns The assembled snapshot; rejects if any broker call rejects.
 */
export async function getTeamStatus(
  sessionId: string,
  brokerClient: BrokerClient,
): Promise<TeamStatus> {
  const [session, slots, recentMessages] = await Promise.all([
    brokerClient.getSession(sessionId),
    brokerClient.listSlots(sessionId),
    brokerClient.getMessageLog(sessionId, { limit: 10 }),
  ]);

  // One timestamp for every duration in this snapshot so they agree with each other.
  const now = Date.now();

  const agents: AgentStatus[] = slots.map((slot) => {
    const health = assessHealth(slot);
    const snapshot = safeJsonParse<{ last_summary?: string; last_status?: string }>(
      slot.context_snapshot,
      {},
    );

    return {
      slot_id: slot.id,
      name: slot.display_name ?? `Agent #${slot.id}`,
      role: slot.role ?? "unassigned",
      agent_type: slot.agent_type,
      health,
      status: slot.paused ? "paused" : slot.status,
      paused: slot.paused,
      last_activity: slot.last_connected
        ? formatDuration(now - slot.last_connected) + " ago"
        : "never",
      summary: snapshot.last_summary ?? snapshot.last_status ?? "",
    };
  });

  // Collect issues and tally health in a single pass.
  const issues: Issue[] = [];
  const healthCounts: Record<HealthState, number> = { healthy: 0, slow: 0, stuck: 0, crashed: 0 };
  for (const agent of agents) {
    healthCounts[agent.health]++;

    if (agent.health === "crashed") {
      issues.push({
        severity: "critical",
        slot_id: agent.slot_id,
        message: `${agent.name} has crashed or disconnected`,
      });
    } else if (agent.health === "stuck") {
      issues.push({
        severity: "warning",
        slot_id: agent.slot_id,
        // last_activity already reads "<duration> ago" or "never", so it is
        // appended as-is instead of after "no activity for".
        message: `${agent.name} appears stuck (last activity: ${agent.last_activity})`,
      });
    } else if (agent.health === "slow") {
      issues.push({
        severity: "info",
        slot_id: agent.slot_id,
        message: `${agent.name} is responding slowly`,
      });
    }
  }

  // Determine overall status: an explicit session pause wins, then worst health.
  let overall: string;
  if (session.status === "paused") {
    overall = "paused";
  } else if (healthCounts.crashed > 0) {
    overall = "degraded";
  } else if (healthCounts.stuck > 0) {
    overall = "issues";
  } else if (healthCounts.slow > 0) {
    overall = "slow";
  } else {
    overall = "healthy";
  }

  return {
    session_id: sessionId,
    session_name: session.name,
    overall,
    elapsed: now - session.created_at,
    agents,
    issues,
    recent_messages: recentMessages,
  };
}
165
+
166
/**
 * Format a TeamStatus into a human-readable text table for display.
 *
 * Layout: a two-line header, an "Agents:" table, then optional "Issues:" and
 * "Recent messages:" sections (each omitted when empty). At most the first
 * five entries of `recent_messages` are printed, each truncated to 80
 * characters.
 *
 * The table header is produced by the same row builder as the agent rows so
 * the column separators always line up, and whitespace runs (including line
 * breaks) inside a message are collapsed so every message stays on one line.
 *
 * @param status - Snapshot to render.
 * @returns The rendered text, lines joined with "\n".
 */
export function formatTeamStatusForDisplay(status: TeamStatus): string {
  // Column widths shared by the header and every agent row.
  const widths = { name: 20, role: 17, type: 7, health: 7 } as const;

  // Pad or truncate to exactly `width` characters.
  const cell = (text: string, width: number): string => text.padEnd(width).slice(0, width);

  const tableRow = (name: string, role: string, type: string, health: string, state: string): string =>
    ` ${cell(name, widths.name)} | ${cell(role, widths.role)} | ${cell(type, widths.type)} | ${cell(health, widths.health)} | ${state}`;

  // Header
  const lines: string[] = [
    `=== Team: ${status.session_name} ===`,
    `Session: ${status.session_id} | Status: ${status.overall} | Elapsed: ${formatDuration(status.elapsed)}`,
    "",
  ];

  // Agent table
  lines.push("Agents:");
  lines.push(tableRow("Name", "Role", "Type", "Health", "Status"));
  lines.push(" " + "-".repeat(78));

  for (const agent of status.agents) {
    lines.push(
      tableRow(
        agent.name,
        agent.role,
        agent.agent_type,
        agent.health,
        agent.paused ? "paused" : agent.status,
      ),
    );
  }

  // Issues
  if (status.issues.length > 0) {
    lines.push("");
    lines.push("Issues:");
    for (const issue of status.issues) {
      const icon = issue.severity === "critical" ? "[!]" : issue.severity === "warning" ? "[?]" : "[i]";
      lines.push(` ${icon} ${issue.message}`);
    }
  }

  // Recent messages
  if (status.recent_messages.length > 0) {
    lines.push("");
    lines.push("Recent messages:");
    for (const msg of status.recent_messages.slice(0, 5)) {
      const time = formatTime(msg.sent_at);
      // `!= null` also covers an absent (undefined) slot id.
      const from = msg.from_slot_id != null ? `slot ${msg.from_slot_id}` : msg.from_id;
      const text = msg.text.replace(/\s+/g, " ").slice(0, 80);
      lines.push(` [${time}] ${from}: ${text}`);
    }
  }

  return lines.join("\n");
}
@@ -0,0 +1,176 @@
1
+ // ============================================================================
2
+ // multiagents — Crash Recovery
3
+ // ============================================================================
4
+ // Handles agent crashes: flap detection, context preservation, respawn with
5
+ // handoff prompts.
6
+ // ============================================================================
7
+
8
+ import type { BrokerClient } from "../shared/broker-client.ts";
9
+ import type { AgentEvent } from "./monitor.ts";
10
+ import { FLAP_THRESHOLD, FLAP_WINDOW_MS } from "../shared/constants.ts";
11
+ import { log, safeJsonParse, formatDuration } from "../shared/utils.ts";
12
+ import { launchAgent, buildTeamContext } from "./launcher.ts";
13
+
14
+ const LOG_PREFIX = "recovery"; // passed as the first argument to every log() call in this module
15
+
16
+ /** Track crash timestamps per slot for flap detection: slot id -> Date.now() values recorded by handleAgentCrash(), which prunes each list to the last FLAP_WINDOW_MS on every crash. */
17
+ const crashHistory: Map<number, number[]> = new Map();
18
+
19
/**
 * Handle an agent crash: check for flapping, gather context, and return
 * an event with suggested actions.
 *
 * The crash is recorded in `crashHistory` and the slot's history is pruned to
 * the last `FLAP_WINDOW_MS`. The slot is flapping when at least
 * `FLAP_THRESHOLD` crashes (this one included) remain in that window.
 *
 * Slot details are looked up best-effort: if the lookup fails the event is
 * still produced with placeholder name/role, and the failure is logged rather
 * than silently discarded so broker problems stay visible.
 *
 * @param slotId - Slot whose agent crashed.
 * @param exitCode - Exit code reported for the crashed process.
 * @param sessionId - Session the slot belongs to.
 * @param brokerClient - Client used to look up the slot.
 * @returns An `agent_flapping` event (do not auto-restart) or an
 *   `agent_crashed` event (respawn recommended), both with severity critical.
 */
export async function handleAgentCrash(
  slotId: number,
  exitCode: number,
  sessionId: string,
  brokerClient: BrokerClient,
): Promise<AgentEvent> {
  const now = Date.now();

  // Record this crash and prune entries outside the flap window in one write
  const recentCrashes = [...(crashHistory.get(slotId) ?? []), now].filter(
    (t) => now - t < FLAP_WINDOW_MS,
  );
  crashHistory.set(slotId, recentCrashes);

  // Check for flapping
  const isFlapping = recentCrashes.length >= FLAP_THRESHOLD;

  // Get slot info for context
  let slotName = `Slot ${slotId}`;
  let slotRole = "unknown";
  let lastSummary = "";

  try {
    const slot = await brokerClient.getSlot(slotId);
    slotName = slot.display_name ?? slotName;
    slotRole = slot.role ?? slotRole;
    const snapshot = safeJsonParse<{ last_summary?: string }>(
      slot.context_snapshot,
      {},
    );
    lastSummary = snapshot.last_summary ?? "";
  } catch (err: unknown) {
    // Slot may already be cleaned up — keep the placeholders, but leave a trace
    const reason = err instanceof Error ? err.message : String(err);
    log(LOG_PREFIX, `Could not load slot ${slotId} for crash context: ${reason}`);
  }

  // Build event with suggested actions
  const suggestedActions: string[] = [];

  if (isFlapping) {
    suggestedActions.push("Agent is flapping — do NOT auto-restart");
    suggestedActions.push("Investigate root cause before restarting");
    suggestedActions.push("Consider reassigning this agent's tasks");

    log(
      LOG_PREFIX,
      `${slotName} is flapping: ${recentCrashes.length} crashes in ${formatDuration(FLAP_WINDOW_MS)}`,
    );

    return {
      type: "agent_flapping",
      severity: "critical",
      slotId,
      sessionId,
      message: `${slotName} (${slotRole}) is flapping: ${recentCrashes.length} crashes in ${formatDuration(FLAP_WINDOW_MS)}. Exit code: ${exitCode}`,
      data: {
        exit_code: exitCode,
        crash_count: recentCrashes.length,
        flap_window_ms: FLAP_WINDOW_MS,
        is_flapping: true,
        last_summary: lastSummary,
        suggested_actions: suggestedActions,
      },
    };
  }

  // Not flapping — suggest respawn
  suggestedActions.push("Auto-respawn recommended");
  suggestedActions.push("Respawn will include handoff context from previous run");

  log(LOG_PREFIX, `${slotName} crashed (exit ${exitCode}), crash ${recentCrashes.length}/${FLAP_THRESHOLD} in window`);

  return {
    type: "agent_crashed",
    severity: "critical",
    slotId,
    sessionId,
    message: `${slotName} (${slotRole}) crashed with exit code ${exitCode}. Crash ${recentCrashes.length}/${FLAP_THRESHOLD} in window.`,
    data: {
      exit_code: exitCode,
      crash_count: recentCrashes.length,
      is_flapping: false,
      last_summary: lastSummary,
      suggested_actions: suggestedActions,
    },
  };
}
112
+
113
/**
 * Respawn a crashed agent with a handoff prompt that includes:
 * - The original role and task context
 * - A recap of recent messages
 * - Team roster
 *
 * The prompt is assembled from sections separated by a blank line; a section
 * with no content is dropped. (Filtering the individual lines with `Boolean`
 * would also delete the blank separator lines and collapse the prompt into
 * one dense block.)
 *
 * The slot lookup, the message log query (last 20 entries involving the slot)
 * and the team context build do not depend on each other, so they run in
 * parallel.
 *
 * @param sessionId - Session the crashed slot belongs to.
 * @param slotId - Slot of the crashed agent.
 * @param brokerClient - Client used for slot and message lookups.
 * @param projectDir - Project directory passed through to `launchAgent`.
 * @returns The PID reported by `launchAgent` for the new agent.
 */
export async function respawnAgent(
  sessionId: string,
  slotId: number,
  brokerClient: BrokerClient,
  projectDir: string,
): Promise<{ pid: number }> {
  const [slot, messages, teamContext] = await Promise.all([
    // The crashed slot's info
    brokerClient.getSlot(slotId),
    // Recent messages for this slot, used to build the recap
    brokerClient.getMessageLog(sessionId, {
      limit: 20,
      with_slot: slotId,
    }),
    // Team context
    buildTeamContext(sessionId, slotId, brokerClient),
  ]);

  const snapshot = safeJsonParse<{ last_summary?: string; last_status?: string }>(
    slot.context_snapshot,
    {},
  );

  const recapLines = messages.map(
    // `!= null` also covers an absent (undefined) slot id.
    (m) => `[${m.msg_type}] ${m.from_slot_id != null ? `slot ${m.from_slot_id}` : m.from_id}: ${m.text.slice(0, 150)}`,
  );

  // Role section: the role line is always present, the other two are optional.
  const roleSection = [
    `Role: ${slot.role ?? "unassigned"}`,
    slot.role_description ? `Role description: ${slot.role_description}` : "",
    snapshot?.last_summary ? `Last known status: ${snapshot.last_summary}` : "",
  ]
    .filter(Boolean)
    .join("\n");

  // Build handoff task prompt: non-empty sections joined by one blank line.
  const handoffTask = [
    `You are being restarted after a crash. Here is your context:`,
    roleSection,
    teamContext,
    recapLines.length > 0
      ? `Recent message history:\n${recapLines.join("\n")}`
      : "No recent message history.",
    "Continue from where you left off. Check the current state of your files before making changes.",
  ]
    .filter(Boolean)
    .join("\n\n");

  // Launch with the handoff prompt
  const result = await launchAgent(sessionId, projectDir, {
    agent_type: slot.agent_type,
    name: slot.display_name ?? `Agent #${slotId}`,
    role: slot.role ?? "general",
    role_description: slot.role_description ?? "",
    initial_task: handoffTask,
  }, brokerClient);

  log(LOG_PREFIX, `Respawned slot ${slotId} as new slot ${result.slotId} (PID ${result.pid})`);

  return { pid: result.pid };
}