@aion0/forge 0.4.16 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/README.md +27 -2
  2. package/RELEASE_NOTES.md +21 -14
  3. package/app/api/agents/route.ts +17 -0
  4. package/app/api/delivery/[id]/route.ts +62 -0
  5. package/app/api/delivery/route.ts +40 -0
  6. package/app/api/mobile-chat/route.ts +13 -7
  7. package/app/api/monitor/route.ts +10 -6
  8. package/app/api/pipelines/[id]/route.ts +16 -3
  9. package/app/api/tasks/route.ts +2 -1
  10. package/app/api/workspace/[id]/agents/route.ts +35 -0
  11. package/app/api/workspace/[id]/memory/route.ts +23 -0
  12. package/app/api/workspace/[id]/smith/route.ts +22 -0
  13. package/app/api/workspace/[id]/stream/route.ts +28 -0
  14. package/app/api/workspace/route.ts +100 -0
  15. package/app/global-error.tsx +10 -4
  16. package/app/icon.ico +0 -0
  17. package/app/layout.tsx +2 -2
  18. package/app/login/LoginForm.tsx +96 -0
  19. package/app/login/page.tsx +7 -98
  20. package/app/page.tsx +2 -2
  21. package/bin/forge-server.mjs +13 -1
  22. package/check-forge-status.sh +9 -0
  23. package/components/ConversationEditor.tsx +411 -0
  24. package/components/ConversationGraphView.tsx +347 -0
  25. package/components/ConversationTerminalView.tsx +303 -0
  26. package/components/Dashboard.tsx +36 -39
  27. package/components/DashboardWrapper.tsx +9 -0
  28. package/components/DeliveryFlowEditor.tsx +491 -0
  29. package/components/DeliveryList.tsx +230 -0
  30. package/components/DeliveryWorkspace.tsx +589 -0
  31. package/components/DocTerminal.tsx +10 -2
  32. package/components/DocsViewer.tsx +10 -2
  33. package/components/HelpTerminal.tsx +11 -6
  34. package/components/InlinePipelineView.tsx +111 -0
  35. package/components/MobileView.tsx +20 -0
  36. package/components/MonitorPanel.tsx +9 -4
  37. package/components/NewTaskModal.tsx +32 -0
  38. package/components/PipelineEditor.tsx +49 -6
  39. package/components/PipelineView.tsx +482 -64
  40. package/components/ProjectDetail.tsx +314 -56
  41. package/components/ProjectManager.tsx +49 -4
  42. package/components/SessionView.tsx +27 -13
  43. package/components/SettingsModal.tsx +790 -124
  44. package/components/SkillsPanel.tsx +31 -8
  45. package/components/TaskBoard.tsx +3 -0
  46. package/components/WebTerminal.tsx +257 -43
  47. package/components/WorkspaceTree.tsx +221 -0
  48. package/components/WorkspaceView.tsx +2245 -0
  49. package/install.sh +2 -2
  50. package/lib/agents/claude-adapter.ts +104 -0
  51. package/lib/agents/generic-adapter.ts +64 -0
  52. package/lib/agents/index.ts +242 -0
  53. package/lib/agents/types.ts +70 -0
  54. package/lib/artifacts.ts +106 -0
  55. package/lib/delivery.ts +787 -0
  56. package/lib/forge-skills/forge-inbox.md +37 -0
  57. package/lib/forge-skills/forge-send.md +40 -0
  58. package/lib/forge-skills/forge-status.md +32 -0
  59. package/lib/forge-skills/forge-workspace-sync.md +37 -0
  60. package/lib/help-docs/00-overview.md +7 -1
  61. package/lib/help-docs/01-settings.md +159 -2
  62. package/lib/help-docs/05-pipelines.md +89 -0
  63. package/lib/help-docs/07-projects.md +35 -1
  64. package/lib/help-docs/11-workspace.md +254 -0
  65. package/lib/help-docs/CLAUDE.md +7 -2
  66. package/lib/init.ts +60 -10
  67. package/lib/pipeline.ts +537 -1
  68. package/lib/settings.ts +115 -22
  69. package/lib/skills.ts +249 -372
  70. package/lib/task-manager.ts +113 -33
  71. package/lib/telegram-bot.ts +33 -1
  72. package/lib/workspace/__tests__/state-machine.test.ts +388 -0
  73. package/lib/workspace/__tests__/workspace.test.ts +311 -0
  74. package/lib/workspace/agent-bus.ts +416 -0
  75. package/lib/workspace/agent-worker.ts +667 -0
  76. package/lib/workspace/backends/api-backend.ts +262 -0
  77. package/lib/workspace/backends/cli-backend.ts +479 -0
  78. package/lib/workspace/index.ts +82 -0
  79. package/lib/workspace/manager.ts +136 -0
  80. package/lib/workspace/orchestrator.ts +1914 -0
  81. package/lib/workspace/persistence.ts +310 -0
  82. package/lib/workspace/presets.ts +170 -0
  83. package/lib/workspace/skill-installer.ts +188 -0
  84. package/lib/workspace/smith-memory.ts +498 -0
  85. package/lib/workspace/types.ts +231 -0
  86. package/lib/workspace/watch-manager.ts +288 -0
  87. package/lib/workspace-standalone.ts +814 -0
  88. package/middleware.ts +1 -0
  89. package/next-env.d.ts +1 -1
  90. package/package.json +4 -1
  91. package/src/config/index.ts +12 -1
  92. package/src/core/db/database.ts +1 -0
  93. package/start.sh +7 -0
@@ -0,0 +1,1914 @@
1
+ /**
2
+ * WorkspaceOrchestrator — manages a group of agents within a workspace.
3
+ *
4
+ * Responsibilities:
5
+ * - Create/remove agents
6
+ * - Run agents (auto-select backend, inject upstream context)
7
+ * - Listen to agent events → trigger downstream agents
8
+ * - Approval gating
9
+ * - Parallel execution (independent agents run concurrently)
10
+ * - Error recovery (restart from lastCheckpoint)
11
+ */
12
+
13
+ import { EventEmitter } from 'node:events';
14
+ import { readFileSync, existsSync } from 'node:fs';
15
+ import { resolve } from 'node:path';
16
+ import type {
17
+ WorkspaceAgentConfig,
18
+ AgentState,
19
+ SmithStatus,
20
+ TaskStatus,
21
+ AgentMode,
22
+ WorkerEvent,
23
+ BusMessage,
24
+ Artifact,
25
+ WorkspaceState,
26
+ DaemonWakeReason,
27
+ } from './types';
28
+ import { AgentWorker } from './agent-worker';
29
+ import { AgentBus } from './agent-bus';
30
+ import { WatchManager } from './watch-manager';
31
+ import { ApiBackend } from './backends/api-backend';
32
+ import { CliBackend } from './backends/cli-backend';
33
+ import { appendAgentLog, saveWorkspace, saveWorkspaceSync, startAutoSave, stopAutoSave } from './persistence';
34
+ import { hasForgeSkills, installForgeSkills } from './skill-installer';
35
+ import {
36
+ loadMemory, saveMemory, createMemory, formatMemoryForPrompt,
37
+ addObservation, addSessionSummary, parseStepToObservations, buildSessionSummary,
38
+ } from './smith-memory';
39
+
40
+ // ─── Orchestrator Events ─────────────────────────────────
41
+
42
/**
 * Union of the payloads the orchestrator publishes on its 'event' channel.
 * Worker events are passed through; the remaining variants are
 * orchestrator-level notifications.
 */
export type OrchestratorEvent =
  // Any event produced by an AgentWorker
  | WorkerEvent
  // A message taken from the agent bus
  | { type: 'bus_message'; message: BusMessage }
  // NOTE(review): presumably agentId is waiting on approval relating to upstreamId — confirm at the emit site
  | { type: 'approval_required'; agentId: string; upstreamId: string }
  // A question from one agent (fromAgent) addressed to the user on behalf of agentId
  | { type: 'user_input_request'; agentId: string; fromAgent: string; question: string }
  // Aggregate counters for the workspace
  | { type: 'workspace_status'; running: number; done: number; total: number }
  | { type: 'workspace_complete' }
  // File-watch notification for an agent; `changes` is untyped here
  | { type: 'watch_alert'; agentId: string; changes: any[]; summary: string; timestamp: number };
50
+
51
+ // ─── Orchestrator class ──────────────────────────────────
52
+
53
+ export class WorkspaceOrchestrator extends EventEmitter {
54
+ readonly workspaceId: string;
55
+ readonly projectPath: string;
56
+ readonly projectName: string;
57
+
58
+ private agents = new Map<string, { config: WorkspaceAgentConfig; worker: AgentWorker | null; state: AgentState }>();
59
+ private bus: AgentBus;
60
+ private watchManager: WatchManager;
61
+ private approvalQueue = new Set<string>();
62
+ private daemonActive = false;
63
+ private createdAt = Date.now();
64
+ private healthCheckTimer: NodeJS.Timeout | null = null;
65
+
66
/**
 * Creates the orchestrator for one workspace and wires its collaborators:
 * an AgentBus, a WatchManager, and the persistence auto-save loop.
 *
 * @param workspaceId  identifier used for persistence and the watch manager
 * @param projectPath  project directory the agents work in
 * @param projectName  display name of the project
 */
constructor(workspaceId: string, projectPath: string, projectName: string) {
  super();
  this.workspaceId = workspaceId;
  this.projectPath = projectPath;
  this.projectName = projectName;
  this.bus = new AgentBus();
  this.watchManager = new WatchManager(workspaceId, projectPath, () => this.agents as any);

  // Watch alerts: re-emit, mirror into the agent's history (so the Log panel
  // shows them), then let handleWatchAlert react.
  // watch_heartbeat (no changes) is intentionally not subscribed here.
  this.watchManager.on('watch_alert', (event) => {
    this.emit('event', event);
    const target = this.agents.get(event.agentId);
    if (target && event.entry) {
      target.state.history.push(event.entry);
      this.emit('event', { type: 'log', agentId: event.agentId, entry: event.entry } as any);
    }
    this.handleWatchAlert(event.agentId, event.summary);
  });

  // Bus traffic: ACKs stay internal, '_system' messages go straight to
  // listeners, everything else is routed through handleBusMessage.
  this.bus.on('message', (msg: BusMessage) => {
    if (msg.type === 'ack') return;
    if (msg.to !== '_system') {
      this.handleBusMessage(msg);
      return;
    }
    this.emit('event', { type: 'bus_message', message: msg } satisfies OrchestratorEvent);
  });

  // Periodic persistence; the interval is owned by startAutoSave.
  startAutoSave(workspaceId, () => this.getFullState());
}
99
+
100
+ // ─── Agent Management ──────────────────────────────────
101
+
102
/**
 * Checks whether `config` would clash with an agent that is already registered.
 *
 * Input nodes are never checked. For every other agent the rules are:
 *  - `workDir` must not contain ".." and must resolve inside the project;
 *  - its normalised `workDir` must differ from, and not nest with, every
 *    other non-input agent's `workDir` (missing workDir means project root);
 *  - none of its `outputs` may equal (after normalisation) an output of
 *    another non-input agent.
 *
 * @param config     the agent configuration being added or updated
 * @param excludeId  agent id to ignore during comparison (the agent being edited)
 * @returns a human-readable conflict description, or null when acceptable
 */
private validateOutputs(config: WorkspaceAgentConfig, excludeId?: string): string | null {
  if (config.type === 'input') return null;

  // Strip one leading "./" or "/" and one trailing "/"; empty means project root.
  const normalize = (p: string) => p.replace(/^\.?\//, '').replace(/\/$/, '') || '.';

  // Validate workDir is within project (no ../ escape)
  if (config.workDir) {
    const relativeDir = config.workDir.replace(/^\.?\//, '');
    if (relativeDir.includes('..')) {
      return `Work directory "${config.workDir}" contains "..". Must be a subdirectory of the project.`;
    }
    // resolve() returns an absolute path without a trailing slash, so the
    // project root must go through it too. Comparing against the raw
    // projectPath would reject valid directories whenever projectPath has a
    // trailing slash or is relative.
    const root = resolve(this.projectPath);
    const rootPrefix = root.endsWith('/') ? root : root + '/';
    const resolved = resolve(root, relativeDir);
    if (resolved !== root && !resolved.startsWith(rootPrefix)) {
      return `Work directory "${config.workDir}" is outside the project. Must be a subdirectory.`;
    }
  }

  // Every non-input smith must have a unique workDir
  const newDir = normalize(config.workDir || '.');

  for (const [id, entry] of this.agents) {
    if (id === excludeId || entry.config.type === 'input') continue;

    const existingDir = normalize(entry.config.workDir || '.');

    // Same workDir → conflict
    if (newDir === existingDir) {
      return `Work directory conflict: "${config.label}" and "${entry.config.label}" both use "${newDir === '.' ? 'project root' : newDir}/". Each smith must have a unique work directory.`;
    }

    // One is parent of the other → conflict (e.g., "src" and "src/components")
    if (newDir.startsWith(existingDir + '/') || existingDir.startsWith(newDir + '/')) {
      return `Work directory conflict: "${config.label}" (${newDir}/) overlaps with "${entry.config.label}" (${existingDir}/). Nested directories not allowed.`;
    }

    // Output path overlap: report the first of this agent's outputs that the
    // other agent also declares.
    const existingOutputs = new Set(entry.config.outputs.map(normalize));
    for (const out of config.outputs) {
      if (existingOutputs.has(normalize(out))) {
        return `Output conflict: "${config.label}" and "${entry.config.label}" both output to "${out}"`;
      }
    }
  }
  return null;
}
150
+
151
/**
 * Reports whether giving `agentId` the dependency list `dependsOn` would
 * introduce a cycle into the dependency graph.
 *
 * The graph is built from every registered agent's current `dependsOn`,
 * with the entry for `agentId` replaced by the proposed list.
 *
 * @returns an error message naming one agent on the cycle, or null if acyclic
 */
private detectCycle(agentId: string, dependsOn: string[]): string | null {
  // agent → agents it depends on (proposed edges for agentId go in last)
  const graph = new Map<string, string[]>();
  this.agents.forEach((entry, id) => {
    if (id !== agentId) graph.set(id, [...entry.config.dependsOn]);
  });
  graph.set(agentId, [...dependsOn]);

  // 'active' = on the current DFS path, 'finished' = fully explored
  const marks = new Map<string, 'active' | 'finished'>();

  const findCycleFrom = (node: string): string | null => {
    const mark = marks.get(node);
    if (mark === 'active') return node; // walked back onto our own path
    if (mark === 'finished') return null;
    marks.set(node, 'active');
    for (const next of graph.get(node) || []) {
      const hit = findCycleFrom(next);
      if (hit) return hit;
    }
    marks.set(node, 'finished');
    return null;
  };

  for (const start of graph.keys()) {
    const offender = findCycleFrom(start);
    if (!offender) continue;
    const cycleName = this.agents.get(offender)?.config.label || offender;
    return `Circular dependency detected involving "${cycleName}". Dependencies must form a DAG (no cycles).`;
  }
  return null;
}
186
+
187
/**
 * Tells whether `agentA` can be reached from `agentB` by following
 * `dependsOn` edges, i.e. A sits somewhere in B's dependency chain.
 * Also returns true when both ids are equal.
 */
isUpstream(agentA: string, agentB: string): boolean {
  const seen = new Set<string>();
  const pending: string[] = [agentB];

  while (pending.length > 0) {
    const current = pending.pop()!;
    if (current === agentA) return true;
    if (seen.has(current)) continue;
    seen.add(current);

    // Unknown ids (e.g. dangling references) simply contribute no edges.
    const entry = this.agents.get(current);
    if (entry) pending.push(...entry.config.dependsOn);
  }
  return false;
}
200
+
201
/**
 * Registers a new agent in its initial state (smith down, auto mode, idle),
 * then persists and announces the change.
 *
 * @throws Error when the config conflicts with an existing agent or when
 *         its dependencies would form a cycle
 */
addAgent(config: WorkspaceAgentConfig): void {
  // Output/workDir validation runs first; the cycle check only runs if it passes.
  const problem = this.validateOutputs(config) || this.detectCycle(config.id, config.dependsOn);
  if (problem) throw new Error(problem);

  const initialState: AgentState = {
    smithStatus: 'down',
    mode: 'auto',
    taskStatus: 'idle',
    history: [],
    artifacts: [],
  };
  this.agents.set(config.id, { config, worker: null, state: initialState });

  this.saveNow();
  this.emitAgentsChanged();
}
220
+
221
/**
 * Removes an agent: stops its worker if one exists, drops it from the
 * registry and the approval queue, and removes the id from other agents'
 * `dependsOn` lists. Persists and announces the change afterwards.
 */
removeAgent(id: string): void {
  this.agents.get(id)?.worker?.stop();
  this.agents.delete(id);
  this.approvalQueue.delete(id);

  // Clean up dangling dependsOn references in the remaining agents
  for (const other of this.agents.values()) {
    const deps = other.config.dependsOn;
    const at = deps.indexOf(id);
    if (at !== -1) deps.splice(at, 1);
  }

  this.saveNow();
  this.emitAgentsChanged();
}
240
+
241
/**
 * Replaces an agent's configuration and resets it to idle.
 *
 * History and artifacts are kept; only the task status and error are
 * cleared. Any existing worker is stopped and discarded so the next run
 * builds a fresh one from the new config. Unknown ids are ignored.
 *
 * @param id      id of the agent to update
 * @param config  the new configuration
 * @throws Error when the new config conflicts with another agent or would
 *         create a dependency cycle
 */
updateAgentConfig(id: string, config: WorkspaceAgentConfig): void {
  const entry = this.agents.get(id);
  if (!entry) return;
  const conflict = this.validateOutputs(config, id);
  if (conflict) throw new Error(conflict);
  const cycleErr = this.detectCycle(id, config.dependsOn);
  if (cycleErr) throw new Error(cycleErr);
  // Always stop the worker before dropping the reference. Previously only a
  // 'running' worker was stopped, so a worker in any other state was
  // discarded without stop(), unlike resetAgent and resetDownstream.
  if (entry.worker) {
    entry.worker.stop();
  }
  entry.config = config;
  // Reset status but keep history/artifacts (don't wipe logs)
  entry.state.taskStatus = 'idle';
  entry.state.error = undefined;
  entry.worker = null;
  // Restart watch if config changed
  if (this.daemonActive) {
    this.watchManager.startWatch(id, config);
  }
  this.saveNow();
  this.emitAgentsChanged();
  // Push status update so frontend reflects the reset
  this.emit('event', { type: 'task_status', agentId: id, taskStatus: 'idle' } satisfies WorkerEvent);
}
265
+
266
/** Returns the orchestrator-side state of one agent, or undefined if the id is unknown. */
getAgentState(id: string): Readonly<AgentState> | undefined {
  const entry = this.agents.get(id);
  return entry ? entry.state : undefined;
}
269
+
270
/**
 * Builds an id → state snapshot for every agent.
 * When an agent has a worker, the worker's state is used, with `mode`
 * overridden by the orchestrator's value (the orchestrator controls mode).
 * Otherwise the orchestrator's stored state is returned as-is.
 */
getAllAgentStates(): Record<string, AgentState> {
  const snapshot: Record<string, AgentState> = {};
  this.agents.forEach((entry, id) => {
    const live = entry.worker?.getState();
    if (live) {
      snapshot[id] = { ...live, mode: entry.state.mode };
    } else {
      snapshot[id] = entry.state;
    }
  });
  return snapshot;
}
281
+
282
+ // ─── Execution ─────────────────────────────────────────
283
+
284
/**
 * Complete an Input node — record its content and mark it as done.
 *
 * Each submission is appended to `config.entries` (capped at the 100 most
 * recent) and mirrored into `config.content`. Downstream agents are NOT
 * reset here: every non-input agent that depends on this node is sent an
 * `input_updated` notification over the bus instead.
 *
 * Does nothing when the id is unknown or the agent is not an input node.
 *
 * @param agentId  id of the input node
 * @param content  the text supplied by the user
 */
completeInput(agentId: string, content: string): void {
  const entry = this.agents.get(agentId);
  if (!entry || entry.config.type !== 'input') return;

  // Append to entries (incremental, not overwrite)
  if (!entry.config.entries) entry.config.entries = [];
  entry.config.entries.push({ content, timestamp: Date.now() });
  // Keep bounded — max 100 entries, oldest removed
  if (entry.config.entries.length > 100) {
    entry.config.entries = entry.config.entries.slice(-100);
  }
  // Also set content to latest for backward compat
  entry.config.content = content;

  // Short preview used for both the artifact and the completion notice
  const summary = content.slice(0, 200);

  entry.state.taskStatus = 'done';
  entry.state.completedAt = Date.now();
  entry.state.artifacts = [{ type: 'text', summary }];

  this.emit('event', { type: 'task_status', agentId, taskStatus: 'done' } satisfies WorkerEvent);
  this.emit('event', { type: 'done', agentId, summary: 'Input provided' } satisfies WorkerEvent);
  this.emitAgentsChanged(); // push updated entries to frontend
  this.bus.notifyTaskComplete(agentId, [], summary);

  // Send input_updated messages to direct downstream agents via the bus
  for (const [id, downstream] of this.agents) {
    if (downstream.config.type === 'input') continue;
    if (!downstream.config.dependsOn.includes(agentId)) continue;
    this.bus.send(agentId, id, 'notify', {
      action: 'input_updated',
      content: content.slice(0, 500),
    });
    console.log(`[bus] Input → ${downstream.config.label}: input_updated`);
  }

  this.saveNow();
}
327
+
328
/**
 * Reset a single agent to its initial state so it can be run again.
 *
 * Stops and discards the worker, kills the agent's tmux session if one is
 * recorded, and replaces the state with smith down / auto mode / idle while
 * keeping the existing history. Downstream agents are not touched by this
 * method. Unknown ids are ignored.
 */
resetAgent(agentId: string): void {
  const entry = this.agents.get(agentId);
  if (!entry) return;
  if (entry.worker) entry.worker.stop();
  entry.worker = null;
  // Kill orphaned tmux session if manual agent
  if (entry.state.tmuxSession) {
    try {
      // execFileSync passes the session name as an argument, not through a
      // shell, so the name cannot be interpreted as shell syntax.
      // stdio 'ignore' replaces the old "2>/dev/null" redirection.
      const { execFileSync } = require('node:child_process');
      execFileSync('tmux', ['kill-session', '-t', entry.state.tmuxSession], { timeout: 3000, stdio: 'ignore' });
      console.log(`[workspace] Killed tmux session ${entry.state.tmuxSession}`);
    } catch {} // session might already be dead
  }
  entry.state = { smithStatus: 'down', mode: 'auto', taskStatus: 'idle', history: entry.state.history, artifacts: [] };
  this.emit('event', { type: 'task_status', agentId, taskStatus: 'idle' } satisfies WorkerEvent);
  this.emitAgentsChanged();
  this.saveNow();
}
347
+
348
/**
 * Recursively returns every non-idle agent that depends on `agentId` to idle.
 *
 * Each affected agent has its worker stopped and its artifacts cleared;
 * smith status, mode, history and CLI session id are carried over.
 * Agents that are already idle are skipped, and the walk does not continue
 * through them.
 *
 * @param agentId  the upstream agent whose dependants should be reset
 * @param visited  ids already processed (guards against cycles)
 */
private resetDownstream(agentId: string, visited = new Set<string>()): void {
  if (visited.has(agentId)) return; // cycle protection
  visited.add(agentId);

  for (const [id, entry] of this.agents) {
    const isDependant = id !== agentId && entry.config.dependsOn.includes(agentId);
    if (!isDependant || entry.state.taskStatus === 'idle') continue;

    console.log(`[workspace] Resetting ${entry.config.label} (${id}) to idle (upstream ${agentId} changed)`);
    if (entry.worker) entry.worker.stop();
    entry.worker = null;

    // Read the carried-over fields only after the worker has been stopped.
    const previous = entry.state;
    entry.state = {
      smithStatus: previous.smithStatus,
      mode: previous.mode,
      taskStatus: 'idle',
      history: previous.history,
      artifacts: [],
      cliSessionId: previous.cliSessionId,
    };
    this.emit('event', { type: 'task_status', agentId: id, taskStatus: 'idle' } satisfies WorkerEvent);
    this.resetDownstream(id, visited);
  }
}
365
+
366
/**
 * Synchronous pre-flight check before running an agent.
 * Input nodes always pass. Any other agent must not already be running,
 * and each of its dependencies must exist and be in the 'done' state.
 *
 * @throws Error describing the first problem found
 */
validateCanRun(agentId: string): void {
  const entry = this.agents.get(agentId);
  if (!entry) throw new Error(`Agent "${agentId}" not found`);
  if (entry.config.type === 'input') return;
  if (entry.state.taskStatus === 'running') throw new Error(`Agent "${entry.config.label}" is already running`);

  // Extra guidance appended to the error, depending on why the dependency is not done.
  const hintFor = (status: TaskStatus): string => {
    switch (status) {
      case 'idle':
        return ' (never executed — run it first)';
      case 'failed':
        return ' (failed — retry it first)';
      case 'running':
        return ' (still running — wait for it to finish)';
      default:
        return '';
    }
  };

  for (const depId of entry.config.dependsOn) {
    const dep = this.agents.get(depId);
    if (!dep) throw new Error(`Dependency "${depId}" not found (deleted?). Edit the agent to fix.`);
    if (dep.state.taskStatus === 'done') continue;
    const hint = hintFor(dep.state.taskStatus);
    throw new Error(`Dependency "${dep.config.label}" not completed yet${hint}`);
  }
}
384
+
385
/**
 * Public entry point for running one agent; delegates to runAgentDaemon.
 *
 * @param agentId    id of the agent to run
 * @param userInput  optional extra instructions forwarded to the run
 * @param force      when true, bypasses status checks (used for retry)
 * @throws Error (as a rejected promise) if the daemon has not been started
 */
async runAgent(agentId: string, userInput?: string, force = false): Promise<void> {
  if (!this.daemonActive) {
    throw new Error('Start daemon first before running agents');
  }

  const entry = this.agents.get(agentId);
  const label = entry?.config.label || agentId;
  // A few stack frames are logged to show what triggered this run.
  const callers = new Error().stack?.split('\n').slice(2, 5).join(' <- ');
  console.log(`[workspace] runAgent(${label}, force=${force})`, callers);

  return this.runAgentDaemon(agentId, userInput, force);
}
394
+
395
/**
 * Legacy single-run execution path for one agent.
 *
 * Flow, in order:
 *  1. Input nodes are completed via completeInput() and never executed.
 *  2. A running agent is left alone; a done/failed/idle/approval-queued
 *     agent is reset (a failed agent with a checkpoint keeps its state so it
 *     can resume).
 *  3. All dependencies must be 'done', otherwise an Error is thrown.
 *  4. A backend and an AgentWorker are created, worker events are forwarded,
 *     and pending bus messages are injected into the worker.
 *  5. worker.execute() is started without being awaited.
 *
 * @param agentId    id of the agent to run
 * @param userInput  optional extra instructions, prepended to the upstream context
 * @throws Error when the agent or a dependency is missing or not completed
 * @deprecated Use runAgent (which now delegates to daemon mode)
 */
private async runAgentLegacy(agentId: string, userInput?: string): Promise<void> {
  const entry = this.agents.get(agentId);
  if (!entry) throw new Error(`Agent "${agentId}" not found`);

  // Input nodes are completed via completeInput(), not run
  if (entry.config.type === 'input') {
    if (userInput) this.completeInput(agentId, userInput);
    return;
  }

  // Already executing — nothing to do
  if (entry.state.taskStatus === 'running') return;

  // Allow re-running done/failed/idle(was interrupted)/waiting_approval agents — reset them first
  let resumeFromCheckpoint = false;
  if (entry.state.taskStatus === 'done' || entry.state.taskStatus === 'failed' || entry.state.taskStatus === 'idle' || this.approvalQueue.has(agentId)) {
    this.approvalQueue.delete(agentId);
    console.log(`[workspace] Re-running ${entry.config.label} (was taskStatus=${entry.state.taskStatus})`);
    // For failed: keep lastCheckpoint for resume
    resumeFromCheckpoint = (entry.state.taskStatus === 'failed')
      && entry.state.lastCheckpoint !== undefined;
    if (entry.worker) entry.worker.stop();
    entry.worker = null;
    if (!resumeFromCheckpoint) {
      // Fresh start: clear artifacts but keep smith status, mode, history and CLI session
      entry.state = { smithStatus: entry.state.smithStatus, mode: entry.state.mode, taskStatus: 'idle', history: entry.state.history, artifacts: [], cliSessionId: entry.state.cliSessionId };
    } else {
      // Resume: keep the state object (including lastCheckpoint), only clear the failure
      entry.state.taskStatus = 'idle';
      entry.state.error = undefined;
      entry.state.mode = 'auto';
    }
  }

  const { config } = entry;

  // Check if all dependencies are done
  for (const depId of config.dependsOn) {
    const dep = this.agents.get(depId);
    if (!dep || dep.state.taskStatus !== 'done') {
      throw new Error(`Dependency "${dep?.config.label || depId}" not completed yet`);
    }
  }

  // Build upstream context from dependencies (includes Input node content)
  let upstreamContext = this.buildUpstreamContext(config);
  if (userInput) {
    // User instructions go first, separated from the upstream context by a rule
    const prefix = '## Additional Instructions:\n' + userInput;
    upstreamContext = upstreamContext ? prefix + '\n\n---\n\n' + upstreamContext : prefix;
  }

  // Create backend
  const backend = this.createBackend(config, agentId);

  // Create worker with bus callbacks for inter-agent communication
  // Load agent memory
  const memory = loadMemory(this.workspaceId, agentId);
  const memoryContext = formatMemoryForPrompt(memory);

  // Every other agent in the workspace is a potential message peer
  const peerAgentIds = Array.from(this.agents.keys()).filter(id => id !== agentId);
  const worker = new AgentWorker({
    config,
    backend,
    projectPath: this.projectPath, workspaceId: this.workspaceId,
    peerAgentIds,
    memoryContext: memoryContext || undefined,
    // One-way notification to another agent
    onBusSend: (to, content) => {
      this.bus.send(agentId, to, 'notify', { action: 'agent_message', content });
    },
    // Question to another agent; resolves with the reply text
    onBusRequest: async (to, question) => {
      const response = await this.bus.request(agentId, to, { action: 'question', content: question });
      return response.payload.content || '(no response)';
    },
    onMemoryUpdate: (stepResults) => {
      this.updateAgentMemory(agentId, config, stepResults);
    },
  });
  entry.worker = worker;

  // Forward worker events
  worker.on('event', (event: WorkerEvent) => {
    // Sync state
    entry.state = worker.getState() as AgentState;

    // Persist log entries to disk
    if (event.type === 'log') {
      appendAgentLog(this.workspaceId, agentId, event.entry).catch(() => {});
    }

    this.emit('event', event);

    // Update liveness
    if (event.type === 'task_status' || event.type === 'smith_status') {
      this.updateAgentLiveness(agentId);
    }

    // On step event → notify bus + capture observation
    if (event.type === 'step') {
      const step = config.steps[event.stepIndex];
      if (step) {
        this.bus.notifyStepComplete(agentId, step.label);

        // Capture memory observation from the previous step's result
        const prevStepIdx = event.stepIndex - 1;
        if (prevStepIdx >= 0) {
          const prevStep = config.steps[prevStepIdx];
          // Most recent 'step_complete' result in the history
          const prevResult = entry.state.history
            .filter(h => h.type === 'result' && h.subtype === 'step_complete')
            .slice(-1)[0];
          if (prevResult && prevStep) {
            const obs = parseStepToObservations(prevStep.label, prevResult.content, entry.state.artifacts);
            for (const o of obs) {
              // Best effort: failures while storing an observation are ignored
              addObservation(this.workspaceId, agentId, config.label, config.role, o).catch(() => {});
            }
          }
        }
      }
    }

    // On done → notify + trigger downstream (or reply to sender if from downstream)
    if (event.type === 'done') {
      this.handleAgentDone(agentId, entry, event.summary);

      this.emitWorkspaceStatus();
      this.checkWorkspaceComplete();

      // Note: no auto-rerun. Bus messages that need re-run go through user approval.
    }

    // On error → notify bus
    if (event.type === 'error') {
      this.bus.notifyError(agentId, event.error);
      this.emitWorkspaceStatus();
    }
  });

  // Inject only undelivered (pending) bus messages addressed to this agent
  const pendingMsgs = this.bus.getPendingMessagesFor(agentId)
    .filter(m => m.from !== agentId); // don't inject own messages
  for (const msg of pendingMsgs) {
    const fromLabel = this.agents.get(msg.from)?.config.label || msg.from;
    worker.injectMessage({
      type: 'system',
      subtype: 'bus_message',
      content: `[From ${fromLabel}]: ${msg.payload.content || msg.payload.action}`,
      timestamp: new Date(msg.timestamp).toISOString(),
    });
    // Mark as delivered (only the status field is updated here; no ACK is sent from this block)
    msg.status = 'done';
  }

  // Start from checkpoint if recovering from failure
  const startStep = resumeFromCheckpoint && entry.state.lastCheckpoint !== undefined
    ? entry.state.lastCheckpoint + 1
    : 0;

  this.emitWorkspaceStatus();

  // Execute (non-blocking — fire and forget, events handle the rest)
  worker.execute(startStep, upstreamContext).catch(err => {
    // Only set failed if worker didn't already handle it (avoid duplicate error events)
    if (entry.state.taskStatus !== 'failed') {
      entry.state.taskStatus = 'failed';
      entry.state.error = err?.message || String(err);
      this.emit('event', { type: 'error', agentId, error: entry.state.error! } satisfies WorkerEvent);
    }
  });
}
561
+
562
/**
 * Runs the whole workspace.
 * If the daemon is not active yet, starting it is all this does. Otherwise
 * every agent reported by getDaemonReadyAgents() is started concurrently.
 */
async runAll(): Promise<void> {
  if (this.daemonActive) {
    const readyIds = this.getDaemonReadyAgents();
    await Promise.all(readyIds.map(id => this.runAgentDaemon(id)));
    return;
  }
  return this.startDaemon();
}
570
+
571
+ /** Run a single agent in daemon mode. force=true resets failed/interrupted agents. triggerMessageId tracks which bus message started this. */
572
+ async runAgentDaemon(agentId: string, userInput?: string, force = false, triggerMessageId?: string): Promise<void> {
573
+ const entry = this.agents.get(agentId);
574
+ if (!entry) throw new Error(`Agent "${agentId}" not found`);
575
+
576
+ if (entry.config.type === 'input') {
577
+ if (userInput) this.completeInput(agentId, userInput);
578
+ return;
579
+ }
580
+
581
+ if (entry.state.taskStatus === 'running' && !force) return;
582
+ // Already has a daemon worker running → skip (unless force retry)
583
+ if (entry.worker && entry.state.smithStatus === 'active' && !force) return;
584
+
585
+ // Already done → enter daemon listening directly (don't re-run steps)
586
+ if (entry.state.taskStatus === 'done' && !force) {
587
+ return this.enterDaemonListening(agentId);
588
+ }
589
+
590
+ if (!force) {
591
+ // Failed → leave as-is, user must retry explicitly
592
+ if (entry.state.taskStatus === 'failed') return;
593
+ // waiting_approval → leave as-is
594
+ if (this.approvalQueue.has(agentId)) return;
595
+ }
596
+
597
+ // Reset state for fresh start — preserve smithStatus and mode
598
+ if (entry.state.taskStatus !== 'idle') {
599
+ this.approvalQueue.delete(agentId);
600
+ if (entry.worker) entry.worker.stop();
601
+ entry.worker = null;
602
+ entry.state = {
603
+ smithStatus: entry.state.smithStatus,
604
+ mode: entry.state.mode,
605
+ taskStatus: 'idle',
606
+ history: [],
607
+ artifacts: [],
608
+ cliSessionId: entry.state.cliSessionId, // preserve session for --resume
609
+ };
610
+ }
611
+
612
+ // Ensure smith is active when daemon starts this agent
613
+ if (this.daemonActive && entry.state.smithStatus !== 'active') {
614
+ entry.state.smithStatus = 'active';
615
+ this.emit('event', { type: 'smith_status', agentId, smithStatus: 'active', mode: entry.state.mode } satisfies WorkerEvent);
616
+ }
617
+
618
+ const { config } = entry;
619
+
620
+ // Check dependencies
621
+ for (const depId of config.dependsOn) {
622
+ const dep = this.agents.get(depId);
623
+ if (!dep) throw new Error(`Dependency "${depId}" not found`);
624
+ if (force) {
625
+ // Manual trigger: only require upstream smith to be active (online)
626
+ if (dep.config.type !== 'input' && dep.state.smithStatus !== 'active') {
627
+ throw new Error(`Dependency "${dep.config.label}" smith is not active — start daemon first`);
628
+ }
629
+ } else {
630
+ // Auto trigger: require upstream task completed
631
+ if (dep.state.taskStatus !== 'done') {
632
+ throw new Error(`Dependency "${dep.config.label}" not completed yet`);
633
+ }
634
+ }
635
+ }
636
+
637
+ let upstreamContext = this.buildUpstreamContext(config);
638
+ if (userInput) {
639
+ const prefix = '## Additional Instructions:\n' + userInput;
640
+ upstreamContext = upstreamContext ? prefix + '\n\n---\n\n' + upstreamContext : prefix;
641
+ }
642
+
643
+ const backend = this.createBackend(config, agentId);
644
+ const memory = loadMemory(this.workspaceId, agentId);
645
+ const memoryContext = formatMemoryForPrompt(memory);
646
+ const peerAgentIds = Array.from(this.agents.keys()).filter(id => id !== agentId);
647
+
648
+ const worker = new AgentWorker({
649
+ config, backend,
650
+ projectPath: this.projectPath, workspaceId: this.workspaceId,
651
+ peerAgentIds,
652
+ memoryContext: memoryContext || undefined,
653
+ onBusSend: (to, content) => {
654
+ this.bus.send(agentId, to, 'notify', { action: 'agent_message', content });
655
+ },
656
+ onBusRequest: async (to, question) => {
657
+ const response = await this.bus.request(agentId, to, { action: 'question', content: question });
658
+ return response.payload.content || '(no response)';
659
+ },
660
+ onMessageDone: (messageId) => {
661
+ const busMsg = this.bus.getLog().find(m => m.id === messageId);
662
+ if (busMsg) {
663
+ busMsg.status = 'done';
664
+ this.emit('event', { type: 'bus_message_status', messageId, status: 'done' } as any);
665
+ this.emitAgentsChanged();
666
+ }
667
+ },
668
+ onMessageFailed: (messageId) => {
669
+ const busMsg = this.bus.getLog().find(m => m.id === messageId);
670
+ if (busMsg) {
671
+ busMsg.status = 'failed';
672
+ this.emit('event', { type: 'bus_message_status', messageId, status: 'failed' } as any);
673
+ this.emitAgentsChanged();
674
+ }
675
+ },
676
+ onMemoryUpdate: (stepResults) => {
677
+ try {
678
+ const observations = stepResults.flatMap((r, i) =>
679
+ parseStepToObservations(config.steps[i]?.label || `Step ${i}`, r, entry.state.artifacts)
680
+ );
681
+ for (const obs of observations) addObservation(this.workspaceId, agentId, config.label, config.role, obs);
682
+ const stepLabels = config.steps.map(s => s.label);
683
+ const summary = buildSessionSummary(stepLabels, stepResults, entry.state.artifacts);
684
+ addSessionSummary(this.workspaceId, agentId, summary);
685
+ } catch {}
686
+ },
687
+ });
688
+
689
+ entry.worker = worker;
690
+
691
+ // Track trigger message so smith can mark it done/failed on completion
692
+ if (triggerMessageId) {
693
+ worker.setProcessingMessage(triggerMessageId);
694
+ }
695
+
696
+ // Forward events (same as runAgent)
697
+ worker.on('event', (event: WorkerEvent) => {
698
+ if (event.type === 'task_status') {
699
+ entry.state.taskStatus = event.taskStatus;
700
+ entry.state.error = event.error;
701
+ if (event.taskStatus === 'running') entry.state.startedAt = Date.now();
702
+ const workerState = worker.getState();
703
+ entry.state.daemonIteration = workerState.daemonIteration;
704
+ }
705
+ if (event.type === 'smith_status') {
706
+ entry.state.smithStatus = event.smithStatus;
707
+ entry.state.mode = event.mode;
708
+ }
709
+ if (event.type === 'log') {
710
+ appendAgentLog(this.workspaceId, agentId, event.entry).catch(() => {});
711
+ }
712
+ this.emit('event', event);
713
+ if (event.type === 'task_status' || event.type === 'smith_status') {
714
+ this.updateAgentLiveness(agentId);
715
+ }
716
+ if (event.type === 'step' && event.stepIndex >= 0) {
717
+ const step = config.steps[event.stepIndex];
718
+ if (step) this.bus.notifyStepComplete(agentId, step.label);
719
+ }
720
+ if (event.type === 'done') {
721
+ this.handleAgentDone(agentId, entry, event.summary);
722
+ }
723
+ if (event.type === 'error') {
724
+ this.bus.notifyError(agentId, event.error);
725
+ this.emitWorkspaceStatus();
726
+ }
727
+ });
728
+
729
+ // Inject pending messages
730
+ const pendingMsgs = this.bus.getPendingMessagesFor(agentId)
731
+ .filter(m => m.from !== agentId);
732
+ for (const msg of pendingMsgs) {
733
+ const fromLabel = this.agents.get(msg.from)?.config.label || msg.from;
734
+ worker.injectMessage({
735
+ type: 'system', subtype: 'bus_message',
736
+ content: `[From ${fromLabel}]: ${msg.payload.content || msg.payload.action}`,
737
+ timestamp: new Date(msg.timestamp).toISOString(),
738
+ });
739
+ msg.status = 'done';
740
+ }
741
+
742
+ this.emitWorkspaceStatus();
743
+
744
+ // Execute in daemon mode (non-blocking)
745
+ worker.executeDaemon(0, upstreamContext).catch(err => {
746
+ if (entry.state.taskStatus !== 'failed') {
747
+ entry.state.taskStatus = 'failed';
748
+ entry.state.error = err?.message || String(err);
749
+ this.emit('event', { type: 'error', agentId, error: entry.state.error! } satisfies WorkerEvent);
750
+ }
751
+ });
752
+ }
753
+
754
/**
 * Enter daemon mode and bring every non-input smith online.
 *
 * No-op when the daemon is already active. For each smith the orchestrator
 * tears down any leftover worker and message loop, creates a fresh listening
 * worker, marks the smith active/auto, starts its message loop and refreshes
 * liveness. A failure for one smith marks it `down` and does not abort the
 * others. Afterwards the watch manager and the periodic health check start.
 */
async startDaemon(): Promise<void> {
  if (this.daemonActive) return;
  this.daemonActive = true;
  console.log(`[workspace] Starting daemon mode...`);

  // Bus messages still flagged as running are leftovers from a previous run.
  this.bus.markAllRunningAsFailed();

  // Forge skills are installed once per daemon start; failure is deliberately ignored.
  try {
    const port = Number(process.env.PORT) || 8403;
    installForgeSkills(this.projectPath, this.workspaceId, '', port);
  } catch {}

  const tally = { started: 0, failed: 0 };

  for (const [smithId, smith] of this.agents) {
    if (smith.config.type === 'input') continue;

    // Drop whatever worker survived from the previous run.
    if (smith.worker) {
      smith.worker.stop();
      smith.worker = null;
    }

    // Make sure no old message loop is still ticking for this smith.
    this.stopMessageLoop(smithId);

    try {
      // Creates the worker and puts it straight into its listening loop.
      this.enterDaemonListening(smithId);
      if (!smith.worker) {
        throw new Error('Worker not created');
      }

      smith.state.smithStatus = 'active';
      smith.state.mode = 'auto';
      smith.state.error = undefined;

      this.startMessageLoop(smithId);
      this.updateAgentLiveness(smithId);
      this.emit('event', { type: 'smith_status', agentId: smithId, smithStatus: 'active', mode: 'auto' } satisfies WorkerEvent);

      tally.started++;
      console.log(`[daemon] ✓ ${smith.config.label}: active (task=${smith.state.taskStatus})`);
    } catch (err: any) {
      smith.state.smithStatus = 'down';
      smith.state.error = `Failed to start: ${err.message}`;
      this.emit('event', { type: 'smith_status', agentId: smithId, smithStatus: 'down', mode: 'auto' } satisfies WorkerEvent);
      tally.failed++;
      console.error(`[daemon] ✗ ${smith.config.label}: failed — ${err.message}`);
    }
  }

  // Watch loops for agents that have a watch config.
  this.watchManager.start();

  // Periodic health check (see startHealthCheck for the interval).
  this.startHealthCheck();

  console.log(`[workspace] Daemon started: ${tally.started} smiths active, ${tally.failed} failed`);
  this.emitAgentsChanged();
}
826
+
827
/**
 * List the ids of non-input agents that could start in daemon mode.
 *
 * An agent is skipped when its task is running or its smith is already
 * active. Otherwise it is ready only if every dependency exists and has
 * taskStatus `done`. Each decision is logged.
 */
private getDaemonReadyAgents(): string[] {
  const readyIds: string[] = [];

  for (const [agentId, { config, state }] of this.agents) {
    if (config.type === 'input') continue;

    const alreadyBusy = state.taskStatus === 'running' || state.smithStatus === 'active';
    if (alreadyBusy) {
      console.log(`[daemon] ${config.label}: already smithStatus=${state.smithStatus} taskStatus=${state.taskStatus}`);
      continue;
    }

    // A dependency is unmet when it is missing or has not finished.
    const unmetIds = config.dependsOn.filter(
      depId => this.agents.get(depId)?.state.taskStatus !== 'done',
    );

    if (unmetIds.length === 0) {
      console.log(`[daemon] ${config.label}: ready (taskStatus=${state.taskStatus})`);
      readyIds.push(agentId);
      continue;
    }

    const unmet = unmetIds.map(depId => this.agents.get(depId)?.config.label || depId);
    console.log(`[daemon] ${config.label}: not ready — deps unmet: ${unmet.join(', ')} (taskStatus=${state.taskStatus})`);
  }

  return readyIds;
}
853
+
854
/**
 * Create a fresh worker for an agent and start it in the daemon listening
 * loop without re-running its steps.
 *
 * Any existing worker is detached and stopped first. The new worker keeps the
 * agent's current task status, forwards its events to the orchestrator, and
 * is started via `executeDaemon(0, undefined, true)`. Pending messages are not
 * injected here; the message loop is expected to consume them.
 */
private enterDaemonListening(agentId: string): void {
  const entry = this.agents.get(agentId);
  if (!entry) return;

  // Replace, never duplicate: detach listeners before stopping the old worker.
  const previous = entry.worker;
  if (previous) {
    previous.removeAllListeners();
    previous.stop();
    entry.worker = null;
  }

  const { config } = entry;
  const backend = this.createBackend(config, agentId);
  const peerAgentIds = Array.from(this.agents.keys()).filter(id => id !== agentId);

  // Shared by onMessageDone / onMessageFailed: update the logged bus message and notify listeners.
  const settleBusMessage = (messageId: string, status: 'done' | 'failed'): void => {
    const busMsg = this.bus.getLog().find(m => m.id === messageId);
    if (!busMsg) return;
    busMsg.status = status;
    this.emit('event', { type: 'bus_message_status', messageId, status } as any);
    this.emitAgentsChanged();
  };

  const worker = new AgentWorker({
    config, backend,
    projectPath: this.projectPath, workspaceId: this.workspaceId,
    peerAgentIds,
    initialTaskStatus: entry.state.taskStatus, // carry over the current task status
    onBusSend: (to, content) => {
      this.bus.send(agentId, to, 'notify', { action: 'agent_message', content });
    },
    onBusRequest: async (to, question) => {
      const response = await this.bus.request(agentId, to, { action: 'question', content: question });
      return response.payload.content || '(no response)';
    },
    onMessageDone: (messageId) => settleBusMessage(messageId, 'done'),
    onMessageFailed: (messageId) => settleBusMessage(messageId, 'failed'),
  });

  entry.worker = worker;

  worker.on('event', (event: WorkerEvent) => {
    // Phase 1: mirror worker state into the orchestrator's record before re-emitting.
    switch (event.type) {
      case 'task_status': {
        entry.state.taskStatus = event.taskStatus;
        entry.state.error = event.error;
        entry.state.daemonIteration = worker.getState().daemonIteration;
        break;
      }
      case 'smith_status': {
        entry.state.smithStatus = event.smithStatus;
        entry.state.mode = event.mode;
        break;
      }
      case 'log': {
        appendAgentLog(this.workspaceId, agentId, event.entry).catch(() => {});
        break;
      }
    }

    this.emit('event', event);

    // Phase 2: follow-up actions that run after listeners have seen the event.
    switch (event.type) {
      case 'task_status':
      case 'smith_status':
        this.updateAgentLiveness(agentId);
        break;
      case 'done':
        this.handleAgentDone(agentId, entry, event.summary);
        break;
      case 'error':
        this.bus.notifyError(agentId, event.error);
        break;
    }
  });

  console.log(`[workspace] Agent "${config.label}" entering daemon listening (task=${entry.state.taskStatus})`);

  // Third argument (skipSteps) = true: go directly to the listening loop.
  worker.executeDaemon(0, undefined, true).catch(err => {
    console.error(`[workspace] enterDaemonListening error for ${config.label}:`, err.message);
  });
}
939
+
940
/**
 * Leave daemon mode: the orchestrator shuts down every non-input smith.
 *
 * For each smith the message loop and worker are stopped, the smith is marked
 * `down` with its error cleared, and a `smith_status` event is emitted.
 * Finally, running bus messages are marked failed and the watch manager and
 * health check are stopped.
 */
stopDaemon(): void {
  this.daemonActive = false;
  console.log('[workspace] Stopping daemon...');

  for (const [smithId, smith] of this.agents) {
    if (smith.config.type === 'input') continue;

    this.stopMessageLoop(smithId);

    const { worker } = smith;
    if (worker) {
      worker.stop();
      smith.worker = null;
    }

    smith.state.smithStatus = 'down';
    smith.state.error = undefined;
    this.updateAgentLiveness(smithId);
    this.emit('event', { type: 'smith_status', agentId: smithId, smithStatus: 'down', mode: smith.state.mode } satisfies WorkerEvent);

    console.log(`[daemon] ■ ${smith.config.label}: stopped`);
  }

  // Whatever was mid-flight on the bus can no longer complete.
  this.bus.markAllRunningAsFailed();
  this.emitAgentsChanged();
  this.watchManager.stop();
  this.stopHealthCheck();
  console.log('[workspace] Daemon stopped');
}
974
+
975
+ // ─── Health Check — auto-heal agents ─────────────────
976
+
977
/**
 * Start the periodic health check (every 10 seconds) unless one is already
 * scheduled. The timer is unref'd so it does not keep the process alive.
 */
private startHealthCheck(): void {
  if (!this.healthCheckTimer) {
    this.healthCheckTimer = setInterval(() => this.runHealthCheck(), 10_000);
    this.healthCheckTimer.unref();
  }
}
982
+
983
/** Cancel the periodic health check, if one is scheduled. */
private stopHealthCheck(): void {
  if (!this.healthCheckTimer) return;
  clearInterval(this.healthCheckTimer);
  this.healthCheckTimer = null;
}
989
+
990
/**
 * One health-check pass over all non-input, non-manual agents while the
 * daemon is active. Repairs, in order: missing worker, non-active smith
 * status, missing message loop, stale `running` bus messages (older than 60s
 * while the agent is not running). It also logs pending messages that have
 * been waiting more than 30s on an idle, listening, auto-mode agent.
 */
private runHealthCheck(): void {
  if (!this.daemonActive) return;

  const now = (): number => Date.now();

  for (const [id, entry] of this.agents) {
    if (entry.config.type === 'input' || entry.state.mode === 'manual') continue;

    const label = entry.config.label;

    // 1) No worker: recreate it and skip the remaining checks for this pass.
    // NOTE(review): the message loop is not restarted on this path; check 3
    // appears to pick that up on the next pass. Confirm this delay is intended.
    if (!entry.worker) {
      console.log(`[health] ${label}: no worker — recreating`);
      this.enterDaemonListening(id);
      entry.state.smithStatus = 'active';
      this.emit('event', { type: 'smith_status', agentId: id, smithStatus: 'active', mode: entry.state.mode } as any);
      continue;
    }

    // 2) Smith must be reported as active while the daemon runs.
    if (entry.state.smithStatus !== 'active') {
      console.log(`[health] ${label}: smith=${entry.state.smithStatus} — setting active`);
      entry.state.smithStatus = 'active';
      this.emit('event', { type: 'smith_status', agentId: id, smithStatus: 'active', mode: entry.state.mode } as any);
    }

    // 3) Message loop must be running.
    if (!this.messageLoopTimers.has(id)) {
      console.log(`[health] ${label}: message loop stopped — restarting`);
      this.startMessageLoop(id);
    }

    // 4) Messages still marked running although the agent is not running → failed after 60s.
    if (entry.state.taskStatus !== 'running') {
      const staleCandidates = this.bus
        .getLog()
        .filter(m => m.to === id && m.status === 'running' && m.type !== 'ack');
      for (const m of staleCandidates) {
        const age = now() - m.timestamp;
        if (age <= 60_000) continue;
        console.log(`[health] ${label}: stale running message ${m.id.slice(0, 8)} (${Math.round(age/1000)}s) — marking failed`);
        m.status = 'failed';
        this.emit('event', { type: 'bus_message_status', messageId: m.id, status: 'failed' } as any);
      }
    }

    // 5) Idle auto-mode agent with old pending messages: only logged, not acted on.
    if (entry.state.taskStatus !== 'running' && entry.state.mode === 'auto') {
      const pending = this.bus
        .getPendingMessagesFor(id)
        .filter(m => m.from !== id && m.type !== 'ack');
      if (pending.length > 0 && entry.worker.isListening()) {
        const age = now() - pending[0].timestamp;
        if (age > 30_000) {
          console.log(`[health] ${label}: ${pending.length} pending msg(s) stuck for ${Math.round(age/1000)}s — message loop should pick up`);
        }
      }
    }
  }
}
1045
+
1046
/**
 * React to a watch alert according to the agent's `watch.action`
 * (defaults to `'log'`).
 *
 * - `log`: nothing further to do here.
 * - `analyze`: wake a listening, non-manual, non-running worker with a
 *   synthetic bus message containing the prompt and the change summary.
 * - `approve`: post a bus message from `_watch` and flag it `pending_approval`.
 *
 * @param agentId Agent whose watch fired.
 * @param summary Text describing the detected changes.
 */
private handleWatchAlert(agentId: string, summary: string): void {
  const entry = this.agents.get(agentId);
  if (!entry) return;

  const { label } = entry.config;

  switch (entry.config.watch?.action || 'log') {
    case 'log':
      // Per the original note, the watch manager has already logged it.
      return;

    case 'analyze': {
      const blocked = entry.state.mode === 'manual' || entry.state.taskStatus === 'running';
      if (blocked) {
        console.log(`[watch] ${label}: skipped analyze (mode=${entry.state.mode} task=${entry.state.taskStatus})`);
        return;
      }
      if (!entry.worker?.isListening()) {
        console.log(`[watch] ${label}: skipped analyze (worker=${!!entry.worker} listening=${entry.worker?.isListening()})`);
        return;
      }
      console.log(`[watch] ${label}: triggering analyze`);

      const prompt = entry.config.watch?.prompt || 'Analyze the following changes and produce a report:';
      entry.worker.wake({
        type: 'bus_message',
        messages: [{
          type: 'system' as const,
          subtype: 'watch_trigger',
          content: `[Watch] ${prompt}\n\n${summary}`,
          timestamp: new Date().toISOString(),
        }],
      });
      console.log(`[watch] ${label}: auto-analyzing detected changes`);
      return;
    }

    case 'approve': {
      // The message is parked until the user approves it.
      const parked = this.bus.send('_watch', agentId, 'notify', {
        action: 'watch_changes',
        content: `Watch detected changes (awaiting approval):\n${summary}`,
      });
      parked.status = 'pending_approval';
      this.emit('event', { type: 'bus_message_status', messageId: parked.id, status: 'pending_approval' } as any);
      console.log(`[watch] ${label}: changes detected, awaiting approval`);
      return;
    }

    default:
      return;
  }
}
1092
+
1093
/** @returns `true` while the orchestrator is in daemon mode. */
isDaemonActive(): boolean {
  const { daemonActive } = this;
  return daemonActive;
}
1097
+
1098
/** Pause the agent's worker; does nothing if the agent or its worker is missing. */
pauseAgent(agentId: string): void {
  this.agents.get(agentId)?.worker?.pause();
}
1103
+
1104
/** Resume the agent's worker; does nothing if the agent or its worker is missing. */
resumeAgent(agentId: string): void {
  this.agents.get(agentId)?.worker?.resume();
}
1109
+
1110
/** Stop the agent's worker; does nothing if the agent or its worker is missing. */
stopAgent(agentId: string): void {
  this.agents.get(agentId)?.worker?.stop();
}
1115
+
1116
/**
 * Re-run an agent whose task has failed.
 *
 * @throws Error if the agent is unknown, currently running, or not in the
 *   `failed` task status.
 */
async retryAgent(agentId: string): Promise<void> {
  const entry = this.agents.get(agentId);
  if (!entry) throw new Error(`Agent "${agentId}" not found`);

  switch (entry.state.taskStatus) {
    case 'failed':
      break;
    case 'running':
      throw new Error(`Agent "${entry.config.label}" is already running`);
    default:
      throw new Error(`Agent "${entry.config.label}" is ${entry.state.taskStatus}, not failed`);
  }

  // Third argument is force=true: per the original note, this skips the
  // dependency taskStatus check and only requires upstream smiths to be active.
  await this.runAgent(agentId, undefined, true);
}
1129
+
1130
/**
 * Deliver a human-authored message to a smith.
 *
 * The message is posted on the bus from `'user'` as a `notify` with action
 * `user_message`; per the original note it then sits in the agent's inbox
 * until the message loop consumes it. Unknown agent ids are ignored.
 */
sendMessageToAgent(agentId: string, content: string): void {
  if (!this.agents.get(agentId)) return;

  const payload = { action: 'user_message', content };
  this.bus.send('user', agentId, 'notify', payload);
}
1142
+
1143
/**
 * Approve an agent that is waiting in the approval queue and start it.
 *
 * Does nothing if the agent is not queued. The run is fire-and-forget; a
 * rejection from `runAgent` is logged instead of being silently discarded,
 * so a failed approval start is visible to operators.
 */
approveAgent(agentId: string): void {
  if (!this.approvalQueue.has(agentId)) return;
  this.approvalQueue.delete(agentId);
  this.runAgent(agentId).catch((err: unknown) => {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`[workspace] approveAgent: failed to start agent "${agentId}": ${reason}`);
  });
}
1149
+
1150
/**
 * Record the tmux session name on the agent's state (used to reattach after
 * a refresh), then persist and notify listeners. Unknown agents are ignored.
 */
setTmuxSession(agentId: string, sessionName: string): void {
  const target = this.agents.get(agentId);
  if (target) {
    target.state.tmuxSession = sessionName;
    this.saveNow();
    this.emitAgentsChanged();
  }
}
1158
+
1159
/**
 * Put an agent into manual mode (the user drives it from a terminal).
 * Emits a `smith_status` event carrying the unchanged smith status and the
 * new mode, then notifies listeners and persists. Unknown agents are ignored.
 */
setManualMode(agentId: string): void {
  const target = this.agents.get(agentId);
  if (!target) return;

  const { state, config } = target;
  state.mode = 'manual';

  this.emit('event', { type: 'smith_status', agentId, smithStatus: state.smithStatus, mode: 'manual' } satisfies WorkerEvent);
  this.emitAgentsChanged();
  this.saveNow();
  console.log(`[workspace] Agent "${config.label}" switched to manual mode`);
}
1169
+
1170
/**
 * Return an agent to automatic daemon operation after its manual terminal
 * is closed. Only acts while the daemon is active and for non-input agents.
 * A missing worker is recreated together with its message loop.
 */
restartAgentDaemon(agentId: string): void {
  if (!this.daemonActive) return;

  const target = this.agents.get(agentId);
  if (!target) return;
  if (target.config.type === 'input') return;

  const { state } = target;
  state.mode = 'auto';
  state.error = undefined;

  // The worker may be gone (the original notes that resetAgent kills it).
  if (!target.worker) {
    this.enterDaemonListening(agentId);
    this.startMessageLoop(agentId);
  }

  state.smithStatus = 'active';
  this.emit('event', { type: 'smith_status', agentId, smithStatus: 'active', mode: 'auto' } satisfies WorkerEvent);
  this.emitAgentsChanged();
}
1189
+
1190
/**
 * Mark a manually operated agent as finished (per the original note, invoked
 * by the forge-done skill from the terminal).
 *
 * Sets the task to `done`, clears manual mode, records the changed files as
 * file artifacts, emits `task_status` and `done` events, and notifies
 * dependants over the bus before refreshing workspace status and saving.
 *
 * NOTE(review): dependants may be notified several times here — via
 * `notifyTaskComplete`, the explicit `update_notify` messages, and
 * `broadcastCompletion` / `notifyDownstreamForRevalidation`. Confirm against
 * those helpers whether the overlap is intended.
 *
 * @param agentId      Agent being completed; unknown ids are ignored.
 * @param changedFiles Paths of files changed during the manual work.
 */
completeManualAgent(agentId: string, changedFiles: string[]): void {
  const entry = this.agents.get(agentId);
  if (!entry) return;

  const { state, config } = entry;
  state.taskStatus = 'done';
  state.mode = 'auto'; // leaving manual mode
  state.completedAt = Date.now();
  state.artifacts = changedFiles.map(path => ({ type: 'file' as const, path }));

  console.log(`[workspace] Manual agent "${config.label}" marked done. ${changedFiles.length} files changed.`);

  this.emit('event', { type: 'task_status', agentId, taskStatus: 'done' } satisfies WorkerEvent);
  this.emit('event', { type: 'done', agentId, summary: `Manual: ${changedFiles.length} files changed` } satisfies WorkerEvent);
  this.emitAgentsChanged();

  this.bus.notifyTaskComplete(agentId, changedFiles, `Manual work: ${changedFiles.length} files`);

  // One explicit message per non-input agent that lists this agent in dependsOn.
  for (const [otherId, other] of this.agents) {
    const isCandidate = otherId !== agentId && other.config.type !== 'input';
    if (!isCandidate || !other.config.dependsOn.includes(agentId)) continue;
    this.bus.send(agentId, otherId, 'notify', {
      action: 'update_notify',
      content: `${config.label} completed manual work: ${changedFiles.length} files changed`,
      files: changedFiles,
    });
  }

  if (this.daemonActive) {
    this.broadcastCompletion(agentId);
  }
  this.notifyDownstreamForRevalidation(agentId, changedFiles);
  this.emitWorkspaceStatus();
  this.checkWorkspaceComplete();
  this.saveNow();
}
1229
+
1230
/**
 * Reject a pending approval: remove the agent from the approval queue and,
 * if the agent exists, reset its task status to `idle` and emit the change.
 */
rejectApproval(agentId: string): void {
  this.approvalQueue.delete(agentId);

  const target = this.agents.get(agentId);
  if (!target) return;

  target.state.taskStatus = 'idle';
  this.emit('event', { type: 'task_status', agentId, taskStatus: 'idle' } satisfies WorkerEvent);
}
1239
+
1240
+ // ─── Bus Access ────────────────────────────────────────
1241
+
1242
/** @returns The orchestrator's agent bus instance. */
getBus(): AgentBus {
  const { bus } = this;
  return bus;
}
1245
+
1246
/** @returns The bus message log exactly as returned by `bus.getLog()`. */
getBusLog(): readonly BusMessage[] {
  const log = this.bus.getLog();
  return log;
}
1249
+
1250
+ // ─── State Snapshot (for persistence) ──────────────────
1251
+
1252
/**
 * Build the complete workspace state used for persistence (auto-save and
 * shutdown save). `nodePositions` is always emitted as an empty object here,
 * `busLog` is a shallow copy of the bus log, and `updatedAt` is the current time.
 */
getFullState(): WorkspaceState {
  const agentConfigs = [...this.agents.values()].map(({ config }) => config);
  const busLogCopy = [...this.bus.getLog()];

  return {
    id: this.workspaceId,
    projectPath: this.projectPath,
    projectName: this.projectName,
    agents: agentConfigs,
    agentStates: this.getAllAgentStates(),
    nodePositions: {},
    busLog: busLogCopy,
    busOutbox: this.bus.getAllOutbox(),
    createdAt: this.createdAt,
    updatedAt: Date.now(),
  };
}
1267
+
1268
/**
 * Lightweight view of the workspace: agent configs, agent states, a shallow
 * copy of the bus log, and whether daemon mode is active.
 */
getSnapshot(): {
  agents: WorkspaceAgentConfig[];
  agentStates: Record<string, AgentState>;
  busLog: BusMessage[];
  daemonActive: boolean;
} {
  const agents = [...this.agents.values()].map(({ config }) => config);
  const agentStates = this.getAllAgentStates();
  const busLog = [...this.bus.getLog()];
  return { agents, agentStates, busLog, daemonActive: this.daemonActive };
}
1281
+
1282
/**
 * Replace the in-memory workspace with persisted state.
 *
 * Clears all agents, resets daemon mode to inactive, and rebuilds one entry
 * per persisted config with no worker. States in the legacy single-`status`
 * format are migrated in place. A task that was `running` becomes `failed`
 * ("Interrupted by restart"), and every smith is loaded as `down`. The bus
 * log and optional outbox are then restored, running bus messages are marked
 * failed, and liveness is initialised for every agent.
 *
 * Note: the state objects inside `data.agentStates` are mutated, not copied.
 */
loadSnapshot(data: {
  agents: WorkspaceAgentConfig[];
  agentStates: Record<string, AgentState>;
  busLog: BusMessage[];
  busOutbox?: Record<string, BusMessage[]>;
}): void {
  this.agents.clear();
  this.daemonActive = false; // after a restart the daemon has to be started again explicitly

  // Legacy statuses that collapse to task status 'idle'; any other legacy value is kept as-is.
  const legacyIdleStatuses = ['running', 'listening', 'interrupted', 'waiting_approval', 'paused'];

  for (const config of data.agents) {
    const state = data.agentStates[config.id] || { smithStatus: 'down' as const, mode: 'auto' as const, taskStatus: 'idle' as const, history: [], artifacts: [] };

    // Pre-two-layer format: a single `status` field and no `smithStatus`.
    if ('status' in state && !('smithStatus' in state)) {
      const legacy = state as any;
      const oldStatus = legacy.status;
      legacy.smithStatus = 'down';
      legacy.mode = legacy.runMode || 'auto';
      legacy.taskStatus = legacyIdleStatuses.includes(oldStatus) ? 'idle' : oldStatus;
      delete legacy.status;
      delete legacy.runMode;
      delete legacy.daemonMode;
    }

    // A task that was mid-run when the state was saved cannot have survived.
    if (state.taskStatus === 'running') {
      state.taskStatus = 'failed';
      state.error = 'Interrupted by restart';
    }

    // No daemon loop is running right after a load.
    state.smithStatus = 'down';
    state.daemonIteration = undefined;

    this.agents.set(config.id, { config, worker: null, state });
  }

  this.bus.loadLog(data.busLog);
  if (data.busOutbox) {
    this.bus.loadOutbox(data.busOutbox);
  }

  // Only messages that were running are failed here; per the original note,
  // pending messages stay pending so they can still be retried.
  this.bus.markAllRunningAsFailed();

  // Liveness must be initialised so bus delivery works for the loaded agents.
  for (const agentId of this.agents.keys()) {
    this.updateAgentLiveness(agentId);
  }
}
1334
+
1335
/**
 * Stop all agents, save final state, and release background resources.
 *
 * Order matters: message loops and auto-save stop first, then the state is
 * saved synchronously (it must complete before the process exits). After
 * that the health-check timer and watch manager are stopped, workers are
 * stopped, and the bus is cleared.
 *
 * The health check and watch manager are stopped here because `stopDaemon()`
 * is the only other place that stops them. Without this, a shutdown issued
 * while the daemon is active leaves the health-check interval running, and
 * `runHealthCheck` would restart message loops on a shut-down orchestrator.
 */
shutdown(): void {
  this.stopAllMessageLoops();
  stopAutoSave(this.workspaceId);

  // Synchronous save: must finish before the process exits.
  try {
    saveWorkspaceSync(this.getFullState());
  } catch (err) {
    console.error(`[workspace] Failed to save on shutdown:`, err);
  }

  // Stop background schedulers so nothing revives loops after shutdown.
  this.stopHealthCheck();
  this.watchManager.stop();

  for (const [, entry] of this.agents) {
    entry.worker?.stop();
  }
  this.bus.clear();
}
1348
+
1349
+ // ─── Private ───────────────────────────────────────────
1350
+
1351
/**
 * Instantiate the execution backend for an agent.
 *
 * `'api'` yields an `ApiBackend`; `'cli'` and any other value yield a
 * `CliBackend`. When `agentId` is given, the CLI backend is seeded with the
 * agent's stored `cliSessionId` (if any), and any session id it reports
 * later is written back to the agent's state.
 */
private createBackend(config: WorkspaceAgentConfig, agentId?: string) {
  if (config.backend === 'api') {
    return new ApiBackend();
  }

  // 'cli' and every unrecognised backend value end up here.
  const resumeSessionId = agentId ? this.agents.get(agentId)?.state.cliSessionId : undefined;
  const cli = new CliBackend(resumeSessionId);

  if (agentId) {
    cli.onSessionId = (sessionId) => {
      const owner = this.agents.get(agentId);
      if (owner) owner.state.cliSessionId = sessionId;
    };
  }

  return cli;
}
1371
+
1372
/**
 * Assemble a text block from the outputs of the agent's finished upstream
 * dependencies.
 *
 * Only dependencies with taskStatus `done` contribute:
 * - input nodes contribute their latest entry, or legacy `content`;
 * - agents without file artifacts contribute their last `result` history item;
 * - otherwise each existing artifact file is read (capped at 10,000 chars);
 *   unreadable files get a placeholder line, missing files are skipped.
 *
 * Sections are joined with `---` separators and the total is capped at
 * 50,000 characters.
 *
 * @returns The combined context, or `undefined` when there is nothing to pass on.
 */
private buildUpstreamContext(config: WorkspaceAgentConfig): string | undefined {
  if (config.dependsOn.length === 0) return undefined;

  const PER_FILE_LIMIT = 10000;
  // Total cap to keep the prompt size bounded.
  const MAX_UPSTREAM_CHARS = 50000;

  const parts: string[] = [];

  for (const upstreamId of config.dependsOn) {
    const upstream = this.agents.get(upstreamId);
    if (!upstream || upstream.state.taskStatus !== 'done') continue;

    const { label } = upstream.config;

    if (upstream.config.type === 'input') {
      // Only the newest entry is forwarded, not the whole input history.
      const entries = upstream.config.entries;
      if (entries && entries.length > 0) {
        const newest = entries[entries.length - 1];
        parts.push(`### ${label} (latest input):\n${newest.content}`);
      } else if (upstream.config.content) {
        // Legacy single-content input node.
        parts.push(`### ${label}:\n${upstream.config.content}`);
      }
      continue;
    }

    const fileArtifacts = upstream.state.artifacts.filter(a => a.path);

    if (fileArtifacts.length === 0) {
      // No files: fall back to the most recent 'result' history entry.
      const lastResult = [...upstream.state.history].reverse().find(h => h.type === 'result');
      if (lastResult) {
        parts.push(`### From ${label}:\n${lastResult.content}`);
      }
      continue;
    }

    for (const artifact of fileArtifacts) {
      if (!artifact.path) continue;
      const absolutePath = resolve(this.projectPath, artifact.path);
      try {
        if (!existsSync(absolutePath)) continue;
        const text = readFileSync(absolutePath, 'utf-8');
        const body = text.length > PER_FILE_LIMIT
          ? text.slice(0, PER_FILE_LIMIT) + '\n... (truncated)'
          : text;
        parts.push(`### From ${label} — ${artifact.path}:\n${body}`);
      } catch {
        parts.push(`### From ${label} — ${artifact.path}: (could not read file)`);
      }
    }
  }

  if (parts.length === 0) return undefined;

  const joined = parts.join('\n\n---\n\n');
  if (joined.length <= MAX_UPSTREAM_CHARS) return joined;

  return joined.slice(0, MAX_UPSTREAM_CHARS) + '\n\n... (upstream context truncated, ' + joined.length + ' chars total)';
}
1437
+
1438
+ /** After an agent completes, check if any downstream agents should be triggered */
1439
+ /**
1440
+ * Broadcast completion to all downstream agents via bus messages.
1441
+ * Replaces direct triggerDownstream — all execution is now message-driven.
1442
+ * If no artifacts/changes, no message is sent → downstream stays idle.
1443
+ */
1444
/**
 * Derive a `causedBy` reference from the bus message the worker is currently
 * processing.
 *
 * @param agentId Not used by this implementation.
 * @param entry   Agent entry whose worker is asked for its current message id.
 * @returns `{ messageId, from, to }` of that message, or `undefined` when the
 *   worker reports no message or the message is not in the bus log.
 */
private buildCausedBy(agentId: string, entry: { worker: AgentWorker | null }): BusMessage['causedBy'] | undefined {
  const currentId = entry.worker?.getCurrentMessageId?.();
  if (!currentId) return undefined;

  const origin = this.bus.getLog().find(m => m.id === currentId);
  return origin
    ? { messageId: origin.id, from: origin.from, to: origin.to }
    : undefined;
}
1452
+
1453
/**
 * Common completion handler for a worker's `done` event.
 *
 * Announces task completion on the bus, works out which bus message (if any)
 * caused this run, and broadcasts the completion to downstream agents. When
 * the causing message came from a non-upstream sender, it only logs that no
 * extra reply is sent; the broadcast happens in both cases.
 *
 * @param agentId Id of the agent that finished.
 * @param entry   The agent's orchestrator entry.
 * @param summary Optional summary forwarded to `notifyTaskComplete`.
 */
private handleAgentDone(agentId: string, entry: { config: WorkspaceAgentConfig; worker: AgentWorker | null; state: AgentState }, summary?: string): void {
  const files = entry.state.artifacts.filter(a => a.path).map(a => a.path!);
  console.log(`[workspace] Agent "${entry.config.label}" (${agentId}) completed. Artifacts: ${files.length}.`);

  this.bus.notifyTaskComplete(agentId, files, summary);

  // Which message, if any, triggered this execution?
  const causedBy = this.buildCausedBy(agentId, entry);
  const processedMsg = causedBy ? this.bus.getLog().find(m => m.id === causedBy.messageId) : null;

  if (processedMsg && !this.isUpstream(processedMsg.from, agentId)) {
    // Request came from a non-upstream sender: no reply is sent from here
    // (the original note says the message is already marked done elsewhere).
    const senderLabel = this.agents.get(processedMsg.from)?.config.label || processedMsg.from;
    console.log(`[bus] ${entry.config.label}: processed request from ${senderLabel} — marked done, no reply`);
  }

  // Both paths notify this agent's own downstream. Revalidation of downstream
  // agents is intentionally not triggered here (the original notes it caused
  // duplicate messages and re-execution loops).
  this.broadcastCompletion(agentId, causedBy);

  this.emitWorkspaceStatus();
  this.checkWorkspaceComplete?.();
}
1482
+
1483
/**
 * Send an `upstream_complete` notification to every agent that lists the completed agent
 * in its `dependsOn`, skipping the completed agent itself and agents of type `input`.
 *
 * The message content carries the completed agent's label, its artifact count and a
 * truncated copy of its most recent `final_summary` / `step_summary` history entry.
 *
 * @param completedAgentId - Id of the agent that just completed; no-op if unknown.
 * @param causedBy - Optional causal reference attached to each notification.
 */
private broadcastCompletion(completedAgentId: string, causedBy?: BusMessage['causedBy']): void {
  const source = this.agents.get(completedAgentId);
  if (!source) return;

  const completedLabel = source.config.label;

  const files: string[] = [];
  for (const artifact of source.state.artifacts) {
    if (artifact.path) files.push(artifact.path);
  }

  // Most recent summary-type history entry, if there is one.
  const latestSummary = source.state.history
    .filter(item => item.subtype === 'final_summary' || item.subtype === 'step_summary')
    .pop();
  const summary = latestSummary?.content || '';

  let content: string;
  if (files.length > 0) {
    content = `${completedLabel} completed: ${files.length} files changed. ${summary.slice(0, 200)}`;
  } else {
    content = `${completedLabel} completed. ${summary.slice(0, 300) || 'Check upstream outputs for updates.'}`;
  }

  // Notify each direct dependent.
  let notified = 0;
  for (const [id, dependent] of this.agents) {
    const isRecipient =
      id !== completedAgentId &&
      dependent.config.type !== 'input' &&
      dependent.config.dependsOn.includes(completedAgentId);
    if (!isRecipient) continue;

    this.bus.send(completedAgentId, id, 'notify', {
      action: 'upstream_complete',
      content,
      files,
    }, { category: 'notification', causedBy });
    notified += 1;
    console.log(`[bus] ${completedLabel} → ${dependent.config.label}: upstream_complete (${files.length} files)`);
  }

  if (notified === 0) {
    console.log(`[bus] ${completedLabel} completed — no downstream agents`);
  }
}
1517
+
1518
+ // ─── Agent liveness ─────────────────────────────────────
1519
+
1520
/**
 * Publish an agent's liveness to the bus.
 *
 * - `busy`  when the agent's task status is `running`
 * - `alive` when it is not running but its smith status is `active`
 * - `down`  otherwise, including when the agent id is unknown
 */
private updateAgentLiveness(agentId: string): void {
  const entry = this.agents.get(agentId);

  let liveness: 'busy' | 'alive' | 'down' = 'down';
  if (entry) {
    if (entry.state.taskStatus === 'running') {
      liveness = 'busy';
    } else if (entry.state.smithStatus === 'active') {
      liveness = 'alive';
    }
  }

  this.bus.setAgentStatus(agentId, liveness);
}
1530
+
1531
+ // ─── Bus message handling ──────────────────────────────
1532
+
1533
/**
 * Handle a message arriving from the bus: ignore it if the bus reports it as a duplicate,
 * otherwise emit it as a `bus_message` event, route it to its target agent and re-run the
 * workspace-complete check.
 */
private handleBusMessage(msg: BusMessage): void {
  if (!this.bus.isDuplicate(msg.id)) {
    // Emitted only after the duplicate check, so listeners never see the same message twice.
    this.emit('event', { type: 'bus_message', message: msg } satisfies OrchestratorEvent);

    // Hand over to the addressed agent, then see whether the workspace is finished.
    this.routeMessageToAgent(msg.to, msg);
    this.checkWorkspaceComplete();
  }
}
1544
+
1545
/**
 * Deliver a bus message to its target agent. Unknown targets are ignored.
 *
 * Outcome by target:
 * - `input` node: for `info_request` / `question` actions the message is marked `done` and a
 *   `user_input_request` event is emitted; any other action is dropped. Nothing is added to history.
 * - manual mode: the message is recorded in the agent's history and marked `done`.
 * - `requiresApproval`: recorded in history, marked `pending_approval`, status event emitted.
 * - otherwise: recorded in history and left in its current status for the message loop.
 *
 * @param targetId - Id of the receiving agent.
 * @param msg - The bus message; its `status` may be mutated.
 */
private routeMessageToAgent(targetId: string, msg: BusMessage): void {
  const recipient = this.agents.get(targetId);
  if (!recipient) return;

  const senderLabel = this.agents.get(msg.from)?.config.label || msg.from;
  const { action } = msg.payload;
  const body = msg.payload.content || '';

  console.log(`[bus] ${senderLabel} → ${recipient.config.label}: ${action} "${body.slice(0, 80)}"`);

  const historyEntry = {
    type: 'system' as const,
    subtype: 'bus_message',
    content: `[From ${senderLabel}]: ${body || action}`,
    timestamp: new Date(msg.timestamp).toISOString(),
  };

  // ── Input node: turn questions into a request for user input ──
  if (recipient.config.type === 'input') {
    const asksUser = action === 'info_request' || action === 'question';
    if (asksUser) {
      msg.status = 'done';
      this.emit('event', {
        type: 'user_input_request',
        agentId: targetId,
        fromAgent: msg.from,
        question: body,
      } satisfies OrchestratorEvent);
    }
    return;
  }

  // ── Every other agent keeps a record of the message in its history ──
  recipient.state.history.push(historyEntry);

  // ── Manual mode: the user handles it in the terminal, so it counts as consumed ──
  if (recipient.state.mode === 'manual') {
    msg.status = 'done';
    console.log(`[bus] ${recipient.config.label}: received ${action} in manual mode — stored in inbox`);
    return;
  }

  // ── Approval-gated agent: park the message until it is approved ──
  if (recipient.config.requiresApproval) {
    msg.status = 'pending_approval';
    this.emit('event', { type: 'bus_message_status', messageId: msg.id, status: 'pending_approval' } as any);
    console.log(`[bus] ${recipient.config.label}: received ${action} — pending approval`);
    return;
  }

  // ── Default: status is untouched; the message loop picks it up when the smith is ready ──
  console.log(`[bus] ${recipient.config.label}: received ${action} — queued in inbox (${msg.status})`);
}
1602
+
1603
+ // ─── Message consumption loop ─────────────────────────
1604
+ private messageLoopTimers = new Map<string, NodeJS.Timeout>();
1605
+
1606
/**
 * Start the message consumption loop for a smith.
 *
 * Registers an interval (every 2 seconds, plus one immediate run) that hands the agent's next
 * eligible pending bus message to its worker. If a loop is already registered for the agent
 * this is a no-op. The loop removes itself once the agent no longer exists.
 *
 * While selecting a message, every pending message is examined: tickets past their retry
 * limit are marked `failed`/`closed`, and notifications coming from a non-upstream agent that
 * are not replies to this agent's own messages are marked `done` and skipped.
 */
private startMessageLoop(agentId: string): void {
  if (this.messageLoopTimers.has(agentId)) return; // loop already registered

  let notListeningTicks = 0;

  const pollInbox = (): void => {
    const entry = this.agents.get(agentId);
    if (!entry) {
      this.stopMessageLoop(agentId);
      return;
    }

    // Inactive smith, manual mode (user in terminal) or a running task: skip this tick only.
    // The loop stays registered so consumption resumes once the condition clears.
    if (
      entry.state.smithStatus !== 'active' ||
      entry.state.mode === 'manual' ||
      entry.state.taskStatus === 'running'
    ) {
      return;
    }

    // No worker yet: ask for one to be recreated when the daemon is active.
    if (!entry.worker) {
      if (this.daemonActive) {
        console.log(`[inbox] ${entry.config.label}: no worker, recreating...`);
        this.enterDaemonListening(agentId);
      }
      return;
    }

    // Worker exists but is not listening: log once every 15 such ticks.
    if (!entry.worker.isListening()) {
      notListeningTicks += 1;
      if (notListeningTicks % 15 === 0) {
        console.log(`[inbox] ${entry.config.label}: not listening (smith=${entry.state.smithStatus} task=${entry.state.taskStatus})`);
      }
      return;
    }

    // Only one message per agent may be in flight.
    const alreadyProcessing = this.bus.getLog().some(m => m.to === agentId && m.status === 'running' && m.type !== 'ack');
    if (alreadyProcessing) return;

    // requiresApproval is applied when a message arrives (routeMessageToAgent), not here;
    // approved messages show up as ordinary 'pending' ones.

    const inbox = this.bus.getPendingMessagesFor(agentId).filter(m => m.from !== agentId && m.type !== 'ack');

    // NOTE: this predicate mutates rejected messages, so it must see every inbox item.
    const consumable = inbox.filter(candidate => {
      // Tickets are always eligible until they run out of retries.
      if (candidate.category === 'ticket') {
        const maxRetries = candidate.maxRetries ?? 3;
        const withinLimit = (candidate.ticketRetries || 0) < maxRetries;
        if (withinLimit) return true;

        console.log(`[inbox] ${entry.config.label}: ticket ${candidate.id.slice(0, 8)} exceeded max retries (${maxRetries}), marking failed`);
        candidate.status = 'failed' as any;
        candidate.ticketStatus = 'closed';
        this.emit('event', { type: 'bus_message_status', messageId: candidate.id, status: 'failed' } as any);
        return false;
      }

      // Senders whose id starts with '_' (e.g. _watch, _system) and 'user' skip the causedBy rules.
      if (candidate.from.startsWith('_') || candidate.from === 'user') return true;

      // No causedBy → accepted as-is (initial trigger).
      if (!candidate.causedBy) return true;

      // Rule 1: a response to something this agent sent is accepted.
      const ownOutbox = this.bus.getOutboxFor(agentId);
      if (ownOutbox.some(sent => sent.id === candidate.causedBy!.messageId)) return true;

      // Rule 2: anything else is accepted only when it comes from upstream.
      if (this.isUpstream(candidate.from, agentId)) return true;

      // Notification flowing back from downstream: consume it silently to prevent loops.
      console.log(`[inbox] ${entry.config.label}: discarding notification from downstream ${this.agents.get(candidate.from)?.config.label || candidate.from}`);
      candidate.status = 'done' as any;
      return false;
    });
    if (consumable.length === 0) return;

    const nextMsg = consumable[0];
    const senderLabel = this.agents.get(nextMsg.from)?.config.label || nextMsg.from;
    console.log(`[inbox] ${entry.config.label}: consuming message from ${senderLabel} (${nextMsg.payload.action})`);

    // Flag the message as in-flight before waking the worker.
    nextMsg.status = 'running' as any;
    this.emit('event', { type: 'bus_message_status', messageId: nextMsg.id, status: 'running' } as any);

    const wakeEntry = {
      type: 'system' as const,
      subtype: 'bus_message',
      content: `[From ${senderLabel}]: ${nextMsg.payload.content || nextMsg.payload.action}`,
      timestamp: new Date(nextMsg.timestamp).toISOString(),
    };

    entry.worker.setProcessingMessage(nextMsg.id);
    entry.worker.wake({ type: 'bus_message', messages: [wakeEntry] });
  };

  // Poll every 2 seconds; unref so the timer never keeps the process alive (e.g. in tests).
  const timer = setInterval(pollInbox, 2000);
  timer.unref();
  this.messageLoopTimers.set(agentId, timer);

  // First poll happens right away instead of waiting for the interval.
  pollInbox();
}
1712
+
1713
/**
 * Stop the message consumption loop for a smith.
 * Clears the agent's interval and forgets it; does nothing when no loop is registered.
 */
private stopMessageLoop(agentId: string): void {
  const handle = this.messageLoopTimers.get(agentId);
  if (!handle) return;

  clearInterval(handle);
  this.messageLoopTimers.delete(agentId);
}
1721
+
1722
/** Stop every registered message consumption loop. */
private stopAllMessageLoops(): void {
  for (const agentId of this.messageLoopTimers.keys()) {
    this.stopMessageLoop(agentId);
  }
}
1728
+
1729
/**
 * Emit `workspace_complete` when no agent has outstanding work and every bus request
 * has been answered.
 *
 * The workspace is NOT complete while any of these hold:
 * - there are no agents at all;
 * - an agent is `running` (the worker's state wins over the stored state when a worker exists)
 *   or is waiting in the approval queue;
 * - an `idle` agent with at least one dependency has all of its dependencies `done`,
 *   i.e. it is ready to run (idle agents with unmet dependencies do not block completion);
 * - the bus log contains a `request` with no `response` whose `payload.replyTo` is its id.
 */
private checkWorkspaceComplete(): void {
  if (this.agents.size === 0) return;

  for (const [id, entry] of this.agents) {
    const taskSt = entry.worker?.getState()?.taskStatus ?? entry.state.taskStatus;
    if (taskSt === 'running' || this.approvalQueue.has(id)) return;

    if (taskSt === 'idle' && entry.config.dependsOn.length > 0) {
      const readyToRun = entry.config.dependsOn.every(
        depId => this.agents.get(depId)?.state.taskStatus === 'done',
      );
      if (readyToRun) return; // idle but ready to run = not complete
    }
  }

  // Fetch the log once and index the answered request ids, so the unanswered-request check
  // is a single pass instead of rescanning the whole log for every message.
  const log = this.bus.getLog();
  const answeredIds = new Set<unknown>();
  for (const logged of log) {
    if (logged.type === 'response') answeredIds.add(logged.payload.replyTo);
  }
  const hasPendingRequests = log.some(logged => logged.type === 'request' && !answeredIds.has(logged.id));
  if (hasPendingRequests) return;

  console.log('[workspace] All agents complete, no pending requests. Workspace done.');
  this.emit('event', { type: 'workspace_complete' } satisfies OrchestratorEvent);
}
1764
+
1765
/**
 * List the ids of agents that are `idle` and whose every dependency exists and is `done`.
 * Agents without dependencies qualify as soon as they are idle.
 *
 * @returns Agent ids in the iteration order of the agents map.
 */
private getReadyAgents(): string[] {
  const isDone = (depId: string): boolean => this.agents.get(depId)?.state.taskStatus === 'done';

  return Array.from(this.agents)
    .filter(([, entry]) => entry.state.taskStatus === 'idle' && entry.config.dependsOn.every(isDone))
    .map(([id]) => id);
}
1778
+
1779
+ /**
1780
+ * Parse CLI agent output for bus message markers.
1781
+ * Format: [SEND:TargetLabel:action] content
1782
+ * Example: [SEND:Engineer:fix_request] SQL injection found in auth module
1783
+ */
1784
/**
 * After an agent completes, ask direct dependents that have already finished
 * (`done` or `failed`) to re-validate their work.
 *
 * For each such dependent this sends an `update_notify` bus message, resets its task status
 * to `idle`, appends a `revalidation_request` history entry, adds it to the approval queue
 * and emits `task_status` and `approval_required` events, so the re-run waits for approval.
 *
 * @param completedAgentId - Id of the agent that just completed.
 * @param files - Changed file paths attached to the notification.
 */
private notifyDownstreamForRevalidation(completedAgentId: string, files: string[]): void {
  const completedLabel = this.agents.get(completedAgentId)?.config.label || completedAgentId;

  for (const [id, dependent] of this.agents) {
    // Only direct dependents of the completed agent are considered.
    if (id === completedAgentId || !dependent.config.dependsOn.includes(completedAgentId)) continue;

    // Agents that have not finished yet have nothing to re-validate.
    const { taskStatus } = dependent.state;
    const alreadyFinished = taskStatus === 'done' || taskStatus === 'failed';
    if (!alreadyFinished) continue;

    console.log(`[workspace] ${completedLabel} changed → ${dependent.config.label} needs re-validation`);

    this.bus.send(completedAgentId, id, 'notify', {
      action: 'update_notify',
      content: `${completedLabel} completed with changes. Please re-validate.`,
      files,
    });

    // Back to idle and queued for approval, so the user confirms the re-run.
    dependent.state.taskStatus = 'idle';
    dependent.state.history.push({
      type: 'system',
      subtype: 'revalidation_request',
      content: `[${completedLabel}] completed with changes — approve to re-run validation`,
      timestamp: new Date().toISOString(),
    });
    this.approvalQueue.add(id);

    this.emit('event', { type: 'task_status', agentId: id, taskStatus: 'idle' } satisfies WorkerEvent);
    this.emit('event', {
      type: 'approval_required',
      agentId: id,
      upstreamId: completedAgentId,
    } satisfies OrchestratorEvent);
  }
}
1824
+
1825
+ /** Track how many history entries have been scanned per agent to avoid re-parsing */
1826
+ private busMarkerScanned = new Map<string, number>();
1827
+
1828
/**
 * Scan an agent's not-yet-scanned history entries for bus message markers and send a
 * `notify` bus message for each one that names another known agent.
 *
 * Marker format: `[SEND:TargetLabel:action] content`
 * Example: `[SEND:Engineer:fix_request] SQL injection found in auth module`
 *
 * Target labels are matched case-insensitively against agent labels. Markers addressed to
 * an unknown label or to the sending agent itself are ignored. The number of entries already
 * scanned is remembered per agent so old history is not re-sent.
 *
 * @param fromAgentId - Id of the agent whose output is being parsed.
 * @param history - That agent's full history; only entries past the last scanned index are read.
 */
private parseBusMarkers(fromAgentId: string, history: { type: string; content: string }[]): void {
  const markerPattern = /\[SEND:([^:]+):([^\]]+)\]\s*(.+)/g;

  // Lower-cased label → agent id (a later agent wins when labels collide).
  const idByLabel = new Map<string, string>();
  this.agents.forEach((agent, id) => {
    idByLabel.set(agent.config.label.toLowerCase(), id);
  });

  // Advance the scan cursor before parsing so each entry is looked at once.
  const alreadyScanned = this.busMarkerScanned.get(fromAgentId) ?? 0;
  const unscanned = history.slice(alreadyScanned);
  this.busMarkerScanned.set(fromAgentId, history.length);

  for (const { content: text } of unscanned) {
    for (let hit = markerPattern.exec(text); hit !== null; hit = markerPattern.exec(text)) {
      const [, rawLabel, rawAction, rawContent] = hit;
      const targetLabel = rawLabel.trim();
      const action = rawAction.trim();
      const content = rawContent.trim();

      const targetId = idByLabel.get(targetLabel.toLowerCase());
      if (!targetId || targetId === fromAgentId) continue;

      console.log(`[bus] Parsed marker from ${fromAgentId}: → ${targetLabel} (${action}): ${content.slice(0, 60)}`);
      this.bus.send(fromAgentId, targetId, 'notify', { action, content });
    }
  }
}
1855
+
1856
/**
 * Persist the current full workspace state without waiting for the result.
 *
 * The save is best-effort: a rejected save is logged and never propagated to the caller,
 * so a persistence failure is visible in the logs instead of being silently dropped.
 */
private saveNow(): void {
  saveWorkspace(this.getFullState()).catch((err: unknown) => {
    const reason = err instanceof Error ? err.message : String(err);
    console.error('[workspace] Failed to save workspace state:', reason);
  });
}
1859
+
1860
/**
 * Emit an `agents_changed` event carrying every agent's config together with all agent
 * states (per the original note, this is what lets SSE push the updated list to the frontend).
 */
private emitAgentsChanged(): void {
  const agents = Array.from(this.agents.values(), agent => agent.config);
  const agentStates = this.getAllAgentStates();

  this.emit('event', { type: 'agents_changed', agents, agentStates } satisfies WorkerEvent);
}
1866
+
1867
/**
 * Emit a `workspace_status` event with the number of running agents, the number of done
 * agents and the total agent count. For each agent the worker's task status is used when
 * a worker exists and reports one; otherwise the stored state is used.
 */
private emitWorkspaceStatus(): void {
  const tally = { running: 0, done: 0 };

  for (const entry of this.agents.values()) {
    const status = entry.worker?.getState()?.taskStatus ?? entry.state.taskStatus;
    if (status === 'running') {
      tally.running += 1;
    } else if (status === 'done') {
      tally.done += 1;
    }
  }

  this.emit('event', {
    type: 'workspace_status',
    running: tally.running,
    done: tally.done,
    total: this.agents.size,
  } satisfies OrchestratorEvent);
}
1882
+
1883
/**
 * Update agent memory after execution completes.
 *
 * Converts the last step's result into observations and stores them one by one, then
 * stores a session summary built from all step labels and results. Failures are logged
 * and never rethrown, so a memory error cannot break the caller.
 *
 * @param agentId - Id of the agent whose memory is updated.
 * @param config - The agent's config; `label`, `role` and `steps` are read.
 * @param stepResults - Result text per step; the last element is paired with the last step.
 */
private async updateAgentMemory(agentId: string, config: WorkspaceAgentConfig, stepResults: string[]): Promise<void> {
  try {
    const entry = this.agents.get(agentId);

    // Only the last step is captured here (earlier steps are captured in the 'step' event
    // handler, per the original note).
    const lastStep = config.steps[config.steps.length - 1];
    const lastResult = stepResults[stepResults.length - 1];
    if (lastStep && lastResult) {
      const observations = parseStepToObservations(lastStep.label, lastResult, entry?.state.artifacts || []);
      // Stored sequentially so observations are added in the order they were parsed.
      for (const observation of observations) {
        await addObservation(this.workspaceId, agentId, config.label, config.role, observation);
      }
    }

    // Add session summary
    const summary = buildSessionSummary(
      config.steps.map(step => step.label),
      stepResults,
      entry?.state.artifacts || [],
    );
    await addSessionSummary(this.workspaceId, agentId, summary);

    console.log(`[workspace] Updated memory for ${config.label}`);
  } catch (err: unknown) {
    // Narrow before reading `.message`: a thrown value need not be an Error (it may even be
    // null/undefined), and dereferencing it blindly would throw inside this handler.
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`[workspace] Failed to update memory for ${config.label}:`, reason);
  }
}
1914
+ }