better-symphony 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/CLAUDE.md +60 -0
  2. package/LICENSE +21 -0
  3. package/README.md +292 -0
  4. package/dist/web/app.css +2 -0
  5. package/dist/web/index.html +13 -0
  6. package/dist/web/main.js +235 -0
  7. package/package.json +62 -0
  8. package/src/agent/claude-runner.ts +576 -0
  9. package/src/agent/protocol.ts +2 -0
  10. package/src/agent/runner.ts +2 -0
  11. package/src/agent/session.ts +113 -0
  12. package/src/cli.ts +354 -0
  13. package/src/config/loader.ts +379 -0
  14. package/src/config/types.ts +382 -0
  15. package/src/index.ts +53 -0
  16. package/src/linear-cli.ts +414 -0
  17. package/src/logging/logger.ts +143 -0
  18. package/src/orchestrator/multi-orchestrator.ts +266 -0
  19. package/src/orchestrator/orchestrator.ts +1357 -0
  20. package/src/orchestrator/scheduler.ts +195 -0
  21. package/src/orchestrator/state.ts +201 -0
  22. package/src/prompts/github-system-prompt.md +51 -0
  23. package/src/prompts/linear-system-prompt.md +44 -0
  24. package/src/tracker/client.ts +577 -0
  25. package/src/tracker/github-issues-tracker.ts +280 -0
  26. package/src/tracker/github-pr-tracker.ts +298 -0
  27. package/src/tracker/index.ts +9 -0
  28. package/src/tracker/interface.ts +76 -0
  29. package/src/tracker/linear-tracker.ts +147 -0
  30. package/src/tracker/queries.ts +281 -0
  31. package/src/tracker/types.ts +125 -0
  32. package/src/tui/App.tsx +157 -0
  33. package/src/tui/LogView.tsx +120 -0
  34. package/src/tui/StatusBar.tsx +72 -0
  35. package/src/tui/TabBar.tsx +55 -0
  36. package/src/tui/sink.ts +47 -0
  37. package/src/tui/types.ts +6 -0
  38. package/src/tui/useOrchestrator.ts +244 -0
  39. package/src/web/server.ts +182 -0
  40. package/src/web/sink.ts +67 -0
  41. package/src/web-ui/App.tsx +60 -0
  42. package/src/web-ui/components/agent-table.tsx +57 -0
  43. package/src/web-ui/components/header.tsx +72 -0
  44. package/src/web-ui/components/log-stream.tsx +111 -0
  45. package/src/web-ui/components/retry-table.tsx +58 -0
  46. package/src/web-ui/components/stats-cards.tsx +142 -0
  47. package/src/web-ui/components/ui/badge.tsx +30 -0
  48. package/src/web-ui/components/ui/button.tsx +39 -0
  49. package/src/web-ui/components/ui/card.tsx +32 -0
  50. package/src/web-ui/globals.css +27 -0
  51. package/src/web-ui/index.html +13 -0
  52. package/src/web-ui/lib/use-sse.ts +98 -0
  53. package/src/web-ui/lib/utils.ts +25 -0
  54. package/src/web-ui/main.tsx +4 -0
  55. package/src/workspace/hooks.ts +97 -0
  56. package/src/workspace/manager.ts +211 -0
  57. package/src/workspace/render-hook.ts +13 -0
  58. package/workflows/dev.md +127 -0
  59. package/workflows/github-issues.md +107 -0
  60. package/workflows/pr-review.md +89 -0
  61. package/workflows/prd.md +170 -0
  62. package/workflows/ralph.md +95 -0
  63. package/workflows/smoke.md +66 -0
@@ -0,0 +1,1357 @@
1
+ /**
2
+ * Symphony Orchestrator
3
+ * Main scheduling loop and coordination
4
+ */
5
+
6
+ import type {
7
+ ServiceConfig,
8
+ Issue,
9
+ ChildIssue,
10
+ WorkflowDefinition,
11
+ OrchestratorState,
12
+ RunningEntry,
13
+ RetryEntry,
14
+ RunAttempt,
15
+ AgentEvent,
16
+ } from "../config/types.js";
17
+ import { AgentError } from "../config/types.js";
18
+ import {
19
+ loadWorkflow,
20
+ buildServiceConfig,
21
+ validateServiceConfig,
22
+ renderPrompt,
23
+ renderSubtaskPrompt,
24
+ } from "../config/loader.js";
25
+ import { basename, join } from "path";
26
+ import { mkdirSync, writeFileSync } from "fs";
27
+ import { homedir } from "os";
28
+ import { LinearClient } from "../tracker/client.js";
29
+ import { GitHubPRTracker } from "../tracker/github-pr-tracker.js";
30
+ import { GitHubIssuesTracker } from "../tracker/github-issues-tracker.js";
31
+ import type { Tracker } from "../tracker/interface.js";
32
+ import { WorkspaceManager } from "../workspace/manager.js";
33
+ import { ClaudeRunner } from "../agent/claude-runner.js";
34
+ import { parseRateLimits } from "../agent/session.js";
35
+ import { logger } from "../logging/logger.js";
36
+ import * as state from "./state.js";
37
+ import * as scheduler from "./scheduler.js";
38
+ import { watch } from "chokidar";
39
+
40
/**
 * Options accepted by the `Orchestrator` constructor.
 */
export interface OrchestratorOptions {
  /**
   * Path of the workflow file. It is passed to `loadWorkflow`, watched for
   * changes, and its basename (minus the ".md" extension) becomes the
   * workflow name.
   */
  workflowPath: string;

  /**
   * Dry-run flag.
   * NOTE(review): the constructor does not read this field; presumably the
   * caller uses it to decide between `start()` and `dryRun()` — confirm.
   */
  dryRun?: boolean;

  /** Injected shared LinearClient (for multi-workflow mode). */
  linearClient?: LinearClient;

  /** If true, the internal poll loop is skipped and the caller drives ticks externally. */
  managedPolling?: boolean;

  /** If true, rendered prompts and agent transcripts are written to the logs dir. */
  debug?: boolean;
}
50
+
51
+ export class Orchestrator {
52
+ private workflowPath: string;
53
+ private workflow: WorkflowDefinition | null = null;
54
+ private config: ServiceConfig | null = null;
55
+ private orchState: OrchestratorState | null = null;
56
+ private linearClient: LinearClient | null = null;
57
+ private tracker: Tracker | null = null;
58
+ private workspaceManager: WorkspaceManager | null = null;
59
+ private pollTimer: Timer | null = null;
60
+ private fileWatcher: ReturnType<typeof watch> | null = null;
61
+ private running = false;
62
+ private managedPolling: boolean;
63
+ private debug: boolean;
64
+ private workflowName: string;
65
+
66
/**
 * Record the caller's options. Nothing is loaded or started here; the
 * workflow file is only read later by `start()` or `dryRun()`.
 *
 * @param options - Construction options; `managedPolling` and `debug`
 *   default to `false` when omitted.
 */
constructor(options: OrchestratorOptions) {
  const { workflowPath, managedPolling, debug, linearClient } = options;

  this.workflowPath = workflowPath;
  // Workflow name is the file name without its ".md" extension.
  this.workflowName = basename(workflowPath, ".md");
  this.managedPolling = managedPolling ?? false;
  this.debug = debug ?? false;

  // Adopt an injected client only when one was actually supplied; otherwise
  // the field keeps its initial null value.
  if (linearClient) {
    this.linearClient = linearClient;
  }
}
75
+
76
+ // ── Tracker Helpers ───────────────────────────────────────────
77
+
78
/**
 * Whether the loaded config selects one of the GitHub tracker kinds.
 *
 * @returns `true` for "github-pr" or "github-issues"; `false` otherwise,
 *   including when no config has been loaded yet.
 */
private isGitHubTracker(): boolean {
  const kind = this.config?.tracker.kind;
  return kind === "github-pr" || kind === "github-issues";
}
81
+
82
/**
 * Fetch the issues that are candidates for dispatch from whichever backend
 * is active.
 *
 * @returns Candidate issues from the GitHub tracker (label filters are passed
 *   to it) or from the Linear client (project slug and active states).
 * @throws Error if the config is missing, or if the Linear path is taken
 *   without a Linear client.
 */
private async fetchCandidateIssues(): Promise<Issue[]> {
  const config = this.config;
  if (!config) throw new Error("Config not initialized");

  const trackerConfig = config.tracker;

  // GitHub path: only taken when the kind is a GitHub one AND a tracker exists.
  const githubTracker = this.isGitHubTracker() ? this.tracker : null;
  if (githubTracker) {
    return githubTracker.fetchCandidates({
      excludedLabels: trackerConfig.excluded_labels,
      requiredLabels: trackerConfig.required_labels,
    });
  }

  const linear = this.linearClient;
  if (!linear) throw new Error("Linear client not initialized");
  return linear.fetchCandidateIssues(trackerConfig.project_slug, trackerConfig.active_states);
}
98
+
99
/**
 * Look up a single issue by identifier.
 *
 * For GitHub trackers the tracker's own result is returned as-is. For Linear
 * the raw client response is mapped into an `Issue` by hand; several fields
 * (priority, branch_name, url, blocked_by, timestamps) are filled with
 * null / empty placeholders rather than taken from the response.
 *
 * @param identifier - Issue identifier understood by the active backend.
 * @returns The issue, or `null` when the Linear client returns nothing.
 * @throws Error if the Linear path is taken without a Linear client.
 */
private async getIssue(identifier: string): Promise<Issue | null> {
  if (this.isGitHubTracker() && this.tracker) {
    return this.tracker.getIssue(identifier);
  }
  if (!this.linearClient) throw new Error("Linear client not initialized");

  // LinearClient returns raw response, cast for backwards compat
  // TODO: Use LinearTracker for proper normalization
  const raw = await this.linearClient.getIssue(identifier);
  if (!raw) return null;

  const labels: Issue["labels"] = raw.labels.nodes.map((label) => label.name);

  // Children keep the order the API returned them in; that position is used
  // as their sort order.
  const children: Issue["children"] = raw.children.nodes.map((node, position) => ({
    id: node.id,
    identifier: node.identifier,
    title: node.title,
    description: node.description,
    priority: node.priority,
    state: node.state.name,
    state_type: node.state.type || "unstarted",
    sort_order: position,
    assignee: null,
    created_at: null,
    updated_at: null,
  }));

  const comments: Issue["comments"] = raw.comments.nodes.map((comment) => ({
    id: comment.id,
    body: comment.body,
    created_at: new Date(comment.createdAt),
    user: comment.user?.name || null,
  }));

  return {
    id: raw.id,
    identifier: raw.identifier,
    title: raw.title,
    description: raw.description,
    priority: null,
    state: raw.state.name,
    branch_name: null,
    url: null,
    labels,
    blocked_by: [],
    children,
    comments,
    created_at: null,
    updated_at: null,
  };
}
142
+
143
/**
 * Fetch issues that are currently in one of the given states.
 *
 * @param states - State names to match.
 * @returns Matching issues: `fetchTerminalIssues` on the GitHub tracker, or a
 *   project-scoped state query on the Linear client.
 * @throws Error if the config is missing, or if the Linear path is taken
 *   without a Linear client.
 */
private async fetchIssuesByStates(states: string[]): Promise<Issue[]> {
  const config = this.config;
  if (!config) throw new Error("Config not initialized");

  const githubTracker = this.isGitHubTracker() ? this.tracker : null;
  if (githubTracker) {
    return githubTracker.fetchTerminalIssues(states);
  }

  const linear = this.linearClient;
  if (!linear) throw new Error("Linear client not initialized");
  return linear.fetchIssuesByStates(config.tracker.project_slug, states);
}
156
+
157
/**
 * Fetch the current state of each given issue.
 *
 * @param ids - Issue ids to look up.
 * @returns A map keyed by issue id whose values are state strings, as
 *   returned by the active backend.
 * @throws Error if the Linear path is taken without a Linear client.
 */
private async fetchIssueStatesByIds(ids: string[]): Promise<Map<string, string>> {
  const githubTracker = this.isGitHubTracker() ? this.tracker : null;
  if (githubTracker) {
    return githubTracker.fetchStatesByIds(ids);
  }

  const linear = this.linearClient;
  if (!linear) throw new Error("Linear client not initialized");
  return linear.fetchIssueStatesByIds(ids);
}
164
+
165
/**
 * Create or update a comment on an issue through the active backend.
 *
 * @param issueId - Issue to comment on.
 * @param body - Comment text.
 * @param commentId - Existing comment id, if any. A missing id is passed to
 *   the GitHub tracker as `undefined` and to the Linear client as `null`.
 * @returns The string returned by the backend's `upsertComment`.
 * @throws Error if the Linear path is taken without a Linear client.
 */
private async upsertComment(issueId: string, body: string, commentId?: string | null): Promise<string> {
  const githubTracker = this.isGitHubTracker() ? this.tracker : null;
  if (githubTracker) {
    return githubTracker.upsertComment(issueId, body, commentId ?? undefined);
  }

  const linear = this.linearClient;
  if (!linear) throw new Error("Linear client not initialized");
  return linear.upsertComment(issueId, body, commentId ?? null);
}
172
+
173
+ // ── Lifecycle ─────────────────────────────────────────────────
174
+
175
/**
 * Boot the orchestrator.
 *
 * Steps, in order: load and validate the workflow, build the tracker that
 * matches `tracker.kind` (a Linear client is only created when none was
 * injected), create the workspace manager and orchestrator state, start the
 * workflow file watcher, run startup cleanup, then mark the orchestrator as
 * running and — unless polling is externally managed — schedule an immediate
 * first poll.
 *
 * @throws Error when the workflow config fails validation (each validation
 *   error is logged first).
 */
async start(): Promise<void> {
  logger.info("Starting Symphony orchestrator", { workflowPath: this.workflowPath });

  // Load and validate workflow
  const workflow = loadWorkflow(this.workflowPath);
  this.workflow = workflow;
  const config = buildServiceConfig(workflow);
  this.config = config;

  const { valid, errors } = validateServiceConfig(config);
  if (!valid) {
    for (const problem of errors) {
      logger.error(`Validation error: ${problem}`);
    }
    throw new Error(`Configuration validation failed: ${errors.join(", ")}`);
  }

  // Initialize tracker based on kind
  const trackerConfig = config.tracker;
  switch (trackerConfig.kind) {
    case "github-pr":
      this.tracker = new GitHubPRTracker({
        kind: "github-pr",
        repo: trackerConfig.repo,
        excluded_labels: trackerConfig.excluded_labels,
        required_labels: trackerConfig.required_labels,
      });
      logger.info("Using GitHub PR tracker", { repo: trackerConfig.repo });
      break;

    case "github-issues":
      this.tracker = new GitHubIssuesTracker({
        kind: "github-issues",
        repo: trackerConfig.repo,
        excluded_labels: trackerConfig.excluded_labels,
        required_labels: trackerConfig.required_labels,
        active_states: trackerConfig.active_states,
        terminal_states: trackerConfig.terminal_states,
      });
      logger.info("Using GitHub Issues tracker", { repo: trackerConfig.repo });
      break;

    default:
      // Linear tracker (default). An injected client is left untouched,
      // including whatever callbacks its owner installed.
      if (!this.linearClient) {
        const client = new LinearClient(trackerConfig.endpoint, trackerConfig.api_key);
        client.onRateLimit = (attempt, waitSecs) => {
          logger.warn(`Linear rate limit hit, retrying in ${waitSecs}s`, { attempt });
        };
        client.onThrottle = (remaining, limit) => {
          logger.debug(`Throttling Linear requests`, { remaining, limit });
        };
        this.linearClient = client;
      }
  }

  this.workspaceManager = new WorkspaceManager(config);
  this.orchState = state.createOrchestratorState(
    config.polling.interval_ms,
    config.agent.max_concurrent_agents
  );

  // Setup file watcher for dynamic reload
  this.setupFileWatcher();

  // Startup cleanup
  await this.startupCleanup();

  // Start polling (skip if externally managed)
  this.running = true;
  if (!this.managedPolling) {
    this.schedulePoll(0); // Immediate first tick
  }

  // Read from this.config (not the local) because a workflow reload may have
  // swapped the config while startup cleanup was awaited.
  logger.info("Symphony orchestrator started", {
    binary: this.config.agent.binary,
    poll_interval_ms: this.config.polling.interval_ms,
    max_concurrent_agents: this.config.agent.max_concurrent_agents,
  });
}
251
+
252
/**
 * Load the workflow, fetch the candidate issues and print the prompt that
 * would be sent for each one, without launching any agent.
 *
 * The issue source is initialised the same way `start()` does it: a GitHub
 * tracker for the "github-pr" / "github-issues" kinds, otherwise a Linear
 * client. An injected Linear client is reused rather than replaced.
 * (Previously a new Linear client was always created and no GitHub tracker
 * was built, so GitHub workflows were queried through Linear.)
 *
 * In `ralph_loop` mode an issue with children gets one prompt per pending
 * child (not completed / canceled), in `sort_order`; other issues get a
 * single prompt. Output goes to stdout via `console.log`.
 *
 * @throws Error when the workflow config fails validation (each validation
 *   error is logged first).
 */
async dryRun(): Promise<void> {
  logger.info("Dry run: loading workflow and fetching issues...");

  const workflow = loadWorkflow(this.workflowPath);
  this.workflow = workflow;
  const config = buildServiceConfig(workflow);
  this.config = config;

  const validation = validateServiceConfig(config);
  if (!validation.valid) {
    for (const error of validation.errors) {
      logger.error(`Validation error: ${error}`);
    }
    throw new Error(`Configuration validation failed: ${validation.errors.join(", ")}`);
  }

  // Initialise the issue source for the configured tracker kind, mirroring start().
  const trackerConfig = config.tracker;
  if (trackerConfig.kind === "github-pr") {
    this.tracker = new GitHubPRTracker({
      kind: "github-pr",
      repo: trackerConfig.repo,
      excluded_labels: trackerConfig.excluded_labels,
      required_labels: trackerConfig.required_labels,
    });
  } else if (trackerConfig.kind === "github-issues") {
    this.tracker = new GitHubIssuesTracker({
      kind: "github-issues",
      repo: trackerConfig.repo,
      excluded_labels: trackerConfig.excluded_labels,
      required_labels: trackerConfig.required_labels,
      active_states: trackerConfig.active_states,
      terminal_states: trackerConfig.terminal_states,
    });
  } else if (!this.linearClient) {
    // Only create a client when the caller did not inject a shared one.
    this.linearClient = new LinearClient(trackerConfig.endpoint, trackerConfig.api_key);
  }

  const issues = await this.fetchCandidateIssues();

  // Filter by labels (same logic as scheduler) - for Linear only, GitHub tracker handles this internally
  const requiredLabels = trackerConfig.required_labels;
  const excludedLabels = trackerConfig.excluded_labels;
  const hasLabel = (issue: Issue, wanted: string): boolean =>
    issue.labels.some((l) => l.toLowerCase() === wanted.toLowerCase());

  const eligible = this.isGitHubTracker()
    ? issues
    : issues.filter(
        (issue) =>
          requiredLabels.every((rl) => hasLabel(issue, rl)) &&
          !excludedLabels.some((el) => hasLabel(issue, el))
      );

  if (eligible.length === 0) {
    console.log("No matching issues found.");
    return;
  }

  console.log(`Found ${eligible.length} matching issue(s):\n`);

  const rule = "─".repeat(60);

  for (const issue of eligible) {
    const isRalphLoop = config.agent.mode === "ralph_loop" && issue.children.length > 0;

    if (isRalphLoop) {
      const todoChildren = issue.children
        .filter((c) => c.state_type !== "completed" && c.state_type !== "canceled")
        .sort((a, b) => a.sort_order - b.sort_order);

      if (todoChildren.length === 0) {
        console.log(`── ${issue.identifier}: ${issue.title} (no pending subtasks)\n`);
        continue;
      }

      for (const [i, child] of todoChildren.entries()) {
        const prompt = await renderSubtaskPrompt(
          workflow.prompt_template,
          issue,
          child,
          i + 1,
          todoChildren.length,
          null
        );

        console.log(rule);
        console.log(`Issue: ${issue.identifier} → Subtask ${i + 1}/${todoChildren.length}: ${child.identifier}`);
        console.log(rule);
        console.log(prompt);
        console.log();
      }
    } else {
      const prompt = await renderPrompt(workflow.prompt_template, issue, null);

      console.log(rule);
      console.log(`Issue: ${issue.identifier}: ${issue.title}`);
      console.log(rule);
      console.log(prompt);
      console.log();
    }
  }
}
343
+
344
/**
 * Shut the orchestrator down: clear the running flag, cancel the pending
 * poll timer, close the workflow file watcher, signal abort to every running
 * worker and clear every pending retry timer.
 *
 * Workers are only signalled through their `AbortController`; this method
 * does not wait for them to finish.
 */
async stop(): Promise<void> {
  logger.info("Stopping Symphony orchestrator");
  this.running = false;

  // Cancel poll timer
  const timer = this.pollTimer;
  if (timer) {
    clearTimeout(timer);
    this.pollTimer = null;
  }

  // Stop file watcher
  const watcher = this.fileWatcher;
  if (watcher) {
    await watcher.close();
    this.fileWatcher = null;
  }

  const orchState = this.orchState;
  if (orchState) {
    // Cancel all running workers
    orchState.running.forEach((runningEntry) => {
      runningEntry.abortController.abort();
    });

    // Cancel all retry timers
    orchState.retry_attempts.forEach((retryEntry) => {
      clearTimeout(retryEntry.timer_handle);
    });
  }

  logger.info("Symphony orchestrator stopped");
}
374
+
375
+ // ── File Watcher ──────────────────────────────────────────────
376
+
377
/**
 * Watch the workflow file and reload the workflow whenever it changes.
 *
 * The watcher is created with `ignoreInitial: true` and an
 * `awaitWriteFinish` stability threshold of 500.
 */
private setupFileWatcher(): void {
  const watcher = watch(this.workflowPath, {
    ignoreInitial: true,
    awaitWriteFinish: { stabilityThreshold: 500 },
  });
  this.fileWatcher = watcher;

  watcher.on("change", () => this.reloadWorkflow());
}
387
+
388
/**
 * Re-read the workflow file and apply the new configuration in place.
 *
 * A reload that fails validation, or that throws while loading, is logged
 * and the current workflow/config are kept. On success the poll interval,
 * concurrency limit and workspace manager are updated.
 *
 * The Linear client is replaced only when this orchestrator is not in
 * managed-polling mode AND the endpoint or API key actually changed. The
 * replacement inherits the previous client's `onRateLimit` / `onThrottle`
 * handlers, so the logging installed by `start()` survives a reload.
 * (Previously every reload created a bare client without those handlers.)
 */
private reloadWorkflow(): void {
  try {
    const newWorkflow = loadWorkflow(this.workflowPath);
    const newConfig = buildServiceConfig(newWorkflow);
    const validation = validateServiceConfig(newConfig);

    if (!validation.valid) {
      logger.error("Workflow reload failed validation, keeping current config", {
        errors: validation.errors,
      });
      return;
    }

    // Remember the outgoing tracker settings so a credential change can be detected.
    const previousTracker = this.config?.tracker;

    // Apply new config
    this.workflow = newWorkflow;
    this.config = newConfig;

    // Update components
    if (this.orchState) {
      this.orchState.poll_interval_ms = newConfig.polling.interval_ms;
      this.orchState.max_concurrent_agents = newConfig.agent.max_concurrent_agents;
    }

    if (this.workspaceManager) {
      this.workspaceManager.updateConfig(newConfig);
    }

    const currentClient = this.linearClient;
    if (currentClient && !this.managedPolling) {
      // Recreate client if endpoint or key changed (only if we own it)
      const credentialsChanged =
        !previousTracker ||
        previousTracker.endpoint !== newConfig.tracker.endpoint ||
        previousTracker.api_key !== newConfig.tracker.api_key;

      if (credentialsChanged) {
        const replacement = new LinearClient(
          newConfig.tracker.endpoint,
          newConfig.tracker.api_key
        );
        // Carry the handlers over so rate-limit / throttle logging keeps working.
        replacement.onRateLimit = currentClient.onRateLimit;
        replacement.onThrottle = currentClient.onThrottle;
        this.linearClient = replacement;
      }
    }

    logger.info("Workflow reloaded successfully", {
      binary: newConfig.agent.binary,
    });
  } catch (err) {
    logger.error(`Failed to reload workflow: ${(err as Error).message}`);
  }
}
430
+
431
+ // ── Startup Cleanup ───────────────────────────────────────────
432
+
433
/**
 * Remove workspaces that belong to issues already in a terminal state.
 *
 * Runs for either backend: it needs a config, a workspace manager and at
 * least one issue source (Linear client or tracker), matching the guard used
 * by `pollTick` and `reconcile`. (Previously a Linear client was required,
 * so cleanup was silently skipped for GitHub trackers.)
 *
 * Best effort: any failure is logged as a warning and never propagates.
 */
private async startupCleanup(): Promise<void> {
  if (!this.config || (!this.linearClient && !this.tracker) || !this.workspaceManager) return;

  try {
    // Fetch terminal state issues
    const terminalIssues = await this.fetchIssuesByStates(
      this.config.tracker.terminal_states
    );

    // Remove workspaces for terminal issues
    for (const issue of terminalIssues) {
      await this.workspaceManager.removeWorkspace(issue.identifier);
    }

    if (terminalIssues.length > 0) {
      logger.info(`Cleaned up ${terminalIssues.length} terminal issue workspaces`);
    }
  } catch (err) {
    logger.warn(`Startup cleanup failed: ${(err as Error).message}`);
  }
}
454
+
455
+ // ── Poll Loop ─────────────────────────────────────────────────
456
+
457
/**
 * Arm the timer for the next poll tick. After each tick the timer re-arms
 * itself using the current poll interval (30000 ms when no state exists).
 *
 * Any timer that is still pending is cancelled first, so at most one poll
 * chain is ever alive. Without this, a `forcePoll()` issued while a
 * timer-driven tick was in flight left two timers armed and doubled the
 * polling rate from then on.
 *
 * @param delayMs - Delay before the next tick, in milliseconds.
 */
private schedulePoll(delayMs: number): void {
  if (!this.running) return;

  // Clearing a timer that has already fired is a harmless no-op.
  if (this.pollTimer) {
    clearTimeout(this.pollTimer);
  }

  this.pollTimer = setTimeout(async () => {
    await this.pollTick();
    this.schedulePoll(this.orchState?.poll_interval_ms ?? 30000);
  }, delayMs);
}
465
+
466
/**
 * Force an immediate poll tick, resetting the poll timer.
 *
 * Does nothing when the orchestrator is not running. After the tick the
 * internal poll loop is re-armed only when this orchestrator owns its
 * polling. In managed-polling mode the caller drives ticks, so no timer is
 * started; `start()` skips the loop in that mode for the same reason.
 * (Previously a forced poll started the internal loop unconditionally.)
 */
async forcePoll(): Promise<void> {
  if (!this.running) return;

  // Cancel scheduled poll and run immediately
  if (this.pollTimer) {
    clearTimeout(this.pollTimer);
    this.pollTimer = null;
  }

  logger.info("Force refresh triggered");
  await this.pollTick();

  if (!this.managedPolling) {
    this.schedulePoll(this.orchState?.poll_interval_ms ?? 30000);
  }
}
478
+
479
/**
 * Run one polling cycle: reconcile running issues, re-validate the config,
 * fetch candidate issues and dispatch from them.
 *
 * Returns without doing anything when the orchestrator is not fully
 * initialised (no config, no issue source, no workspace manager or no
 * state), and stops early when `refreshConfig()` reports failure. Any error
 * raised during the cycle is logged and swallowed so the caller is never
 * rejected.
 */
private async pollTick(): Promise<void> {
  const hasIssueSource = Boolean(this.linearClient || this.tracker);
  const initialised =
    Boolean(this.config) && hasIssueSource && Boolean(this.workspaceManager) && Boolean(this.orchState);
  if (!initialised) {
    return;
  }

  try {
    // Part 1: Reconcile running issues
    await this.reconcile();

    // Part 2: Validate config (re-read for safety)
    const configOk = this.refreshConfig();
    if (!configOk) return;

    // Part 3: Fetch candidate issues
    const candidates = await this.fetchCandidateIssues();

    // Part 4-5: Select and dispatch
    const dispatchedCount = this.dispatchFromIssues(candidates);
    if (dispatchedCount > 0) {
      logger.info(`Dispatched ${dispatchedCount} issues`);
    }
  } catch (err) {
    logger.error(`Poll tick failed: ${(err as Error).message}`);
  }
}
504
+
505
+ // ── Reconciliation ────────────────────────────────────────────
506
+
507
/**
 * Bring running work back in line with reality.
 *
 * Part A runs stall detection. Part B asks the backend for the current state
 * of every running issue and applies the result; a failure in part B is
 * logged as a warning and does not propagate. Part B is skipped when nothing
 * is running. The whole method is a no-op when the orchestrator is not fully
 * initialised.
 */
private async reconcile(): Promise<void> {
  const hasIssueSource = Boolean(this.linearClient || this.tracker);
  const initialised =
    Boolean(this.config) && hasIssueSource && Boolean(this.orchState) && Boolean(this.workspaceManager);
  if (!initialised) {
    return;
  }

  // Part A: Stall detection
  this.runStallDetection();

  // Part B: Tracker state refresh
  const runningIds = this.getRunningIssueIds();
  if (runningIds.length === 0) {
    return;
  }

  try {
    const latestStates = await this.fetchIssueStatesByIds(runningIds);
    await this.applyReconcileStates(latestStates);
  } catch (err) {
    logger.warn(`State refresh failed: ${(err as Error).message}`);
  }
}
526
+
527
+ // ── Dispatch ──────────────────────────────────────────────────
528
+
529
/**
 * Claim an issue and start a worker for it.
 *
 * The worker promise is stored on the running entry and is not awaited here.
 *
 * @param issue - Issue to work on.
 * @param attempt - Attempt number recorded on the run (`null` is allowed).
 * @returns `true` when the worker was started; `false` when the orchestrator
 *   is not fully initialised or the issue could not be claimed.
 */
private dispatch(issue: Issue, attempt: number | null): boolean {
  const orchState = this.orchState;
  if (!this.config || !orchState || !this.workspaceManager || !this.workflow) {
    return false;
  }

  // Claim the issue
  const claimed = state.claimIssue(orchState, issue.id);
  if (!claimed) {
    return false;
  }

  const abortController = new AbortController();
  const runAttempt: RunAttempt = {
    issue_id: issue.id,
    issue_identifier: issue.identifier,
    attempt,
    workspace_path: "",
    started_at: new Date(),
    status: "PreparingWorkspace",
  };

  // The worker starts executing as soon as runWorker is called; the entry is
  // registered right after, within the same synchronous turn.
  const entry: RunningEntry = {
    issue,
    attempt: runAttempt,
    session: null,
    worker: this.runWorker(issue, runAttempt, abortController),
    abortController,
  };
  state.addRunning(orchState, entry);

  logger.info(`Dispatched ${issue.identifier}`, {
    issue_id: issue.id,
    issue_identifier: issue.identifier,
    attempt,
  });

  return true;
}
570
+
571
/**
 * Execute one run attempt for an issue, from workspace creation to cleanup.
 *
 * Flow: create the workspace, run the before-run hook, then either
 * - ralph_loop mode (issue has children): work through the subtasks via
 *   `runRalphLoop`; when it reports everything done the parent is
 *   transitioned to done, otherwise a continuation retry is queued; or
 * - default mode: render one prompt, run the agent once, then re-fetch the
 *   issue and judge the outcome by its state. A terminal state counts as
 *   success; anything else is a failure and a retry is queued.
 *
 * Errors thrown outside the default-mode agent call are recorded on the
 * attempt and queue a retry. The `finally` block always runs the after-run
 * hook (when a workspace path was recorded), removes the running entry and
 * releases the claim unless a retry is pending.
 *
 * @param issue - Issue being worked on.
 * @param runAttempt - Mutable attempt record; `status`, `workspace_path` and
 *   `error` are updated as the run progresses.
 * @param abortController - Controller passed through to the agent runs.
 */
private async runWorker(
  issue: Issue,
  runAttempt: RunAttempt,
  abortController: AbortController
): Promise<void> {
  // Snapshot the collaborators so a config reload mid-run cannot swap them
  // out from under this worker.
  const config = this.config;
  const orchState = this.orchState;
  const workspaceManager = this.workspaceManager;
  const workflow = this.workflow;
  if (!config || !orchState || !workspaceManager || !workflow) {
    return;
  }

  const logContext = { issue_id: issue.id, issue_identifier: issue.identifier };

  try {
    // Step 1: Create workspace
    runAttempt.status = "PreparingWorkspace";
    const workspace = await workspaceManager.createWorkspace(issue);
    runAttempt.workspace_path = workspace.path;

    // Step 2: Run before_run hook
    await workspaceManager.runBeforeRunHook(workspace.path, issue);

    // Check if ralph_loop mode with children
    const isRalphLoop = config.agent.mode === "ralph_loop" && issue.children.length > 0;

    if (!isRalphLoop) {
      // Default Mode: Single prompt for entire issue
      runAttempt.status = "BuildingPrompt";
      const prompt = await renderPrompt(workflow.prompt_template, issue, runAttempt.attempt);

      this.writePromptDebug(workspace.path, issue.identifier, prompt);

      runAttempt.status = "LaunchingAgentProcess";
      let agentError: string | null = null;
      try {
        await this.runAgentOnce(issue, workspace.path, prompt, runAttempt.attempt, abortController, config);
      } catch (err) {
        // Remember the failure, but let the issue's state decide the outcome.
        agentError = (err as Error).message;
      }

      // Agent exited — check the issue's current state to determine outcome
      const freshIssue = await this.getIssue(issue.identifier);
      const freshState = freshIssue?.state?.trim().toLowerCase();
      const inStateList = (names: readonly string[]): boolean =>
        names.some((name) => name.trim().toLowerCase() === freshState);
      const isTerminal = freshState && inStateList(config.tracker.terminal_states);
      const isError = freshState && inStateList(config.tracker.error_states);

      if (!isTerminal) {
        runAttempt.status = "Failed";
        if (isError) {
          runAttempt.error = `Agent set issue to error state (${freshIssue!.state})`;
        } else {
          runAttempt.error = agentError ?? "Agent exited but issue not in terminal state";
        }
        logger.error(`Worker failed for ${issue.identifier}: issue is ${freshIssue?.state ?? "unknown"}`, {
          issue_id: issue.id,
          issue_identifier: issue.identifier,
          state: freshIssue?.state,
          is_error_state: Boolean(isError),
          agent_error: agentError,
        });

        // Queue retry with backoff
        this.queueRetry(issue, (runAttempt.attempt ?? 0) + 1, runAttempt.error);
      } else {
        runAttempt.status = "Succeeded";
        logger.info(`Worker done for ${issue.identifier} (issue is ${freshIssue!.state})`, {
          issue_id: issue.id,
          issue_identifier: issue.identifier,
          state: freshIssue!.state,
        });
      }
    } else {
      // Ralph Loop Mode: Run through subtasks externally
      const allDone = await this.runRalphLoop(issue, workspace.path, runAttempt, abortController, workflow, config);

      runAttempt.status = "Succeeded";
      logger.info(`Worker completed for ${issue.identifier}`, {
        ...logContext,
        ralph_loop_done: allDone,
      });

      if (allDone) {
        // All subtasks done — transition parent to "Done" and stop
        await this.transitionIssueToDone(issue);
      } else {
        // More subtasks remain (capped by max_iterations) — continue later
        this.queueContinuationRetry(issue);
      }
    }
  } catch (err) {
    // Errors from workspace creation, hooks, or state checking — not agent execution
    const errorMsg = (err as Error).message;

    runAttempt.status = "Failed";
    runAttempt.error = errorMsg;

    logger.error(`Worker failed for ${issue.identifier}: ${errorMsg}`, { ...logContext });

    // Queue retry with backoff
    this.queueRetry(issue, (runAttempt.attempt ?? 0) + 1, errorMsg);
  } finally {
    // Run after_run hook
    if (runAttempt.workspace_path) {
      await workspaceManager.runAfterRunHook(runAttempt.workspace_path, issue);
    }

    // Remove from running
    state.removeRunning(orchState, issue.id);

    // Release claim if no retry is pending, so the issue can be
    // re-dispatched if it transitions back to an active state.
    const retryPending = state.getRetry(orchState, issue.id);
    if (!retryPending) {
      state.releaseClaim(orchState, issue.id);
    }

    // Update running entry session to orchestrator totals
    // NOTE(review): this lookup happens after removeRunning above; if
    // removeRunning deletes the entry from `running`, the lookup always misses
    // and the totals are never updated here — confirm against state.ts.
    const finishedEntry = orchState.running.get(issue.id);
    const session = finishedEntry?.session;
    if (session) {
      state.updateTotals(orchState, {
        delta_input: session.input_tokens,
        delta_output: session.output_tokens,
        delta_total: session.total_tokens,
      });
    }
  }
}
704
+
705
+ // ── Ralph Loop Mode ───────────────────────────────────────────
706
+
707
/**
 * Ralph-loop mode: work through a parent issue's pending subtasks one at a
 * time, each in a fresh agent run, in `sort_order`.
 *
 * Pending means the child's state type is neither "completed" nor
 * "canceled". When `config.agent.max_iterations` is positive and smaller
 * than the number of pending children, only that many are processed in this
 * call. A status comment is created on the parent and updated as the loop
 * advances; failing to create it only logs a warning.
 *
 * After each agent run the child's state is re-read from Linear and treated
 * as the source of truth: a child that is done counts as completed even if
 * the agent threw, and a child that is not done fails the loop even if the
 * agent returned normally.
 *
 * NOTE(review): this method uses `this.linearClient!` directly, so it
 * assumes a Linear client is present — confirm ralph_loop mode is never
 * combined with a GitHub tracker.
 *
 * @param parentIssue - Parent issue; its `children` entries are mutated with
 *   refreshed state as subtasks finish.
 * @param workspacePath - Workspace in which every subtask's agent runs.
 * @param runAttempt - Attempt record whose `status` is updated per subtask.
 * @param abortController - Checked before each subtask and passed to the agent.
 * @param workflow - Supplies the prompt template.
 * @param config - Supplies `agent.max_iterations` and the agent settings.
 * @returns `true` when every pending subtask was processed (the list was not
 *   capped) or there was nothing to do; `false` when capped.
 * @throws Error "Ralph loop aborted" when the abort signal is set; the
 *   agent's own error, or an `AgentError("turn_failed")`, when a subtask is
 *   not completed.
 */
private async runRalphLoop(
  parentIssue: Issue,
  workspacePath: string,
  runAttempt: RunAttempt,
  abortController: AbortController,
  workflow: WorkflowDefinition,
  config: ServiceConfig
): Promise<boolean> {
  // Filter to non-done subtasks, sorted by order
  const pending = parentIssue.children
    .filter((c) => !(c.state_type === "completed" || c.state_type === "canceled"))
    .sort((a, b) => a.sort_order - b.sort_order);

  // Apply max_iterations cap
  const maxIter = config.agent.max_iterations;
  const wasCapped = maxIter > 0 && pending.length > maxIter;
  if (wasCapped) {
    logger.info(`Capping Ralph loop to ${maxIter} iterations (${pending.length} pending)`, {
      issue_identifier: parentIssue.identifier,
    });
  }
  const todoChildren = wasCapped ? pending.slice(0, maxIter) : pending;
  const total = todoChildren.length;

  if (total === 0) {
    logger.info(`No pending subtasks for ${parentIssue.identifier}`, {
      issue_identifier: parentIssue.identifier,
      total_children: parentIssue.children.length,
    });
    return true;
  }

  logger.info(`Starting Ralph loop for ${parentIssue.identifier}`, {
    issue_identifier: parentIssue.identifier,
    total_subtasks: total,
  });

  // Create status comment on parent issue
  let statusCommentId: string | null = null;
  const startedAt = formatTime();
  try {
    const initialBody = this.buildRalphStatusComment(parentIssue, todoChildren, -1, startedAt, "starting");
    statusCommentId = await this.linearClient!.createComment(parentIssue.id, initialBody);
  } catch (err) {
    logger.warn(`Failed to create status comment: ${(err as Error).message}`);
  }

  for (const [i, child] of todoChildren.entries()) {
    const subtaskIndex = i + 1;
    const subtaskContext = {
      parent_identifier: parentIssue.identifier,
      subtask_identifier: child.identifier,
    };

    // Check for abort
    if (abortController.signal.aborted) {
      logger.info(`Ralph loop aborted for ${parentIssue.identifier}`, {
        issue_identifier: parentIssue.identifier,
        completed_subtasks: i,
      });
      await this.updateRalphStatus(parentIssue.id, statusCommentId, parentIssue, todoChildren, i - 1, startedAt, "aborted");
      throw new Error("Ralph loop aborted");
    }

    logger.info(`Processing subtask ${subtaskIndex}/${total}: ${child.identifier}`, {
      ...subtaskContext,
      subtask_title: child.title,
    });

    // Update status comment: mark current subtask as in-progress
    await this.updateRalphStatus(parentIssue.id, statusCommentId, parentIssue, todoChildren, i, startedAt, "running");

    // Build prompt for this subtask
    runAttempt.status = "BuildingPrompt";
    const prompt = await renderSubtaskPrompt(
      workflow.prompt_template,
      parentIssue,
      child,
      subtaskIndex,
      total,
      runAttempt.attempt
    );

    this.writePromptDebug(workspacePath, `${parentIssue.identifier}_subtask-${subtaskIndex}`, prompt);

    // Run agent for this subtask (fresh session)
    runAttempt.status = "LaunchingAgentProcess";
    let agentError: Error | null = null;
    try {
      await this.runAgentOnce(parentIssue, workspacePath, prompt, runAttempt.attempt, abortController, config);
    } catch (err) {
      agentError = err as Error;
    }

    // Verify the child's actual state in Linear (source of truth)
    const freshChild = await this.linearClient!.getIssue(child.identifier);
    const childDone = freshChild && (freshChild.state.type === "completed" || freshChild.state.type === "canceled");

    if (!childDone) {
      if (agentError) {
        // Update status comment with error
        await this.updateRalphStatus(parentIssue.id, statusCommentId, parentIssue, todoChildren, i, startedAt, "error", agentError.message);
        // Subtask genuinely failed — rethrow
        throw agentError;
      }

      // Agent returned normally but left the subtask unfinished.
      logger.warn(`Subtask ${child.identifier} not completed by agent (state: ${freshChild?.state.name})`, {
        ...subtaskContext,
        state_type: freshChild?.state.type,
      });
      const notCompleted = new AgentError(
        "turn_failed",
        `Subtask ${child.identifier} not completed after agent run (state: ${freshChild?.state.name})`
      );
      await this.updateRalphStatus(parentIssue.id, statusCommentId, parentIssue, todoChildren, i, startedAt, "error", notCompleted.message);
      throw notCompleted;
    }

    if (agentError) {
      // Agent threw (e.g. abort race with reconciliation) but subtask is actually done
      logger.info(`Subtask ${child.identifier} completed despite agent error: ${agentError.message}`, {
        ...subtaskContext,
      });
    }
    child.state_type = "completed";

    // Refresh parent's children states so next iteration's prompt shows accurate progress
    try {
      const freshParent = await this.linearClient!.getIssue(parentIssue.identifier);
      if (freshParent) {
        for (const latest of freshParent.children.nodes) {
          const known = parentIssue.children.find((c) => c.id === latest.id);
          if (!known) continue;
          known.state = latest.state.name;
          known.state_type = latest.state.type;
        }
      }
    } catch (refreshErr) {
      logger.warn(`Failed to refresh children states: ${(refreshErr as Error).message}`);
    }

    logger.info(`Completed subtask ${subtaskIndex}/${total}: ${child.identifier}`, {
      ...subtaskContext,
    });
  }

  // Update status comment to final state
  // All done if we weren't capped by max_iterations
  const allDone = !wasCapped;
  const finalPhase = allDone ? "completed" : "paused";
  await this.updateRalphStatus(
    parentIssue.id,
    statusCommentId,
    parentIssue,
    todoChildren,
    total,
    startedAt,
    finalPhase,
  );

  logger.info(`Ralph loop completed for ${parentIssue.identifier}`, {
    issue_identifier: parentIssue.identifier,
    completed_subtasks: total,
  });

  return allDone;
}
889
+
890
+ // ── Ralph Status Comment ─────────────────────────────────────────
891
+
892
/**
 * First completion stamp rendered for each subtask, keyed by the child issue
 * object. Without this memo every re-render would stamp all finished subtasks
 * with the current time, overwriting the time shown when they first finished.
 */
private readonly ralphCompletionStamps = new WeakMap<ChildIssue, string>();

/**
 * Render the Markdown body of the Ralph-loop status comment.
 *
 * Layout: a bold header describing `phase`, a horizontal rule, a checked
 * "Started" line carrying `startedAt`, one checklist line per entry of
 * `todoChildren`, and an optional trailing error line.
 *
 * A subtask is rendered as checked when its `state_type` is "completed" or
 * "canceled", or when its index is below `currentIndex`. The time shown next
 * to a checked subtask is captured the first time that subtask is rendered as
 * checked and reused on later renders.
 *
 * @param parentIssue Parent issue of the loop (not read by this method).
 * @param todoChildren Subtasks listed in the checklist, in display order.
 * @param currentIndex Zero-based index of the subtask currently being handled.
 * @param startedAt Start-time text shown on the "Started" line.
 * @param phase Loop phase; selects the header text and the per-line markers.
 * @param errorMsg Optional error text appended as a final `**Error:**` line.
 * @returns The comment body, lines joined with "\n".
 */
private buildRalphStatusComment(
  parentIssue: Issue,
  todoChildren: ChildIssue[],
  currentIndex: number,
  startedAt: string,
  phase: "starting" | "running" | "completed" | "paused" | "error" | "aborted",
  errorMsg?: string,
): string {
  const headerMap = {
    starting: "starting work",
    running: `processing subtasks (${currentIndex + 1}/${todoChildren.length})`,
    completed: "all subtasks completed",
    paused: `paused (completed ${todoChildren.length} subtask${todoChildren.length !== 1 ? "s" : ""} this session)`,
    error: "error encountered",
    aborted: "aborted",
  };

  const lines: string[] = [
    `**Ralph Loop** — ${headerMap[phase]}.`,
    "",
    "---",
    `- [x] Started _(${startedAt})_`,
  ];

  todoChildren.forEach((child, i) => {
    const isDone = child.state_type === "completed" || child.state_type === "canceled";

    if (isDone || i < currentIndex) {
      // Reuse the stamp from the first render in which this subtask was done.
      let stamp = this.ralphCompletionStamps.get(child);
      if (stamp === undefined) {
        stamp = formatTime();
        this.ralphCompletionStamps.set(child, stamp);
      }
      lines.push(`- [x] ${child.identifier}: ${child.title} _(${stamp})_`);
    } else if (i === currentIndex && phase === "running") {
      lines.push(`- [ ] ${child.identifier}: ${child.title} _(in progress...)_`);
    } else if (i === currentIndex && phase === "error") {
      lines.push(`- [ ] ${child.identifier}: ${child.title} — **failed**`);
    } else {
      lines.push(`- [ ] ${child.identifier}: ${child.title}`);
    }
  });

  if (errorMsg) {
    lines.push("", `**Error:** ${errorMsg}`);
  }

  return lines.join("\n");
}
937
+
938
/**
 * Best-effort refresh of the Ralph-loop status comment on an issue.
 *
 * Returns immediately when neither `linearClient` nor `tracker` is set.
 * Otherwise the body is rendered with `buildRalphStatusComment` and handed to
 * `upsertComment`. Any error raised while rendering or upserting is logged as
 * a warning and not rethrown.
 *
 * @param issueId Id of the issue that carries the status comment.
 * @param commentId Comment id passed through to `upsertComment`; may be null.
 * @param parentIssue Forwarded to `buildRalphStatusComment`.
 * @param todoChildren Forwarded to `buildRalphStatusComment`.
 * @param currentIndex Forwarded to `buildRalphStatusComment`.
 * @param startedAt Forwarded to `buildRalphStatusComment`.
 * @param phase Forwarded to `buildRalphStatusComment`.
 * @param errorMsg Forwarded to `buildRalphStatusComment`.
 */
private async updateRalphStatus(
  issueId: string,
  commentId: string | null,
  parentIssue: Issue,
  todoChildren: ChildIssue[],
  currentIndex: number,
  startedAt: string,
  phase: "starting" | "running" | "completed" | "paused" | "error" | "aborted",
  errorMsg?: string,
): Promise<void> {
  const hasBackend = Boolean(this.linearClient) || Boolean(this.tracker);
  if (!hasBackend) return;

  try {
    await this.upsertComment(
      issueId,
      this.buildRalphStatusComment(parentIssue, todoChildren, currentIndex, startedAt, phase, errorMsg),
      commentId,
    );
  } catch (failure) {
    logger.warn(`Failed to update status comment: ${(failure as Error).message}`);
  }
}
956
+
957
/**
 * Run a single agent session using the configured harness.
 *
 * Only the "claude" binary is implemented; any other value of
 * `config.agent.binary` throws. When `this.debug` is set, a transcript path
 * under `~/.symphony/logs` is passed to the runner. Preparing that directory
 * is best-effort: a filesystem error is logged and the run proceeds without a
 * transcript instead of failing the agent run.
 *
 * @param issue Issue the agent works on; its id keys the running entry.
 * @param workspacePath Workspace path handed to the runner.
 * @param prompt Prompt handed to the runner.
 * @param attempt Attempt number handed to the runner, or null.
 * @param abortController Its signal is passed to the runner as `abortSignal`.
 * @param config Service config; `config.agent.binary` selects the harness.
 * @throws Error when the configured binary is not "claude"; also propagates
 *   whatever `runner.run()` rejects with.
 */
private async runAgentOnce(
  issue: Issue,
  workspacePath: string,
  prompt: string,
  attempt: number | null,
  abortController: AbortController,
  config: ServiceConfig
): Promise<void> {
  const binary = config.agent.binary;

  if (binary === "claude") {
    let transcriptPath: string | undefined;
    if (this.debug) {
      try {
        const logsDir = join(homedir(), ".symphony", "logs");
        mkdirSync(logsDir, { recursive: true });
        const ts = new Date().toISOString().replace(/[:.]/g, "-");
        transcriptPath = join(logsDir, `transcript-${issue.identifier}-${ts}.md`);
      } catch (err) {
        // Debug output must never prevent the agent from running.
        transcriptPath = undefined;
        logger.warn(`Failed to prepare transcript directory: ${(err as Error).message}`);
      }
    }

    const runner = new ClaudeRunner({
      config,
      issue,
      workspacePath,
      prompt,
      attempt,
      onEvent: (event) => {
        // Link runner session to running entry for stall detection.
        // Without this, entry.session stays null and stall detection falls back to
        // entry.attempt.started_at, causing ralph loops to be killed after stall_timeout_ms
        // even when the agent is actively working on subtasks.
        // NOTE: In ralph_loop mode, each subtask gets a new runner/session, so we must
        // always update (not just when null) to track the current subtask's activity.
        const entry = this.orchState?.running.get(issue.id);
        if (entry) {
          entry.session = runner.getSession();
        }
        this.handleAgentEvent(issue.id, event);
      },
      abortSignal: abortController.signal,
      transcriptPath,
    });
    await runner.run();
  } else {
    throw new Error(`Unsupported binary: ${binary}. Only "claude" is currently implemented.`);
  }
}
1006
+
1007
+ // ── Prompt Debug ────────────────────────────────────────────
1008
+
1009
/**
 * Dump a rendered prompt to `~/.symphony/logs/prompt-<label>.md`.
 *
 * Does nothing unless `this.debug` is set. The logs directory is created if
 * needed. Any error while creating the directory or writing the file is
 * logged as a warning and not rethrown.
 *
 * @param _workspacePath Not used by this method.
 * @param label Suffix for the file name; also logged as `issue_identifier`.
 * @param prompt Prompt text written as UTF-8.
 */
private writePromptDebug(_workspacePath: string, label: string, prompt: string): void {
  if (this.debug) {
    try {
      const targetDir = join(homedir(), ".symphony", "logs");
      mkdirSync(targetDir, { recursive: true });

      const filepath = join(targetDir, `prompt-${label}.md`);
      writeFileSync(filepath, prompt, "utf-8");

      logger.info(`Wrote prompt to ${filepath} (${prompt.length} chars)`, {
        issue_identifier: label,
      });
    } catch (failure) {
      logger.warn(`Failed to write prompt debug file: ${(failure as Error).message}`);
    }
  }
}
1025
+
1026
+ // ── Parent Issue Transition ──────────────────────────────────
1027
+
1028
/**
 * Move a parent issue to the "Done" workflow state after its subtasks finish.
 *
 * Skipped (with an info log) when `isGitHubTracker()` is true, and silently
 * skipped when no Linear client is set. Otherwise the issue is re-fetched to
 * read its team id, the team's "Done" state id is looked up, and the issue is
 * updated to that state. A missing issue, team id or state id is logged as a
 * warning; any thrown error is logged as an error. Nothing is rethrown.
 *
 * @param issue Issue to transition; `identifier` is used for the fetch and
 *   logs, `id` for the update.
 */
private async transitionIssueToDone(issue: Issue): Promise<void> {
  // GitHub PRs use labels, not state transitions
  if (this.isGitHubTracker()) {
    logger.info(`Skipping state transition for GitHub PR ${issue.identifier} (use labels instead)`);
    return;
  }

  if (!this.linearClient) {
    return;
  }

  try {
    // The team id is read from the freshly fetched issue.
    const fetched = await this.linearClient.getIssue(issue.identifier);
    if (!fetched) {
      logger.warn(`Could not fetch issue ${issue.identifier} for state transition`);
      return;
    }

    // `team` is read through an `any` cast, as in the original lookup.
    const teamId = (fetched as any).team?.id;
    if (!teamId) {
      logger.warn(`Could not get team ID for issue ${issue.identifier}`);
      return;
    }

    const doneStateId = await this.linearClient.findStateId(teamId, "Done");
    if (!doneStateId) {
      logger.warn(`Could not find "Done" state for team ${teamId}`);
      return;
    }

    await this.linearClient.updateIssue(issue.id, { stateId: doneStateId });
    logger.info(`Transitioned ${issue.identifier} to Done (all subtasks complete)`);
  } catch (failure) {
    logger.error(`Failed to transition ${issue.identifier} to Done: ${(failure as Error).message}`);
  }
}
1066
+
1067
+ // ── Agent Event Handling ──────────────────────────────────────
1068
+
1069
/**
 * Fold one agent event into orchestrator state.
 *
 * Ignored when there is no orchestrator state or no running entry for
 * `issueId`. A "token_usage_updated" event with a `usage` object adds its
 * token counts (missing counts count as 0) to the totals. If the event has a
 * payload and `parseRateLimits` yields a value from it, the stored rate
 * limits are updated. Every handled event is logged at debug level.
 *
 * @param issueId Id of the issue whose agent emitted the event.
 * @param event The agent event.
 */
private handleAgentEvent(issueId: string, event: AgentEvent): void {
  const orch = this.orchState;
  if (!orch) return;

  const runningEntry = orch.running.get(issueId);
  if (!runningEntry) return;

  if (event.event === "token_usage_updated" && event.usage) {
    const usage = event.usage;
    state.updateTotals(orch, {
      delta_input: usage.input_tokens ?? 0,
      delta_output: usage.output_tokens ?? 0,
      delta_total: usage.total_tokens ?? 0,
    });
  }

  // Track rate limits
  const limits = event.payload ? parseRateLimits(event.payload) : null;
  if (limits) {
    state.updateRateLimits(orch, limits);
  }

  logger.debug(`Agent event: ${event.event}`, {
    issue_identifier: runningEntry.issue.identifier,
    event: event.event,
  });
}
1096
+
1097
+ // ── Retry Queue ───────────────────────────────────────────────
1098
+
1099
/**
 * Schedule a backoff retry for an issue.
 *
 * No-op without config or orchestrator state. When `max_retries` is positive
 * and `attempt` exceeds it, a warning is logged, the claim is released and no
 * timer is set. Otherwise the delay comes from
 * `scheduler.calculateBackoffDelay`, a timer is armed that calls
 * `handleRetryFired`, and a retry entry is stored in orchestrator state.
 *
 * @param issue Issue to retry; `id` and `identifier` are read.
 * @param attempt Attempt number recorded on the retry entry.
 * @param error Error text recorded on the retry entry, or null.
 */
private queueRetry(issue: Issue, attempt: number, error: string | null): void {
  if (!this.config || !this.orchState) return;

  const { max_retries: retryCap, max_retry_backoff_ms: backoffCeilingMs } = this.config.agent;

  const capExceeded = retryCap > 0 && attempt > retryCap;
  if (capExceeded) {
    logger.warn(`Max retries (${retryCap}) exceeded for ${issue.identifier}, giving up`, {
      issue_id: issue.id,
      issue_identifier: issue.identifier,
      attempt,
    });
    state.releaseClaim(this.orchState, issue.id);
    return;
  }

  const delayMs = scheduler.calculateBackoffDelay(attempt, backoffCeilingMs);
  const dueAtMs = Date.now() + delayMs;
  const timerHandle = setTimeout(() => void this.handleRetryFired(issue.id), delayMs);

  const pending: RetryEntry = {
    issue_id: issue.id,
    identifier: issue.identifier,
    attempt,
    due_at_ms: dueAtMs,
    timer_handle: timerHandle,
    error,
  };
  state.addRetry(this.orchState, pending);

  logger.info(`Queued retry for ${issue.identifier}`, {
    issue_id: issue.id,
    attempt,
    delay_ms: delayMs,
  });
}
1140
+
1141
/**
 * Schedule a continuation retry for an issue.
 *
 * No-op without orchestrator state. Uses the fixed delay
 * `scheduler.CONTINUATION_RETRY_DELAY_MS`, arms a timer that calls
 * `handleRetryFired`, and stores a retry entry with attempt 1 and no error.
 *
 * @param issue Issue to re-dispatch; `id` and `identifier` are read.
 */
private queueContinuationRetry(issue: Issue): void {
  if (!this.orchState) return;

  const waitMs = scheduler.CONTINUATION_RETRY_DELAY_MS;
  const dueAtMs = Date.now() + waitMs;
  const timerHandle = setTimeout(() => void this.handleRetryFired(issue.id), waitMs);

  const pending: RetryEntry = {
    issue_id: issue.id,
    identifier: issue.identifier,
    attempt: 1,
    due_at_ms: dueAtMs,
    timer_handle: timerHandle,
    error: null,
  };
  state.addRetry(this.orchState, pending);

  logger.debug(`Queued continuation retry for ${issue.identifier}`);
}
1164
+
1165
/**
 * Handle a retry timer firing for an issue (armed by `queueRetry` and
 * `queueContinuationRetry`).
 *
 * The retry entry is removed from orchestrator state first; if none exists
 * the call is a no-op. Candidates are then re-fetched:
 * - issue no longer among the candidates: release the claim;
 * - no free slots: requeue with the same attempt number;
 * - otherwise: release the old claim and dispatch with the stored attempt.
 * If any of that throws, the error is logged and the retry is requeued with
 * `attempt + 1`.
 *
 * The guard accepts either backend (`linearClient` or `tracker`), matching
 * `updateRalphStatus`. Requiring `linearClient` alone would make this return
 * early on a tracker-only setup, leaving the retry entry and claim in place.
 *
 * @param issueId Id of the issue whose retry timer fired.
 */
private async handleRetryFired(issueId: string): Promise<void> {
  if (!this.config || !this.orchState) return;
  // This method never calls the Linear client directly, so either backend is enough.
  if (!this.linearClient && !this.tracker) return;

  const retryEntry = state.removeRetry(this.orchState, issueId);
  if (!retryEntry) return;

  try {
    // Fetch active candidates
    const candidates = await this.fetchCandidateIssues();

    // Find our issue
    const issue = candidates.find((c) => c.id === issueId);

    if (!issue) {
      // Issue no longer active
      state.releaseClaim(this.orchState, issueId);
      logger.info(`Retry released claim for ${retryEntry.identifier} (no longer active)`);
      return;
    }

    // Check if we have slots
    if (scheduler.getAvailableSlots(this.orchState, this.config) <= 0) {
      // Requeue without bumping the attempt: waiting for a slot is not a failure.
      this.queueRetry(issue, retryEntry.attempt, "no available orchestrator slots");
      return;
    }

    // Release old claim and dispatch fresh
    state.releaseClaim(this.orchState, issueId);
    this.dispatch(issue, retryEntry.attempt);
  } catch (err) {
    logger.error(`Retry handling failed: ${(err as Error).message}`, {
      issue_id: issueId,
    });
    // Requeue with backoff. Only id and identifier are known here, and those
    // are the only Issue fields queueRetry reads.
    this.queueRetry(
      { id: issueId, identifier: retryEntry.identifier } as Issue,
      retryEntry.attempt + 1,
      (err as Error).message
    );
  }
}
1207
+
1208
+ // ── Observability ─────────────────────────────────────────────
1209
+
1210
/**
 * Build a runtime snapshot via `state.createSnapshot`.
 *
 * @returns The snapshot, or null when there is no orchestrator state.
 */
getSnapshot(): state.RuntimeSnapshot | null {
  const orch = this.orchState;
  return orch ? state.createSnapshot(orch, this.workflowName) : null;
}
1214
+
1215
/**
 * Report the orchestrator's `running` flag.
 *
 * @returns The current value of `this.running`.
 */
isRunning(): boolean {
  const active = this.running;
  return active;
}
1218
+
1219
+ // ── Managed Mode Methods (used by MultiOrchestrator) ──────────
1220
+
1221
/**
 * List the ids of all issues that currently have a running entry.
 *
 * @returns The keys of the running map, or an empty array when there is no
 *   orchestrator state.
 */
getRunningIssueIds(): string[] {
  const orch = this.orchState;
  return orch ? Array.from(orch.running.keys()) : [];
}
1226
+
1227
/**
 * Run stall detection on running entries.
 *
 * No-op without config or orchestrator state, or when
 * `agent.stall_timeout_ms` is zero or negative. For each running entry the
 * last activity time is the session's `last_activity_at`, falling back to the
 * attempt's `started_at`. When more than the timeout has elapsed, the entry's
 * abort controller is aborted and a retry is queued with the next attempt
 * number.
 *
 * Entries whose signal is already aborted are skipped. This method does not
 * remove an entry from the running map, so without the skip an entry that has
 * not yet been cleaned up would be aborted again and get another retry queued
 * on every subsequent call.
 */
runStallDetection(): void {
  if (!this.config || !this.orchState) return;

  const stallTimeoutMs = this.config.agent.stall_timeout_ms;
  if (stallTimeoutMs <= 0) return;

  const now = Date.now();
  for (const [issueId, entry] of this.orchState.running) {
    // Already being stopped; a retry was queued by whoever aborted it, or is not wanted.
    if (entry.abortController.signal.aborted) continue;

    const lastActivity = entry.session?.last_activity_at?.getTime() ??
      entry.attempt.started_at.getTime();
    const elapsed = now - lastActivity;

    if (elapsed > stallTimeoutMs) {
      logger.warn(`Stall detected for ${entry.issue.identifier}`, {
        issue_id: issueId,
        elapsed_ms: elapsed,
      });

      entry.abortController.abort();
      this.queueRetry(
        entry.issue,
        (entry.attempt.attempt ?? 0) + 1,
        "stall timeout"
      );
    }
  }
}
1255
+
1256
/**
 * Apply reconciliation results from an externally fetched state map.
 *
 * No-op without config, orchestrator state or workspace manager. For every
 * running entry that has a non-empty state in `stateMap`, the state name is
 * compared (trimmed, case-insensitive) against the tracker's terminal and
 * active state lists:
 * - terminal: abort the worker, drop the running entry, release the claim and
 *   remove the workspace;
 * - neither terminal nor active: abort, drop and release, keeping the workspace;
 * - active: only update the cached `entry.issue.state`.
 *
 * @param stateMap Current state name per issue id.
 */
async applyReconcileStates(stateMap: Map<string, string>): Promise<void> {
  if (!this.config || !this.orchState || !this.workspaceManager) return;

  const listContains = (names: readonly string[], wanted: string): boolean =>
    names.some((s) => s.trim().toLowerCase() === wanted);

  for (const [issueId, entry] of this.orchState.running) {
    const currentState = stateMap.get(issueId);
    if (!currentState) continue;

    const wanted = currentState.trim().toLowerCase();
    const isTerminal = listContains(this.config.tracker.terminal_states, wanted);
    const stillActive = listContains(this.config.tracker.active_states, wanted);

    if (!isTerminal && stillActive) {
      entry.issue.state = currentState;
      continue;
    }

    const notice = isTerminal
      ? `Issue ${entry.issue.identifier} is now terminal, stopping worker`
      : `Issue ${entry.issue.identifier} is no longer active, stopping worker`;
    logger.info(notice, {
      issue_id: issueId,
      state: currentState,
    });

    entry.abortController.abort();
    state.removeRunning(this.orchState, issueId);
    state.releaseClaim(this.orchState, issueId);

    // Only terminal issues lose their workspace.
    if (isTerminal) {
      await this.workspaceManager.removeWorkspace(entry.issue.identifier);
    }
  }
}
1294
+
1295
/**
 * Reload the workflow file and rebuild the service config.
 *
 * The new workflow and config replace the current ones only after the rebuilt
 * config passes `validateServiceConfig`. A validation failure or any thrown
 * error is logged and leaves the current workflow and config untouched.
 *
 * @returns true when the new config was applied, false otherwise.
 */
refreshConfig(): boolean {
  try {
    const nextWorkflow = loadWorkflow(this.workflowPath);
    const nextConfig = buildServiceConfig(nextWorkflow);
    const verdict = validateServiceConfig(nextConfig);

    if (verdict.valid) {
      this.workflow = nextWorkflow;
      this.config = nextConfig;
      return true;
    }

    logger.error("Config refresh failed validation", { errors: verdict.errors });
    return false;
  } catch (failure) {
    logger.error(`Config refresh failed: ${(failure as Error).message}`);
    return false;
  }
}
1315
+
1316
/**
 * Dispatch work from a pre-fetched list of issues.
 *
 * Eligible issues are chosen by `scheduler.selectCandidates` and dispatched in
 * that order until `scheduler.getAvailableSlots` reports no free slot.
 *
 * @param issues Candidate issues fetched by the caller.
 * @returns Number of issues for which `dispatch` returned a truthy value;
 *   0 without config or orchestrator state.
 */
dispatchFromIssues(issues: Issue[]): number {
  if (!this.config || !this.orchState) return 0;

  const selection = scheduler.selectCandidates(issues, this.orchState, this.config);
  const eligible = selection.eligible;

  logger.debug(`[${this.workflowName}] ${issues.length} issues, ${eligible.length} eligible`, {
    running: this.orchState.running.size,
    retrying: this.orchState.retry_attempts.size,
  });

  let dispatched = 0;
  for (const candidate of eligible) {
    const freeSlots = scheduler.getAvailableSlots(this.orchState, this.config);
    if (freeSlots <= 0) break;

    if (this.dispatch(candidate, null)) {
      dispatched += 1;
    }
  }

  return dispatched;
}
1339
+
1340
/**
 * Expose the current service config (for MultiOrchestrator coordination).
 *
 * @returns The value of `this.config`, which may be null.
 */
getServiceConfig(): ServiceConfig | null {
  const current = this.config;
  return current;
}
1344
+
1345
/**
 * Expose the workflow path this orchestrator was given.
 *
 * @returns The value of `this.workflowPath`.
 */
getWorkflowPath(): string {
  const path = this.workflowPath;
  return path;
}
1349
+ }
1350
+
1351
/**
 * Format a time of day as 24-hour "HH:MM" using the "en-GB" locale and the
 * process's local time zone.
 *
 * @param date Instant to format; defaults to the current time, so existing
 *   zero-argument callers behave as before.
 * @returns The formatted time, e.g. "09:05".
 */
function formatTime(date: Date = new Date()): string {
  return date.toLocaleTimeString("en-GB", {
    hour: "2-digit",
    minute: "2-digit",
    hour12: false,
  });
}