@renseiai/agentfactory-cli 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/LICENSE +21 -0
  2. package/README.md +123 -0
  3. package/dist/src/agent.d.ts +20 -0
  4. package/dist/src/agent.d.ts.map +1 -0
  5. package/dist/src/agent.js +109 -0
  6. package/dist/src/analyze-logs.d.ts +26 -0
  7. package/dist/src/analyze-logs.d.ts.map +1 -0
  8. package/dist/src/analyze-logs.js +152 -0
  9. package/dist/src/cleanup.d.ts +17 -0
  10. package/dist/src/cleanup.d.ts.map +1 -0
  11. package/dist/src/cleanup.js +111 -0
  12. package/dist/src/governor.d.ts +26 -0
  13. package/dist/src/governor.d.ts.map +1 -0
  14. package/dist/src/governor.js +305 -0
  15. package/dist/src/index.d.ts +10 -0
  16. package/dist/src/index.d.ts.map +1 -0
  17. package/dist/src/index.js +76 -0
  18. package/dist/src/lib/agent-runner.d.ts +28 -0
  19. package/dist/src/lib/agent-runner.d.ts.map +1 -0
  20. package/dist/src/lib/agent-runner.js +272 -0
  21. package/dist/src/lib/analyze-logs-runner.d.ts +47 -0
  22. package/dist/src/lib/analyze-logs-runner.d.ts.map +1 -0
  23. package/dist/src/lib/analyze-logs-runner.js +216 -0
  24. package/dist/src/lib/auto-updater.d.ts +40 -0
  25. package/dist/src/lib/auto-updater.d.ts.map +1 -0
  26. package/dist/src/lib/auto-updater.js +109 -0
  27. package/dist/src/lib/cleanup-runner.d.ts +29 -0
  28. package/dist/src/lib/cleanup-runner.d.ts.map +1 -0
  29. package/dist/src/lib/cleanup-runner.js +295 -0
  30. package/dist/src/lib/governor-dependencies.d.ts +23 -0
  31. package/dist/src/lib/governor-dependencies.d.ts.map +1 -0
  32. package/dist/src/lib/governor-dependencies.js +361 -0
  33. package/dist/src/lib/governor-logger.d.ts +30 -0
  34. package/dist/src/lib/governor-logger.d.ts.map +1 -0
  35. package/dist/src/lib/governor-logger.js +210 -0
  36. package/dist/src/lib/governor-runner.d.ts +103 -0
  37. package/dist/src/lib/governor-runner.d.ts.map +1 -0
  38. package/dist/src/lib/governor-runner.js +210 -0
  39. package/dist/src/lib/linear-runner.d.ts +8 -0
  40. package/dist/src/lib/linear-runner.d.ts.map +1 -0
  41. package/dist/src/lib/linear-runner.js +7 -0
  42. package/dist/src/lib/orchestrator-runner.d.ts +51 -0
  43. package/dist/src/lib/orchestrator-runner.d.ts.map +1 -0
  44. package/dist/src/lib/orchestrator-runner.js +151 -0
  45. package/dist/src/lib/queue-admin-runner.d.ts +30 -0
  46. package/dist/src/lib/queue-admin-runner.d.ts.map +1 -0
  47. package/dist/src/lib/queue-admin-runner.js +378 -0
  48. package/dist/src/lib/sync-routes-runner.d.ts +28 -0
  49. package/dist/src/lib/sync-routes-runner.d.ts.map +1 -0
  50. package/dist/src/lib/sync-routes-runner.js +110 -0
  51. package/dist/src/lib/version.d.ts +35 -0
  52. package/dist/src/lib/version.d.ts.map +1 -0
  53. package/dist/src/lib/version.js +168 -0
  54. package/dist/src/lib/worker-fleet-runner.d.ts +32 -0
  55. package/dist/src/lib/worker-fleet-runner.d.ts.map +1 -0
  56. package/dist/src/lib/worker-fleet-runner.js +256 -0
  57. package/dist/src/lib/worker-runner.d.ts +33 -0
  58. package/dist/src/lib/worker-runner.d.ts.map +1 -0
  59. package/dist/src/lib/worker-runner.js +781 -0
  60. package/dist/src/linear.d.ts +37 -0
  61. package/dist/src/linear.d.ts.map +1 -0
  62. package/dist/src/linear.js +118 -0
  63. package/dist/src/orchestrator.d.ts +21 -0
  64. package/dist/src/orchestrator.d.ts.map +1 -0
  65. package/dist/src/orchestrator.js +190 -0
  66. package/dist/src/queue-admin.d.ts +25 -0
  67. package/dist/src/queue-admin.d.ts.map +1 -0
  68. package/dist/src/queue-admin.js +96 -0
  69. package/dist/src/sync-routes.d.ts +17 -0
  70. package/dist/src/sync-routes.d.ts.map +1 -0
  71. package/dist/src/sync-routes.js +100 -0
  72. package/dist/src/worker-fleet.d.ts +25 -0
  73. package/dist/src/worker-fleet.d.ts.map +1 -0
  74. package/dist/src/worker-fleet.js +140 -0
  75. package/dist/src/worker.d.ts +26 -0
  76. package/dist/src/worker.d.ts.map +1 -0
  77. package/dist/src/worker.js +135 -0
  78. package/package.json +175 -0
@@ -0,0 +1,781 @@
1
+ /**
2
+ * Worker Runner — Programmatic API for the remote worker CLI.
3
+ *
4
+ * Encapsulates all global state into the runner function's closure so that
5
+ * multiple workers can be started from the same process (e.g. tests) without
6
+ * leaking state between invocations.
7
+ */
8
+ import path from 'path';
9
+ import { execSync } from 'child_process';
10
+ import os from 'os';
11
+ import { createOrchestrator, createLogger, } from '@renseiai/agentfactory';
12
+ // ---------------------------------------------------------------------------
13
+ // Helpers (stateless)
14
+ // ---------------------------------------------------------------------------
15
/**
 * Resolve the repository root for the current working directory.
 *
 * Shells out to `git rev-parse --show-toplevel`; when that fails (not inside
 * a git checkout, or git unavailable) it falls back to the current working
 * directory as a best-effort root.
 */
function getGitRoot() {
    const gitOptions = {
        encoding: 'utf-8',
        stdio: ['pipe', 'pipe', 'pipe'],
    };
    try {
        const topLevel = execSync('git rev-parse --show-toplevel', gitOptions);
        return topLevel.trim();
    }
    catch {
        // Best-effort fallback when git cannot report a top-level directory.
        return process.cwd();
    }
}
// Consecutive heartbeat failures tolerated before attempting re-registration.
const MAX_HEARTBEAT_FAILURES = 3;
27
// ---------------------------------------------------------------------------
// Runner
// ---------------------------------------------------------------------------
/**
 * Run a worker that polls the coordinator for work and executes agents.
 *
 * All state is encapsulated in the function closure. The caller can cancel
 * via the optional {@link AbortSignal}.
 *
 * @param config - Worker configuration. Uses `apiUrl` and `apiKey` for all
 *   coordinator calls; optional `hostname`, `capacity`, `dryRun`, `gitRoot`,
 *   `projects`, and `linearApiKey` (falls back to the LINEAR_API_KEY env var).
 * @param signal - Optional AbortSignal; aborting stops the poll loop and
 *   best-effort deregisters the worker.
 * @throws Error when no Linear API key is available.
 */
export async function runWorker(config, signal) {
    // Resolve config with defaults
    const hostname = config.hostname ?? os.hostname();
    const capacity = config.capacity ?? 3; // max concurrent work items
    const dryRun = config.dryRun ?? false;
    const gitRoot = config.gitRoot ?? getGitRoot();
    // NOTE(review): linearApiKey is validated here but never referenced again
    // in this function — presumably consumed downstream (e.g. via env); confirm.
    const linearApiKey = config.linearApiKey ?? process.env.LINEAR_API_KEY;
    if (!linearApiKey) {
        throw new Error('LINEAR_API_KEY is required (pass via config.linearApiKey or set env var)');
    }
    // -----------------------------------------------------------------------
    // State (formerly globals)
    // -----------------------------------------------------------------------
    let workerId = null; // id assigned by the coordinator at registration
    let workerShortId = null; // truncated id used for log context
    let activeCount = 0; // work items currently executing on this worker
    let running = true; // poll loop continues while true
    let heartbeatTimer = null; // setInterval handle for heartbeats
    let shutdownInProgress = false; // guards against double shutdown
    let consecutiveHeartbeatFailures = 0; // re-register once threshold reached
    let reregistrationInProgress = false; // prevents concurrent re-registration
    let claimFailureCount = 0; // claim races since last successful heartbeat
    // sessionId -> orchestrator; used to forward user prompts to live agents
    const activeOrchestrators = new Map();
    // Logger — will be re-created after registration with worker context
    let log = createLogger({}, { showTimestamp: true });
    // Internal config object used by API helpers
    const workerConfig = {
        apiUrl: config.apiUrl,
        apiKey: config.apiKey,
        hostname,
        capacity,
        dryRun,
    };
69
// -----------------------------------------------------------------------
// AbortSignal handling
// -----------------------------------------------------------------------
// Idempotent shutdown hook: stops the poll loop, cancels the heartbeat
// timer, and best-effort deregisters. The shutdownInProgress flag keeps it
// from racing with the finally-block cleanup at the end of runWorker.
const onAbort = () => {
    if (shutdownInProgress)
        return;
    shutdownInProgress = true;
    log.warn('Shutting down (abort signal)...');
    running = false;
    if (heartbeatTimer)
        clearInterval(heartbeatTimer);
    // Fire and forget — server will clean up via heartbeat timeout
    deregister().catch(() => { });
};
signal?.addEventListener('abort', onAbort, { once: true });
84
+ // -----------------------------------------------------------------------
85
+ // API helpers (closures over workerConfig & log)
86
+ // -----------------------------------------------------------------------
87
/**
 * Issue an authenticated JSON request against the coordinator API.
 *
 * Network-level failures are retried with exponential backoff (1s, 2s, 4s,
 * ...); HTTP error responses are classified (worker-not-found vs. generic
 * server error) and returned without retrying. Never throws — always
 * resolves to a `{ data, error }` pair.
 */
async function apiRequestWithError(apiPath, options = {}, retries = 3) {
    const requestUrl = `${workerConfig.apiUrl}${apiPath}`;
    let attempt = 0;
    while (attempt < retries) {
        attempt += 1;
        try {
            const res = await fetch(requestUrl, {
                ...options,
                headers: {
                    'Content-Type': 'application/json',
                    Authorization: `Bearer ${workerConfig.apiKey}`,
                    ...options.headers,
                },
            });
            if (res.ok) {
                return { data: (await res.json()), error: null };
            }
            const bodyText = await res.text();
            // A 404 naming the worker means our registration expired server-side.
            if (res.status === 404 && bodyText.includes('Worker not found')) {
                log.warn(`Worker not found on server: ${apiPath}`, { status: res.status });
                return { data: null, error: { type: 'worker_not_found' } };
            }
            log.error(`API request failed: ${apiPath}`, { status: res.status, body: bodyText });
            return { data: null, error: { type: 'server_error', status: res.status, body: bodyText } };
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            if (attempt >= retries) {
                log.error(`API request error: ${apiPath}`, { error: message, attempts: attempt });
                return { data: null, error: { type: 'network_error', message } };
            }
            // Exponential backoff before the next network attempt.
            const backoffMs = Math.pow(2, attempt - 1) * 1000;
            log.warn(`API request failed, retrying in ${backoffMs}ms: ${apiPath}`, {
                error: message,
                attempt,
                maxRetries: retries,
            });
            await new Promise((resolve) => setTimeout(resolve, backoffMs));
        }
    }
    return { data: null, error: { type: 'network_error', message: 'Max retries exceeded' } };
}
128
/**
 * Convenience wrapper around {@link apiRequestWithError} that discards the
 * error classification and yields only the payload (null on any failure).
 */
async function apiRequest(apiPath, options = {}, retries = 3) {
    const { data } = await apiRequestWithError(apiPath, options, retries);
    return data;
}
132
+ // -----------------------------------------------------------------------
133
+ // Registration
134
+ // -----------------------------------------------------------------------
135
/**
 * Register this worker with the coordinator. Resolves to the registration
 * payload (worker id, heartbeat/poll intervals) or null on failure.
 */
async function register() {
    const { apiUrl, hostname, capacity } = workerConfig;
    log.info('Registering with coordinator', { apiUrl, hostname, capacity });
    const registrationBody = {
        hostname,
        capacity,
        version: '1.0.0',
        projects: config.projects,
    };
    const result = await apiRequest('/api/workers/register', {
        method: 'POST',
        body: JSON.stringify(registrationBody),
    });
    if (result) {
        log.status('registered', `Worker ID: ${result.workerId.substring(0, 8)}`);
    }
    return result;
}
155
/**
 * Ask the server to move ownership of a session from a stale worker id to a
 * freshly registered one. Resolves true when the transfer was acknowledged.
 */
async function transferSessionOwnership(sessionId, newWorkerId, oldWorkerId) {
    const result = await apiRequest(`/api/sessions/${sessionId}/transfer-ownership`, {
        method: 'POST',
        body: JSON.stringify({ newWorkerId, oldWorkerId }),
    });
    if (!result?.transferred) {
        log.warn('Failed to transfer session ownership', {
            sessionId: sessionId.substring(0, 8),
            reason: result?.reason,
        });
        return false;
    }
    log.debug('Session ownership transferred', {
        sessionId: sessionId.substring(0, 8),
        oldWorkerId: oldWorkerId.substring(0, 8),
        newWorkerId: newWorkerId.substring(0, 8),
    });
    return true;
}
176
/**
 * Recover after the server reports this worker unknown (registration
 * expired or was lost). On success, transfers ownership of all active
 * sessions to the new worker id and updates each live orchestrator so
 * activity keeps flowing under the new identity. Returns true on success,
 * false when skipped (already in progress) or when registration failed.
 */
async function attemptReregistration() {
    if (reregistrationInProgress) {
        log.debug('Re-registration already in progress, skipping');
        return false;
    }
    reregistrationInProgress = true;
    const oldWorkerId = workerId;
    log.warn('Worker not found on server - attempting to re-register');
    try {
        const registration = await register();
        if (registration) {
            const newWid = registration.workerId;
            workerId = newWid;
            workerShortId = newWid.substring(4, 8); // Skip 'wkr_' prefix
            // NOTE(review): initial registration computes the short id as
            // substring(0, 8) — the two paths disagree; confirm which is intended.
            consecutiveHeartbeatFailures = 0;
            log.status('re-registered', `New Worker ID: ${workerShortId}`);
            // Transfer ownership of active sessions to the new worker ID
            if (oldWorkerId && activeOrchestrators.size > 0) {
                log.info('Transferring ownership of active sessions', {
                    sessionCount: activeOrchestrators.size,
                    oldWorkerId: oldWorkerId.substring(0, 8),
                    newWorkerId: newWid.substring(0, 8),
                });
                // All transfers run in parallel; each resolves to a boolean.
                const transferPromises = [];
                for (const sessionId of activeOrchestrators.keys()) {
                    transferPromises.push(transferSessionOwnership(sessionId, newWid, oldWorkerId));
                }
                const results = await Promise.all(transferPromises);
                const successCount = results.filter(Boolean).length;
                log.info('Session ownership transfer complete', {
                    total: results.length,
                    succeeded: successCount,
                    failed: results.length - successCount,
                });
                // Update worker ID in all active orchestrators' activity emitters
                for (const [sessionId, orchestrator] of activeOrchestrators.entries()) {
                    orchestrator.updateWorkerId(newWid);
                    log.debug('Updated orchestrator worker ID', {
                        sessionId: sessionId.substring(0, 8),
                    });
                }
            }
            return true;
        }
        log.error('Re-registration failed');
        return false;
    }
    finally {
        // Always release the re-entrancy guard, even on early throw.
        reregistrationInProgress = false;
    }
}
227
// -----------------------------------------------------------------------
// Heartbeat
// -----------------------------------------------------------------------
/**
 * Periodic liveness report to the coordinator, carrying the current active
 * work count and machine load (1-minute load average plus used-memory
 * fraction). Resets the failure counter on success; a worker_not_found
 * response — or MAX_HEARTBEAT_FAILURES consecutive failures — triggers
 * re-registration. No-op when the worker is not registered.
 */
async function sendHeartbeat() {
    if (!workerId)
        return;
    const result = await apiRequestWithError(`/api/workers/${workerId}/heartbeat`, {
        method: 'POST',
        body: JSON.stringify({
            activeCount,
            load: {
                cpu: os.loadavg()[0],
                memory: 1 - os.freemem() / os.totalmem(),
            },
        }),
    });
    if (result.data) {
        consecutiveHeartbeatFailures = 0;
        // Report (and reset) claim races accumulated since the last heartbeat.
        if (claimFailureCount > 0) {
            log.info('Claim race summary since last heartbeat', { claimFailures: claimFailureCount });
            claimFailureCount = 0;
        }
        log.debug('Heartbeat acknowledged', {
            activeCount,
            pendingWorkCount: result.data.pendingWorkCount,
        });
    }
    else if (result.error?.type === 'worker_not_found') {
        consecutiveHeartbeatFailures++;
        await attemptReregistration();
    }
    else {
        consecutiveHeartbeatFailures++;
        log.warn('Heartbeat failed', {
            consecutiveFailures: consecutiveHeartbeatFailures,
            maxFailures: MAX_HEARTBEAT_FAILURES,
            errorType: result.error?.type,
        });
        if (consecutiveHeartbeatFailures >= MAX_HEARTBEAT_FAILURES) {
            log.error('Multiple heartbeat failures - checking if re-registration needed', {
                consecutiveFailures: consecutiveHeartbeatFailures,
            });
            await attemptReregistration();
        }
    }
}
273
+ // -----------------------------------------------------------------------
274
+ // Polling & claiming
275
+ // -----------------------------------------------------------------------
276
/**
 * Poll the coordinator for claimable work and queued user prompts.
 *
 * Resolves to an empty poll result when the worker is unregistered or the
 * request fails; if the server no longer knows this worker, re-registers
 * first and skips the cycle.
 */
async function pollForWork() {
    const emptyPoll = () => ({ work: [], pendingPrompts: {}, hasPendingPrompts: false });
    if (!workerId) {
        return emptyPoll();
    }
    const { data, error } = await apiRequestWithError(`/api/workers/${workerId}/poll`);
    if (error?.type === 'worker_not_found') {
        // Registration evaporated server-side; recover and try again next cycle.
        await attemptReregistration();
        return emptyPoll();
    }
    if (!data) {
        return emptyPoll();
    }
    if (data.hasPendingPrompts) {
        const sessionEntries = Object.entries(data.pendingPrompts);
        let totalPrompts = 0;
        for (const [, prompts] of sessionEntries) {
            totalPrompts += prompts.length;
        }
        log.info('Received pending prompts', {
            sessionCount: sessionEntries.length,
            totalPrompts,
            sessions: sessionEntries.map(([sessionId, prompts]) => ({
                sessionId: sessionId.substring(0, 8),
                promptCount: prompts.length,
                promptIds: prompts.map((p) => p.id),
            })),
        });
    }
    return data;
}
302
/**
 * Attempt to claim a session for this worker. Resolves to the server's
 * claim response, or null when unregistered or the request fails.
 */
async function claimWork(sessionId) {
    if (!workerId) {
        return null;
    }
    const claimBody = JSON.stringify({ workerId });
    return apiRequest(`/api/sessions/${sessionId}/claim`, {
        method: 'POST',
        body: claimBody,
    });
}
310
/**
 * Push a session status update to the coordinator, optionally merged with
 * extra fields (e.g. providerSessionId, worktreePath, error details).
 * No-op when the worker is not registered.
 */
async function reportStatus(sessionId, status, extra) {
    if (!workerId) {
        return;
    }
    const payload = { workerId, status, ...extra };
    await apiRequest(`/api/sessions/${sessionId}/status`, {
        method: 'POST',
        body: JSON.stringify(payload),
    });
    log.debug(`Reported status: ${status}`, { sessionId });
}
323
/**
 * Post a human-readable progress milestone for a session. Logs — but never
 * throws — when the server declines or the request fails. No-op when the
 * worker is not registered.
 */
async function postProgress(sessionId, milestone, message) {
    if (!workerId) {
        return;
    }
    const result = await apiRequest(`/api/sessions/${sessionId}/progress`, {
        method: 'POST',
        body: JSON.stringify({ workerId, milestone, message }),
    });
    if (result?.posted) {
        log.debug(`Progress posted: ${milestone}`, { sessionId });
        return;
    }
    log.warn(`Failed to post progress: ${milestone}`, { reason: result?.reason });
}
341
/**
 * Fetch the current server-side status record for a session (includes the
 * owning workerId), or null on failure.
 */
async function checkSessionOwnership(sessionId) {
    const status = await apiRequest(`/api/sessions/${sessionId}/status`);
    return status;
}
344
/**
 * True when the coordinator reports the session's status as 'stopped'.
 */
async function checkSessionStopped(sessionId) {
    const session = await apiRequest(`/api/sessions/${sessionId}/status`);
    return session?.status === 'stopped';
}
348
/**
 * Deregister this worker from the coordinator and clear the local worker id.
 *
 * Fix: guard against a success response that lacks an `unclaimedSessions`
 * array — previously `result.unclaimedSessions.length` threw a TypeError
 * during shutdown when the server omitted the field.
 */
async function deregister() {
    if (!workerId)
        return;
    log.info('Deregistering worker');
    const result = await apiRequest(`/api/workers/${workerId}`, {
        method: 'DELETE',
    });
    if (result) {
        const unclaimedCount = result.unclaimedSessions?.length ?? 0;
        log.status('stopped', `Unclaimed sessions: ${unclaimedCount}`);
    }
    // Mark as unregistered regardless of whether the server call succeeded.
    workerId = null;
}
360
+ // -----------------------------------------------------------------------
361
+ // Agent logger factory
362
+ // -----------------------------------------------------------------------
363
/**
 * Build a child logger scoped to one issue so every record emitted during
 * agent execution carries the issue identifier.
 */
function createAgentLogger(issueIdentifier) {
    const childContext = { issueIdentifier };
    return log.child(childContext);
}
366
+ // -----------------------------------------------------------------------
367
+ // Work execution
368
+ // -----------------------------------------------------------------------
369
/**
 * Execute one claimed work item end-to-end: report status, keep the issue
 * lock fresh, spawn (or resume) an agent via a dedicated orchestrator,
 * watch for a user stop signal, and report the terminal status after all
 * cleanup. Work-level errors are caught and reported as a 'failed'
 * terminal status rather than propagated.
 */
async function executeWork(work) {
    const agentLog = createAgentLogger(work.issueIdentifier);
    const isResume = !!work.providerSessionId;
    agentLog.section(`${isResume ? 'Resuming' : 'Starting'} work on ${work.issueIdentifier}`);
    agentLog.info('Work details', {
        hasPrompt: !!work.prompt,
        isResume,
        workType: work.workType,
    });
    activeCount++;
    // Two-phase completion: set in try/catch, read in finally
    let finalStatus = 'failed';
    let statusPayload;
    // Issue lock TTL refresher
    let lockRefresher = null;
    try {
        await reportStatus(work.sessionId, 'running');
        // Start lock TTL refresher (refresh every 60s, lock TTL is 2 hours)
        if (work.issueId) {
            lockRefresher = setInterval(async () => {
                try {
                    const response = await apiRequest(`/api/sessions/${work.sessionId}/lock-refresh`, {
                        method: 'POST',
                        body: JSON.stringify({ workerId, issueId: work.issueId }),
                    });
                    if (response?.refreshed) {
                        agentLog.debug('Issue lock TTL refreshed');
                    }
                }
                catch {
                    // Non-fatal — lock has a 2hr TTL so missing one refresh is fine
                }
            }, 60_000);
        }
        // Post initial progress
        await postProgress(work.sessionId, isResume ? 'resumed' : 'claimed', isResume
            ? `Resuming work on ${work.issueIdentifier}`
            : `Worker claimed ${work.issueIdentifier}. Setting up environment...`);
        // Create orchestrator with API activity proxy
        const orchestrator = createOrchestrator({
            maxConcurrent: 1,
            worktreePath: path.resolve(gitRoot, '.worktrees'),
            apiActivityConfig: {
                baseUrl: workerConfig.apiUrl,
                apiKey: workerConfig.apiKey,
                workerId: workerId,
            },
        }, {
            // Lifecycle callbacks. Note: reportStatus/postProgress calls inside
            // these callbacks are deliberately not awaited (fire-and-forget).
            onIssueSelected: (issue) => {
                agentLog.info('Issue fetched', {
                    title: issue.title.slice(0, 50),
                    labels: issue.labels.join(', '),
                });
            },
            onAgentStart: (agent) => {
                agentLog.status('running', agent.pid ? `PID: ${agent.pid}` : 'spawning');
                agentLog.debug('Agent details', {
                    worktree: agent.worktreePath,
                });
                reportStatus(work.sessionId, 'running', {
                    providerSessionId: agent.sessionId,
                    worktreePath: agent.worktreePath,
                });
                postProgress(work.sessionId, 'started', `Agent started working on ${agent.identifier}`);
            },
            onAgentComplete: (agent) => {
                agentLog.status('completed', `Exit code: ${agent.exitCode}`);
            },
            onAgentError: (_agent, error) => {
                agentLog.error('Agent error', { error: error.message });
            },
            onAgentStopped: (_agent) => {
                agentLog.status('stopped');
            },
            onAgentIncomplete: (agent) => {
                agentLog.warn('Agent incomplete - worktree preserved', {
                    reason: agent.incompleteReason,
                    worktreePath: agent.worktreePath,
                });
            },
            onProviderSessionId: (_linearSessionId, providerSessionId) => {
                agentLog.debug('Provider session captured', { providerSessionId });
                reportStatus(work.sessionId, 'running', {
                    providerSessionId,
                });
            },
        });
        // Store orchestrator for prompt forwarding
        activeOrchestrators.set(work.sessionId, orchestrator);
        agentLog.debug('Orchestrator registered for session', {
            sessionId: work.sessionId.substring(0, 8),
        });
        let spawnedAgent;
        // Retry configuration for "agent already running" conflicts
        const MAX_SPAWN_RETRIES = 3;
        const SPAWN_RETRY_DELAY_MS = 15000;
        if (work.providerSessionId) {
            // Resume existing Claude session
            agentLog.info('Resuming provider session', {
                providerSessionId: work.providerSessionId.substring(0, 12),
            });
            const prompt = work.prompt || `Continue work on ${work.issueIdentifier}`;
            const result = await orchestrator.forwardPrompt(work.issueId, work.sessionId, prompt, work.providerSessionId, work.workType);
            if (!result.forwarded || !result.agent) {
                throw new Error(`Failed to resume session: ${result.reason || 'unknown error'}`);
            }
            agentLog.success('Session resumed');
            spawnedAgent = result.agent;
        }
        else {
            // Fresh start with retry logic
            agentLog.info('Spawning new agent', { workType: work.workType });
            let lastError = null;
            for (let attempt = 1; attempt <= MAX_SPAWN_RETRIES; attempt++) {
                try {
                    spawnedAgent = await orchestrator.spawnAgentForIssue(work.issueIdentifier, work.sessionId, work.workType, work.prompt);
                    break;
                }
                catch (err) {
                    lastError = err instanceof Error ? err : new Error(String(err));
                    // Conflict classification is string-matching on error messages —
                    // brittle if the orchestrator's wording changes.
                    const isAgentRunning = lastError.message.includes('Agent already running') ||
                        lastError.message.includes('Agent is still running');
                    const isBranchConflict = lastError.message.includes('already checked out') ||
                        lastError.message.includes('is already checked out at');
                    const isRetriable = isAgentRunning || isBranchConflict;
                    if (isRetriable && attempt < MAX_SPAWN_RETRIES) {
                        // For "agent already running" errors, check if another worker owns this session
                        // If so, bail immediately instead of wasting retries
                        if (isAgentRunning && !isBranchConflict) {
                            try {
                                const sessionStatus = await checkSessionOwnership(work.sessionId);
                                if (sessionStatus?.workerId && sessionStatus.workerId !== workerId) {
                                    agentLog.warn('Session owned by another worker, abandoning spawn', {
                                        ownerWorkerId: sessionStatus.workerId.substring(0, 8),
                                    });
                                    throw new Error(`Session owned by another worker: ${sessionStatus.workerId}`);
                                }
                            }
                            catch (ownershipErr) {
                                // Re-throw ownership errors, swallow check failures
                                if (ownershipErr instanceof Error && ownershipErr.message.includes('Session owned by another worker')) {
                                    throw ownershipErr;
                                }
                            }
                        }
                        const reason = isBranchConflict
                            ? 'Branch in use by another agent'
                            : 'Agent still running';
                        agentLog.warn(`${reason}, waiting to retry (attempt ${attempt}/${MAX_SPAWN_RETRIES})`, { retryInMs: SPAWN_RETRY_DELAY_MS });
                        await postProgress(work.sessionId, 'waiting', `${reason}, waiting to retry (${attempt}/${MAX_SPAWN_RETRIES})...`);
                        await new Promise((resolve) => setTimeout(resolve, SPAWN_RETRY_DELAY_MS));
                    }
                    else {
                        throw lastError;
                    }
                }
            }
            if (!spawnedAgent) {
                throw lastError || new Error('Failed to spawn agent after retries');
            }
        }
        agentLog.info('Agent spawned', {
            pid: spawnedAgent.pid,
            status: spawnedAgent.status,
        });
        if (!spawnedAgent.pid) {
            agentLog.warn('Agent has no PID - spawn may have failed');
        }
        // Start a stop signal checker (polls the session status every 5s)
        let stopRequested = false;
        const stopChecker = setInterval(async () => {
            try {
                if (await checkSessionStopped(work.sessionId)) {
                    agentLog.warn('Stop signal received');
                    stopRequested = true;
                    clearInterval(stopChecker);
                    await orchestrator.stopAgent(work.issueId, false);
                }
            }
            catch {
                // Ignore errors in stop checker
            }
        }, 5000);
        // Wait for agent to complete
        agentLog.info('Waiting for agent to complete...');
        const results = await orchestrator.waitForAll();
        const agent = results[0];
        clearInterval(stopChecker);
        // Determine final status (reported from finally, after cleanup)
        if (stopRequested || agent?.stopReason === 'user_request') {
            finalStatus = 'stopped';
            await reportStatus(work.sessionId, 'finalizing');
            await postProgress(work.sessionId, 'stopped', `Work stopped by user request`);
            agentLog.status('stopped', 'Work stopped by user request');
        }
        else if (agent?.stopReason === 'timeout') {
            finalStatus = 'failed';
            statusPayload = { error: { message: 'Agent timed out' } };
            await reportStatus(work.sessionId, 'finalizing');
            await postProgress(work.sessionId, 'failed', `Work timed out`);
            agentLog.status('stopped', 'Work timed out');
        }
        else if (agent?.status === 'completed') {
            finalStatus = 'completed';
            statusPayload = {
                providerSessionId: agent.sessionId,
                worktreePath: agent.worktreePath,
                totalCostUsd: agent.totalCostUsd,
                inputTokens: agent.inputTokens,
                outputTokens: agent.outputTokens,
            };
            await reportStatus(work.sessionId, 'finalizing');
            await postProgress(work.sessionId, 'completed', `Work completed successfully on ${work.issueIdentifier}`);
            agentLog.success('Work completed successfully');
        }
        else {
            const errorMsg = agent?.error?.message || 'Agent did not complete successfully';
            finalStatus = 'failed';
            statusPayload = { error: { message: errorMsg } };
            await reportStatus(work.sessionId, 'finalizing');
            await postProgress(work.sessionId, 'failed', `Work failed: ${errorMsg}`);
            agentLog.error('Work failed', { error: errorMsg });
        }
    }
    catch (error) {
        const errorMsg = error instanceof Error ? error.message : 'Unknown error';
        agentLog.error('Work execution failed', { error: errorMsg });
        finalStatus = 'failed';
        statusPayload = { error: { message: errorMsg } };
        await reportStatus(work.sessionId, 'finalizing').catch(() => { });
        await postProgress(work.sessionId, 'failed', `Work failed: ${errorMsg}`);
    }
    finally {
        if (lockRefresher)
            clearInterval(lockRefresher);
        activeOrchestrators.delete(work.sessionId);
        agentLog.debug('Orchestrator unregistered for session', {
            sessionId: work.sessionId.substring(0, 8),
        });
        activeCount--;
        // Report true terminal status AFTER all cleanup
        await reportStatus(work.sessionId, finalStatus, statusPayload).catch((err) => {
            agentLog.error('Failed to report final status', {
                error: err instanceof Error ? err.message : String(err),
            });
        });
    }
}
617
// -----------------------------------------------------------------------
// Main logic
// -----------------------------------------------------------------------
// Lifecycle: register -> optionally inherit project filter from server ->
// start heartbeat timer -> poll loop (claim work, launch executeWork in
// the background, forward pending prompts) -> finally: cleanup + deregister.
try {
    log.section('AgentFactory Worker');
    log.info('Configuration', {
        apiUrl: workerConfig.apiUrl,
        hostname: workerConfig.hostname,
        capacity: workerConfig.capacity,
        dryRun: workerConfig.dryRun,
        projects: config.projects?.length ? config.projects : 'all',
    });
    // Register with coordinator
    const registration = await register();
    if (!registration) {
        throw new Error('Failed to register with coordinator');
    }
    workerId = registration.workerId;
    workerShortId = registration.workerId.substring(0, 8);
    // Update logger with worker context
    log = createLogger({ workerId, workerShortId }, { showTimestamp: true });
    // Auto-inherit projects from server if not explicitly configured
    if (!config.projects?.length) {
        try {
            const serverConfig = await apiRequest('/api/config');
            if (serverConfig?.projects?.length) {
                config.projects = serverConfig.projects;
                log.info('Auto-inherited projects from server', { projects: config.projects });
            }
        }
        catch {
            log.debug('Could not fetch server config, using no project filter');
        }
    }
    // Set up heartbeat (interval dictated by the server's registration reply)
    heartbeatTimer = setInterval(() => sendHeartbeat(), registration.heartbeatInterval);
    // Send initial heartbeat
    await sendHeartbeat();
    // Main poll loop
    log.info('Starting poll loop...');
    while (running) {
        if (signal?.aborted)
            break;
        try {
            // Capacity snapshot taken before the poll round-trip; background
            // executeWork calls adjust activeCount as they start/finish.
            const availableCapacity = workerConfig.capacity - activeCount;
            const pollResult = await pollForWork();
            // Handle new work items if we have capacity
            if (availableCapacity > 0 && pollResult.work.length > 0) {
                log.info(`Found ${pollResult.work.length} work item(s)`, {
                    activeCount,
                    availableCapacity,
                });
                for (const item of pollResult.work.slice(0, availableCapacity)) {
                    if (!running)
                        break;
                    const claimResult = await claimWork(item.sessionId);
                    if (claimResult?.claimed) {
                        log.status('claimed', item.issueIdentifier);
                        if (workerConfig.dryRun) {
                            log.info(`[DRY RUN] Would execute: ${item.issueIdentifier}`);
                        }
                        else {
                            // Intentionally not awaited: work runs in the background
                            // while the loop keeps polling.
                            executeWork(item).catch((error) => {
                                log.error('Background work execution failed', {
                                    error: error instanceof Error ? error.message : String(error),
                                });
                            });
                        }
                    }
                    else {
                        // Another worker won the claim race; tallied and reported
                        // in the next successful heartbeat.
                        claimFailureCount++;
                        log.debug(`Failed to claim work: ${item.issueIdentifier}`);
                    }
                }
            }
            // Handle pending prompts for active sessions
            if (pollResult.hasPendingPrompts) {
                for (const [sessionId, prompts] of Object.entries(pollResult.pendingPrompts)) {
                    for (const prompt of prompts) {
                        log.info('Processing pending prompt', {
                            sessionId: sessionId.substring(0, 8),
                            promptId: prompt.id,
                            promptLength: prompt.prompt.length,
                            userName: prompt.userName,
                        });
                        const orchestrator = activeOrchestrators.get(sessionId);
                        if (!orchestrator) {
                            // Session is not executing on this worker; leave the
                            // prompt unclaimed for whichever worker owns it.
                            log.warn('No active orchestrator found for session', {
                                sessionId: sessionId.substring(0, 8),
                                promptId: prompt.id,
                            });
                            continue;
                        }
                        const agent = orchestrator.getAgentBySession(sessionId);
                        const providerSessionId = agent?.providerSessionId;
                        log.info('Forwarding prompt to provider session', {
                            sessionId: sessionId.substring(0, 8),
                            promptId: prompt.id,
                            hasProviderSession: !!providerSessionId,
                            agentStatus: agent?.status,
                        });
                        try {
                            const result = await orchestrator.forwardPrompt(prompt.issueId, sessionId, prompt.prompt, providerSessionId, agent?.workType);
                            if (result.forwarded) {
                                log.success(result.injected
                                    ? 'Message injected into running session'
                                    : 'Prompt forwarded successfully', {
                                    sessionId: sessionId.substring(0, 8),
                                    promptId: prompt.id,
                                    injected: result.injected ?? false,
                                    resumed: result.resumed,
                                    newAgentPid: result.agent?.pid,
                                });
                                // Claim the prompt only after a successful forward so
                                // a failed forward leaves it available for retry.
                                const claimResult = await apiRequest(`/api/sessions/${sessionId}/prompts`, {
                                    method: 'POST',
                                    body: JSON.stringify({ promptId: prompt.id }),
                                });
                                if (claimResult?.claimed) {
                                    log.debug('Prompt claimed', { promptId: prompt.id });
                                }
                                else {
                                    log.warn('Failed to claim prompt', { promptId: prompt.id });
                                }
                            }
                            else {
                                log.error('Failed to forward prompt', {
                                    sessionId: sessionId.substring(0, 8),
                                    promptId: prompt.id,
                                    reason: result.reason,
                                    error: result.error?.message,
                                });
                            }
                        }
                        catch (error) {
                            log.error('Error forwarding prompt', {
                                sessionId: sessionId.substring(0, 8),
                                promptId: prompt.id,
                                error: error instanceof Error ? error.message : String(error),
                            });
                        }
                    }
                }
            }
        }
        catch (error) {
            // Keep the loop alive on per-iteration failures.
            log.error('Poll loop error', {
                error: error instanceof Error ? error.message : String(error),
            });
        }
        // Wait before next poll
        await new Promise((resolve) => setTimeout(resolve, registration.pollInterval));
    }
}
finally {
    signal?.removeEventListener('abort', onAbort);
    // Clean up timers
    if (heartbeatTimer)
        clearInterval(heartbeatTimer);
    // Deregister if we haven't already (abort path deregisters via onAbort)
    if (workerId && !shutdownInProgress) {
        await deregister().catch(() => { });
    }
    log.status('stopped', 'Shutdown complete');
}
}