@supaku/agentfactory-cli 0.1.1 → 0.2.0
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
- package/dist/src/analyze-logs.d.ts +26 -0
- package/dist/src/analyze-logs.d.ts.map +1 -0
- package/dist/src/analyze-logs.js +317 -0
- package/dist/src/cleanup.d.ts +21 -0
- package/dist/src/cleanup.d.ts.map +1 -0
- package/dist/src/cleanup.js +309 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +16 -0
- package/dist/src/queue-admin.d.ts +24 -0
- package/dist/src/queue-admin.d.ts.map +1 -0
- package/dist/src/queue-admin.js +418 -0
- package/dist/src/worker-fleet.d.ts +23 -0
- package/dist/src/worker-fleet.d.ts.map +1 -0
- package/dist/src/worker-fleet.js +256 -0
- package/dist/src/worker.d.ts +10 -10
- package/dist/src/worker.js +734 -50
- package/package.json +11 -5
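
The largest single change is to worker.js, shown below: the worker now reads its configuration from a .env.local file in the current working directory (the --api-url and --api-key flags override the corresponding variables). As a minimal sketch (the variable names come from the diff below; the values are placeholders), such a file might look like:

    WORKER_API_URL=https://agent.example.com
    WORKER_API_KEY=<worker API key>
    LINEAR_API_KEY=<Linear API key>
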
package/dist/src/worker.js CHANGED

@@ -2,45 +2,88 @@
 /**
  * AgentFactory Worker CLI
  *
- *
- * and processes assigned agent sessions.
+ * Local worker that polls the coordinator for work and executes agents.
  *
  * Usage:
  *   af-worker [options]
  *
  * Options:
- *   --capacity <number> Maximum concurrent agents (default:
- *   --
- *   --api-
+ *   --capacity <number> Maximum concurrent agents (default: 3)
+ *   --hostname <name>   Worker hostname (default: os.hostname())
+ *   --api-url <url>     Coordinator API URL (default: WORKER_API_URL env)
+ *   --api-key <key>     API key (default: WORKER_API_KEY env)
+ *   --dry-run           Poll but don't execute work
  *
- * Environment:
- *
- *
- *
- *   WORKER_API_KEY API key (alternative to --api-key)
+ * Environment (loaded from .env.local in CWD):
+ *   WORKER_API_URL   Coordinator API URL (e.g., https://agent.example.com)
+ *   WORKER_API_KEY   API key for authentication
+ *   LINEAR_API_KEY   Required for agent operations
  */
 import path from 'path';
+import { execSync } from 'child_process';
 import { config } from 'dotenv';
-// Load environment variables
+// Load environment variables from .env.local in CWD
 config({ path: path.resolve(process.cwd(), '.env.local') });
+import os from 'os';
+import { createOrchestrator, createLogger } from '@supaku/agentfactory';
+/**
+ * Get the git repository root directory
+ */
+function getGitRoot() {
+    try {
+        return execSync('git rev-parse --show-toplevel', {
+            encoding: 'utf-8',
+            stdio: ['pipe', 'pipe', 'pipe'],
+        }).trim();
+    }
+    catch {
+        return process.cwd();
+    }
+}
+// Git root for worktree paths
+const gitRoot = getGitRoot();
+// Global state
+let workerId = null;
+let workerShortId = null;
+let activeCount = 0;
+let running = true;
+let heartbeatInterval = null;
+let pollInterval = null;
+// Heartbeat failure tracking
+let consecutiveHeartbeatFailures = 0;
+const MAX_HEARTBEAT_FAILURES = 3;
+// Shutdown state
+let shutdownInProgress = false;
+// Track active orchestrators by sessionId for prompt forwarding
+const activeOrchestrators = new Map();
+// Logger instance - will be configured after registration
+let log = createLogger({}, { showTimestamp: true });
 function parseArgs() {
     const args = process.argv.slice(2);
-    const
-
-
-
+    const workerConfig = {
+        apiUrl: process.env.WORKER_API_URL || 'https://agent.example.com',
+        apiKey: process.env.WORKER_API_KEY || '',
+        hostname: os.hostname(),
+        capacity: 3,
+        dryRun: false,
     };
     for (let i = 0; i < args.length; i++) {
         const arg = args[i];
         switch (arg) {
             case '--capacity':
-
+                workerConfig.capacity = parseInt(args[++i], 10);
+                break;
+            case '--hostname':
+                workerConfig.hostname = args[++i];
                 break;
             case '--api-url':
-
+                workerConfig.apiUrl = args[++i];
                 break;
             case '--api-key':
-
+                workerConfig.apiKey = args[++i];
+                break;
+            case '--dry-run':
+                workerConfig.dryRun = true;
                 break;
             case '--help':
             case '-h':
@@ -48,7 +91,7 @@ function parseArgs() {
                 process.exit(0);
         }
     }
-    return
+    return workerConfig;
 }
 function printHelp() {
     console.log(`
@@ -58,49 +101,690 @@ Usage:
   af-worker [options]
 
 Options:
-  --capacity <number> Maximum concurrent agents (default:
-  --
-  --api-
+  --capacity <number> Maximum concurrent agents (default: 3)
+  --hostname <name>   Worker hostname (default: ${os.hostname()})
+  --api-url <url>     Coordinator API URL
+  --api-key <key>     API key for authentication
+  --dry-run           Poll but don't execute work
   --help, -h          Show this help message
 
-Environment:
-
-
-
-
+Environment (loaded from .env.local in CWD):
+  WORKER_API_URL   Coordinator API URL
+  WORKER_API_KEY   API key for authentication
+  LINEAR_API_KEY   Required for agent operations
+
+Examples:
+  # Start worker with default settings
+  af-worker
+
+  # Start with custom capacity
+  af-worker --capacity 5
+
+  # Test polling without executing
+  af-worker --dry-run
 `);
 }
+/**
+ * Create a child logger with agent/issue context
+ */
+function createAgentLogger(issueIdentifier) {
+    return log.child({ issueIdentifier });
+}
+async function apiRequest(workerConfig, apiPath, options = {}, retries = 3) {
+    const result = await apiRequestWithError(workerConfig, apiPath, options, retries);
+    return result.data;
+}
+async function apiRequestWithError(workerConfig, apiPath, options = {}, retries = 3) {
+    const url = `${workerConfig.apiUrl}${apiPath}`;
+    for (let attempt = 1; attempt <= retries; attempt++) {
+        try {
+            const response = await fetch(url, {
+                ...options,
+                headers: {
+                    'Content-Type': 'application/json',
+                    Authorization: `Bearer ${workerConfig.apiKey}`,
+                    ...options.headers,
+                },
+            });
+            if (!response.ok) {
+                const errorBody = await response.text();
+                // Check for "Worker not found" specifically
+                if (response.status === 404 && errorBody.includes('Worker not found')) {
+                    log.warn(`Worker not found on server: ${apiPath}`, { status: response.status });
+                    return { data: null, error: { type: 'worker_not_found' } };
+                }
+                log.error(`API request failed: ${apiPath}`, { status: response.status, body: errorBody });
+                return { data: null, error: { type: 'server_error', status: response.status, body: errorBody } };
+            }
+            return { data: (await response.json()), error: null };
+        }
+        catch (error) {
+            const errorMsg = error instanceof Error ? error.message : String(error);
+            const isLastAttempt = attempt === retries;
+            if (isLastAttempt) {
+                log.error(`API request error: ${apiPath}`, { error: errorMsg, attempts: attempt });
+                return { data: null, error: { type: 'network_error', message: errorMsg } };
+            }
+            // Exponential backoff: 1s, 2s, 4s
+            const delay = Math.pow(2, attempt - 1) * 1000;
+            log.warn(`API request failed, retrying in ${delay}ms: ${apiPath}`, {
+                error: errorMsg,
+                attempt,
+                maxRetries: retries,
+            });
+            await new Promise((resolve) => setTimeout(resolve, delay));
+        }
+    }
+    return { data: null, error: { type: 'network_error', message: 'Max retries exceeded' } };
+}
+async function register(workerConfig) {
+    log.info('Registering with coordinator', {
+        apiUrl: workerConfig.apiUrl,
+        hostname: workerConfig.hostname,
+        capacity: workerConfig.capacity,
+    });
+    const result = await apiRequest(workerConfig, '/api/workers/register', {
+        method: 'POST',
+        body: JSON.stringify({
+            hostname: workerConfig.hostname,
+            capacity: workerConfig.capacity,
+            version: '1.0.0',
+        }),
+    });
+    if (result) {
+        log.status('registered', `Worker ID: ${result.workerId.substring(0, 8)}`);
+    }
+    return result;
+}
+// Flag to prevent multiple concurrent re-registration attempts
+let reregistrationInProgress = false;
+/**
+ * Transfer session ownership to a new worker ID
+ */
+async function transferSessionOwnership(workerConfig, sessionId, newWorkerId, oldWorkerId) {
+    const result = await apiRequest(workerConfig, `/api/sessions/${sessionId}/transfer-ownership`, {
+        method: 'POST',
+        body: JSON.stringify({ newWorkerId, oldWorkerId }),
+    });
+    if (result?.transferred) {
+        log.debug('Session ownership transferred', {
+            sessionId: sessionId.substring(0, 8),
+            oldWorkerId: oldWorkerId.substring(0, 8),
+            newWorkerId: newWorkerId.substring(0, 8),
+        });
+        return true;
+    }
+    else {
+        log.warn('Failed to transfer session ownership', {
+            sessionId: sessionId.substring(0, 8),
+            reason: result?.reason,
+        });
+        return false;
+    }
+}
+async function attemptReregistration(workerConfig) {
+    if (reregistrationInProgress) {
+        log.debug('Re-registration already in progress, skipping');
+        return false;
+    }
+    reregistrationInProgress = true;
+    const oldWorkerId = workerId;
+    log.warn('Worker not found on server - attempting to re-register');
+    try {
+        const registration = await register(workerConfig);
+        if (registration) {
+            const newWorkerId = registration.workerId;
+            workerId = newWorkerId;
+            workerShortId = newWorkerId.substring(4, 8); // Skip 'wkr_' prefix
+            consecutiveHeartbeatFailures = 0;
+            log.status('re-registered', `New Worker ID: ${workerShortId}`);
+            // Transfer ownership of active sessions to the new worker ID
+            if (oldWorkerId && activeOrchestrators.size > 0) {
+                log.info('Transferring ownership of active sessions', {
+                    sessionCount: activeOrchestrators.size,
+                    oldWorkerId: oldWorkerId.substring(0, 8),
+                    newWorkerId: newWorkerId.substring(0, 8),
+                });
+                const transferPromises = [];
+                for (const sessionId of activeOrchestrators.keys()) {
+                    transferPromises.push(transferSessionOwnership(workerConfig, sessionId, newWorkerId, oldWorkerId));
+                }
+                const results = await Promise.all(transferPromises);
+                const successCount = results.filter(Boolean).length;
+                log.info('Session ownership transfer complete', {
+                    total: results.length,
+                    succeeded: successCount,
+                    failed: results.length - successCount,
+                });
+                // Update worker ID in all active orchestrators' activity emitters
+                for (const [sessionId, orchestrator] of activeOrchestrators.entries()) {
+                    orchestrator.updateWorkerId(newWorkerId);
+                    log.debug('Updated orchestrator worker ID', {
+                        sessionId: sessionId.substring(0, 8),
+                    });
+                }
+            }
+            return true;
+        }
+        log.error('Re-registration failed');
+        return false;
+    }
+    finally {
+        reregistrationInProgress = false;
+    }
+}
+async function sendHeartbeat(workerConfig) {
+    if (!workerId)
+        return;
+    const result = await apiRequestWithError(workerConfig, `/api/workers/${workerId}/heartbeat`, {
+        method: 'POST',
+        body: JSON.stringify({
+            activeCount,
+            load: {
+                cpu: os.loadavg()[0],
+                memory: 1 - os.freemem() / os.totalmem(),
+            },
+        }),
+    });
+    if (result.data) {
+        consecutiveHeartbeatFailures = 0;
+        log.debug('Heartbeat acknowledged', {
+            activeCount,
+            pendingWorkCount: result.data.pendingWorkCount,
+        });
+    }
+    else if (result.error?.type === 'worker_not_found') {
+        consecutiveHeartbeatFailures++;
+        await attemptReregistration(workerConfig);
+    }
+    else {
+        consecutiveHeartbeatFailures++;
+        log.warn('Heartbeat failed', {
+            consecutiveFailures: consecutiveHeartbeatFailures,
+            maxFailures: MAX_HEARTBEAT_FAILURES,
+            errorType: result.error?.type,
+        });
+        if (consecutiveHeartbeatFailures >= MAX_HEARTBEAT_FAILURES) {
+            log.error('Multiple heartbeat failures - checking if re-registration needed', {
+                consecutiveFailures: consecutiveHeartbeatFailures,
+            });
+            await attemptReregistration(workerConfig);
+        }
+    }
+}
+async function pollForWork(workerConfig) {
+    if (!workerId)
+        return { work: [], pendingPrompts: {}, hasPendingPrompts: false };
+    const result = await apiRequestWithError(workerConfig, `/api/workers/${workerId}/poll`);
+    // Handle worker not found - trigger re-registration
+    if (result.error?.type === 'worker_not_found') {
+        await attemptReregistration(workerConfig);
+        return { work: [], pendingPrompts: {}, hasPendingPrompts: false };
+    }
+    if (!result.data) {
+        return { work: [], pendingPrompts: {}, hasPendingPrompts: false };
+    }
+    const pollData = result.data;
+    // Log when we receive pending prompts
+    if (pollData.hasPendingPrompts) {
+        const totalPrompts = Object.values(pollData.pendingPrompts).reduce((sum, prompts) => sum + prompts.length, 0);
+        log.info('Received pending prompts', {
+            sessionCount: Object.keys(pollData.pendingPrompts).length,
+            totalPrompts,
+            sessions: Object.entries(pollData.pendingPrompts).map(([sessionId, prompts]) => ({
+                sessionId: sessionId.substring(0, 8),
+                promptCount: prompts.length,
+                promptIds: prompts.map((p) => p.id),
+            })),
+        });
+    }
+    return pollData;
+}
+async function claimWork(workerConfig, sessionId) {
+    if (!workerId)
+        return null;
+    return apiRequest(workerConfig, `/api/sessions/${sessionId}/claim`, {
+        method: 'POST',
+        body: JSON.stringify({ workerId }),
+    });
+}
+async function reportStatus(workerConfig, sessionId, status, extra) {
+    if (!workerId)
+        return;
+    await apiRequest(workerConfig, `/api/sessions/${sessionId}/status`, {
+        method: 'POST',
+        body: JSON.stringify({
+            workerId,
+            status,
+            ...extra,
+        }),
+    });
+    log.debug(`Reported status: ${status}`, { sessionId });
+}
+/**
+ * Post a progress update comment to the Linear issue thread
+ */
+async function postProgress(workerConfig, sessionId, milestone, message) {
+    if (!workerId)
+        return;
+    const result = await apiRequest(workerConfig, `/api/sessions/${sessionId}/progress`, {
+        method: 'POST',
+        body: JSON.stringify({
+            workerId,
+            milestone,
+            message,
+        }),
+    });
+    if (result?.posted) {
+        log.debug(`Progress posted: ${milestone}`, { sessionId });
+    }
+    else {
+        log.warn(`Failed to post progress: ${milestone}`, { reason: result?.reason });
+    }
+}
+/**
+ * Check if session has been stopped (via Linear stop button)
+ */
+async function checkSessionStopped(workerConfig, sessionId) {
+    const result = await apiRequest(workerConfig, `/api/sessions/${sessionId}/status`);
+    return result?.status === 'stopped';
+}
+async function deregister(workerConfig) {
+    if (!workerId)
+        return;
+    log.info('Deregistering worker');
+    const result = await apiRequest(workerConfig, `/api/workers/${workerId}`, {
+        method: 'DELETE',
+    });
+    if (result) {
+        log.status('stopped', `Unclaimed sessions: ${result.unclaimedSessions.length}`);
+    }
+    workerId = null;
+}
+async function executeWork(workerConfig, work) {
+    const agentLog = createAgentLogger(work.issueIdentifier);
+    const isResume = !!work.claudeSessionId;
+    agentLog.section(`${isResume ? 'Resuming' : 'Starting'} work on ${work.issueIdentifier}`);
+    agentLog.info('Work details', {
+        hasPrompt: !!work.prompt,
+        isResume,
+        workType: work.workType,
+    });
+    activeCount++;
+    // Two-phase completion: set in try/catch, read in finally
+    let finalStatus = 'failed';
+    let statusPayload;
+    // Issue lock TTL refresher
+    let lockRefresher = null;
+    try {
+        await reportStatus(workerConfig, work.sessionId, 'running');
+        // Start lock TTL refresher (refresh every 60s, lock TTL is 2 hours)
+        if (work.issueId) {
+            lockRefresher = setInterval(async () => {
+                try {
+                    const response = await apiRequest(workerConfig, `/api/sessions/${work.sessionId}/lock-refresh`, {
+                        method: 'POST',
+                        body: JSON.stringify({ workerId, issueId: work.issueId }),
+                    });
+                    if (response?.refreshed) {
+                        agentLog.debug('Issue lock TTL refreshed');
+                    }
+                }
+                catch {
+                    // Non-fatal — lock has a 2hr TTL so missing one refresh is fine
+                }
+            }, 60_000);
+        }
+        // Post initial progress
+        await postProgress(workerConfig, work.sessionId, isResume ? 'resumed' : 'claimed', isResume
+            ? `Resuming work on ${work.issueIdentifier}`
+            : `Worker claimed ${work.issueIdentifier}. Setting up environment...`);
+        // Create orchestrator with API activity proxy
+        const orchestrator = createOrchestrator({
+            maxConcurrent: 1,
+            worktreePath: path.resolve(gitRoot, '.worktrees'),
+            apiActivityConfig: {
+                baseUrl: workerConfig.apiUrl,
+                apiKey: workerConfig.apiKey,
+                workerId: workerId,
+            },
+        }, {
+            onIssueSelected: (issue) => {
+                agentLog.info('Issue fetched', {
+                    title: issue.title.slice(0, 50),
+                    labels: issue.labels.join(', '),
+                });
+            },
+            onAgentStart: (agent) => {
+                agentLog.status('running', `PID: ${agent.pid}`);
+                agentLog.debug('Agent details', {
+                    worktree: agent.worktreePath,
+                });
+                reportStatus(workerConfig, work.sessionId, 'running', {
+                    claudeSessionId: agent.sessionId,
+                    worktreePath: agent.worktreePath,
+                });
+                postProgress(workerConfig, work.sessionId, 'started', `Agent started working on ${agent.identifier}`);
+            },
+            onAgentComplete: (agent) => {
+                agentLog.status('completed', `Exit code: ${agent.exitCode}`);
+            },
+            onAgentError: (_agent, error) => {
+                agentLog.error('Agent error', { error: error.message });
+            },
+            onAgentStopped: (_agent) => {
+                agentLog.status('stopped');
+            },
+            onAgentIncomplete: (agent) => {
+                agentLog.warn('Agent incomplete - worktree preserved', {
+                    reason: agent.incompleteReason,
+                    worktreePath: agent.worktreePath,
+                });
+            },
+            onClaudeSessionId: (_linearSessionId, claudeSessionId) => {
+                agentLog.debug('Claude session captured', { claudeSessionId });
+                reportStatus(workerConfig, work.sessionId, 'running', {
+                    claudeSessionId,
+                });
+            },
+        });
+        // Store orchestrator for prompt forwarding
+        activeOrchestrators.set(work.sessionId, orchestrator);
+        agentLog.debug('Orchestrator registered for session', {
+            sessionId: work.sessionId.substring(0, 8),
+        });
+        let spawnedAgent;
+        // Retry configuration for "agent already running" conflicts
+        const MAX_SPAWN_RETRIES = 6;
+        const SPAWN_RETRY_DELAY_MS = 15000;
+        if (work.claudeSessionId) {
+            // Resume existing Claude session
+            agentLog.info('Resuming Claude session', {
+                claudeSessionId: work.claudeSessionId.substring(0, 12),
+            });
+            const prompt = work.prompt || `Continue work on ${work.issueIdentifier}`;
+            const result = await orchestrator.forwardPrompt(work.issueId, work.sessionId, prompt, work.claudeSessionId, work.workType);
+            if (!result.forwarded || !result.agent) {
+                throw new Error(`Failed to resume session: ${result.reason || 'unknown error'}`);
+            }
+            agentLog.success('Session resumed');
+            spawnedAgent = result.agent;
+        }
+        else {
+            // Fresh start with retry logic
+            agentLog.info('Spawning new agent', { workType: work.workType });
+            let lastError = null;
+            for (let attempt = 1; attempt <= MAX_SPAWN_RETRIES; attempt++) {
+                try {
+                    spawnedAgent = await orchestrator.spawnAgentForIssue(work.issueIdentifier, work.sessionId, work.workType, work.prompt);
+                    break;
+                }
+                catch (err) {
+                    lastError = err instanceof Error ? err : new Error(String(err));
+                    const isAgentRunning = lastError.message.includes('Agent already running') ||
+                        lastError.message.includes('Agent is still running');
+                    const isBranchConflict = lastError.message.includes('already checked out') ||
+                        lastError.message.includes('is already checked out at');
+                    const isRetriable = isAgentRunning || isBranchConflict;
+                    if (isRetriable && attempt < MAX_SPAWN_RETRIES) {
+                        const reason = isBranchConflict
+                            ? 'Branch in use by another agent'
+                            : 'Agent still running';
+                        agentLog.warn(`${reason}, waiting to retry (attempt ${attempt}/${MAX_SPAWN_RETRIES})`, {
+                            retryInMs: SPAWN_RETRY_DELAY_MS,
+                        });
+                        await postProgress(workerConfig, work.sessionId, 'waiting', `${reason}, waiting to retry (${attempt}/${MAX_SPAWN_RETRIES})...`);
+                        await new Promise(resolve => setTimeout(resolve, SPAWN_RETRY_DELAY_MS));
+                    }
+                    else {
+                        throw lastError;
+                    }
+                }
+            }
+            if (!spawnedAgent) {
+                throw lastError || new Error('Failed to spawn agent after retries');
+            }
+        }
+        agentLog.info('Agent spawned', {
+            pid: spawnedAgent.pid,
+            status: spawnedAgent.status,
+        });
+        if (!spawnedAgent.pid) {
+            agentLog.warn('Agent has no PID - spawn may have failed');
+        }
+        // Start a stop signal checker
+        let stopRequested = false;
+        const stopChecker = setInterval(async () => {
+            try {
+                if (await checkSessionStopped(workerConfig, work.sessionId)) {
+                    agentLog.warn('Stop signal received');
+                    stopRequested = true;
+                    clearInterval(stopChecker);
+                    await orchestrator.stopAgent(work.issueId, false);
+                }
+            }
+            catch {
+                // Ignore errors in stop checker
+            }
+        }, 5000);
+        // Wait for agent to complete
+        agentLog.info('Waiting for agent to complete...');
+        const results = await orchestrator.waitForAll();
+        const agent = results[0];
+        clearInterval(stopChecker);
+        // Determine final status
+        if (stopRequested || agent?.stopReason === 'user_request') {
+            finalStatus = 'stopped';
+            await reportStatus(workerConfig, work.sessionId, 'finalizing');
+            await postProgress(workerConfig, work.sessionId, 'stopped', `Work stopped by user request`);
+            agentLog.status('stopped', 'Work stopped by user request');
+        }
+        else if (agent?.stopReason === 'timeout') {
+            finalStatus = 'failed';
+            statusPayload = { error: { message: 'Agent timed out' } };
+            await reportStatus(workerConfig, work.sessionId, 'finalizing');
+            await postProgress(workerConfig, work.sessionId, 'failed', `Work timed out`);
+            agentLog.status('stopped', 'Work timed out');
+        }
+        else if (agent?.status === 'completed') {
+            finalStatus = 'completed';
+            statusPayload = {
+                claudeSessionId: agent.sessionId,
+                worktreePath: agent.worktreePath,
+            };
+            await reportStatus(workerConfig, work.sessionId, 'finalizing');
+            await postProgress(workerConfig, work.sessionId, 'completed', `Work completed successfully on ${work.issueIdentifier}`);
+            agentLog.success('Work completed successfully');
+        }
+        else {
+            const errorMsg = agent?.error?.message || 'Agent did not complete successfully';
+            finalStatus = 'failed';
+            statusPayload = { error: { message: errorMsg } };
+            await reportStatus(workerConfig, work.sessionId, 'finalizing');
+            await postProgress(workerConfig, work.sessionId, 'failed', `Work failed: ${errorMsg}`);
+            agentLog.error('Work failed', { error: errorMsg });
+        }
+    }
+    catch (error) {
+        const errorMsg = error instanceof Error ? error.message : 'Unknown error';
+        agentLog.error('Work execution failed', { error: errorMsg });
+        finalStatus = 'failed';
+        statusPayload = { error: { message: errorMsg } };
+        await reportStatus(workerConfig, work.sessionId, 'finalizing').catch(() => { });
+        await postProgress(workerConfig, work.sessionId, 'failed', `Work failed: ${errorMsg}`);
+    }
+    finally {
+        if (lockRefresher)
+            clearInterval(lockRefresher);
+        activeOrchestrators.delete(work.sessionId);
+        agentLog.debug('Orchestrator unregistered for session', {
+            sessionId: work.sessionId.substring(0, 8),
+        });
+        activeCount--;
+        // Report true terminal status AFTER all cleanup
+        await reportStatus(workerConfig, work.sessionId, finalStatus, statusPayload).catch((err) => {
+            agentLog.error('Failed to report final status', { error: err instanceof Error ? err.message : String(err) });
+        });
+    }
+}
 async function main() {
-    const
+    const workerConfig = parseArgs();
+    if (!workerConfig.apiKey) {
+        console.error('Error: WORKER_API_KEY environment variable is required');
+        process.exit(1);
+    }
     if (!process.env.LINEAR_API_KEY) {
         console.error('Error: LINEAR_API_KEY environment variable is required');
         process.exit(1);
     }
-
-
+    log.section('AgentFactory Worker');
+    log.info('Configuration', {
+        apiUrl: workerConfig.apiUrl,
+        hostname: workerConfig.hostname,
+        capacity: workerConfig.capacity,
+        dryRun: workerConfig.dryRun,
+    });
+    // Register with coordinator
+    const registration = await register(workerConfig);
+    if (!registration) {
+        log.error('Failed to register with coordinator');
         process.exit(1);
     }
-
-
-
-
-
-
-
-
-
-
-
+    workerId = registration.workerId;
+    workerShortId = registration.workerId.substring(0, 8);
+    // Update logger with worker context
+    log = createLogger({ workerId, workerShortId }, { showTimestamp: true });
+    // Set up heartbeat
+    heartbeatInterval = setInterval(() => sendHeartbeat(workerConfig), registration.heartbeatInterval);
+    // Set up graceful shutdown
+    const shutdown = () => {
+        if (shutdownInProgress)
+            return;
+        shutdownInProgress = true;
+        log.warn('Shutting down...');
+        running = false;
+        if (heartbeatInterval)
+            clearInterval(heartbeatInterval);
+        if (pollInterval)
+            clearInterval(pollInterval);
+        // Fire and forget - server will clean up via heartbeat timeout
+        deregister(workerConfig).catch(() => { });
+        log.status('stopped', 'Shutdown complete');
         process.exit(0);
-    }
-
-
-    //
-
-    //
-
-
-
+    };
+    process.on('SIGINT', shutdown);
+    process.on('SIGTERM', shutdown);
+    // Send initial heartbeat
+    await sendHeartbeat(workerConfig);
+    // Main poll loop
+    log.info('Starting poll loop...');
+    while (running) {
+        try {
+            const availableCapacity = workerConfig.capacity - activeCount;
+            const pollResult = await pollForWork(workerConfig);
+            // Handle new work items if we have capacity
+            if (availableCapacity > 0 && pollResult.work.length > 0) {
+                log.info(`Found ${pollResult.work.length} work item(s)`, {
+                    activeCount,
+                    availableCapacity,
+                });
+                for (const item of pollResult.work.slice(0, availableCapacity)) {
+                    if (!running)
+                        break;
+                    const claimResult = await claimWork(workerConfig, item.sessionId);
+                    if (claimResult?.claimed) {
+                        log.status('claimed', item.issueIdentifier);
+                        if (workerConfig.dryRun) {
+                            log.info(`[DRY RUN] Would execute: ${item.issueIdentifier}`);
+                        }
+                        else {
+                            executeWork(workerConfig, item).catch((error) => {
+                                log.error('Background work execution failed', {
+                                    error: error instanceof Error ? error.message : String(error),
+                                });
+                            });
+                        }
+                    }
+                    else {
+                        log.warn(`Failed to claim work: ${item.issueIdentifier}`);
+                    }
+                }
+            }
+            // Handle pending prompts for active sessions
+            if (pollResult.hasPendingPrompts) {
+                for (const [sessionId, prompts] of Object.entries(pollResult.pendingPrompts)) {
+                    for (const prompt of prompts) {
+                        log.info('Processing pending prompt', {
+                            sessionId: sessionId.substring(0, 8),
+                            promptId: prompt.id,
+                            promptLength: prompt.prompt.length,
+                            userName: prompt.userName,
+                        });
+                        const orchestrator = activeOrchestrators.get(sessionId);
+                        if (!orchestrator) {
+                            log.warn('No active orchestrator found for session', {
+                                sessionId: sessionId.substring(0, 8),
+                                promptId: prompt.id,
+                            });
+                            continue;
+                        }
+                        const agent = orchestrator.getAgentBySession(sessionId);
+                        const claudeSessionId = agent?.claudeSessionId;
+                        log.info('Forwarding prompt to Claude session', {
+                            sessionId: sessionId.substring(0, 8),
+                            promptId: prompt.id,
+                            hasClaudeSession: !!claudeSessionId,
+                            agentStatus: agent?.status,
+                        });
+                        try {
+                            const result = await orchestrator.forwardPrompt(prompt.issueId, sessionId, prompt.prompt, claudeSessionId, agent?.workType);
+                            if (result.forwarded) {
+                                log.success(result.injected ? 'Message injected into running session' : 'Prompt forwarded successfully', {
+                                    sessionId: sessionId.substring(0, 8),
+                                    promptId: prompt.id,
+                                    injected: result.injected ?? false,
+                                    resumed: result.resumed,
+                                    newAgentPid: result.agent?.pid,
+                                });
+                                const claimResult = await apiRequest(workerConfig, `/api/sessions/${sessionId}/prompts`, {
+                                    method: 'POST',
+                                    body: JSON.stringify({ promptId: prompt.id }),
+                                });
+                                if (claimResult?.claimed) {
+                                    log.debug('Prompt claimed', { promptId: prompt.id });
+                                }
+                                else {
+                                    log.warn('Failed to claim prompt', { promptId: prompt.id });
+                                }
+                            }
+                            else {
+                                log.error('Failed to forward prompt', {
+                                    sessionId: sessionId.substring(0, 8),
+                                    promptId: prompt.id,
+                                    reason: result.reason,
+                                    error: result.error?.message,
+                                });
+                            }
+                        }
+                        catch (error) {
+                            log.error('Error forwarding prompt', {
+                                sessionId: sessionId.substring(0, 8),
+                                promptId: prompt.id,
+                                error: error instanceof Error ? error.message : String(error),
+                            });
+                        }
+                    }
+                }
+            }
+        }
+        catch (error) {
+            log.error('Poll loop error', {
+                error: error instanceof Error ? error.message : String(error),
+            });
+        }
+        // Wait before next poll
+        await new Promise((resolve) => setTimeout(resolve, registration.pollInterval));
+    }
 }
 main().catch((error) => {
     console.error('Fatal error:', error);