@supaku/agentfactory-cli 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/analyze-logs.d.ts +2 -2
- package/dist/src/analyze-logs.js +23 -194
- package/dist/src/cleanup.d.ts +2 -6
- package/dist/src/cleanup.d.ts.map +1 -1
- package/dist/src/cleanup.js +24 -225
- package/dist/src/lib/analyze-logs-runner.d.ts +47 -0
- package/dist/src/lib/analyze-logs-runner.d.ts.map +1 -0
- package/dist/src/lib/analyze-logs-runner.js +216 -0
- package/dist/src/lib/cleanup-runner.d.ts +28 -0
- package/dist/src/lib/cleanup-runner.d.ts.map +1 -0
- package/dist/src/lib/cleanup-runner.js +224 -0
- package/dist/src/lib/orchestrator-runner.d.ts +45 -0
- package/dist/src/lib/orchestrator-runner.d.ts.map +1 -0
- package/dist/src/lib/orchestrator-runner.js +144 -0
- package/dist/src/lib/queue-admin-runner.d.ts +30 -0
- package/dist/src/lib/queue-admin-runner.d.ts.map +1 -0
- package/dist/src/lib/queue-admin-runner.js +378 -0
- package/dist/src/lib/worker-fleet-runner.d.ts +28 -0
- package/dist/src/lib/worker-fleet-runner.d.ts.map +1 -0
- package/dist/src/lib/worker-fleet-runner.js +224 -0
- package/dist/src/lib/worker-runner.d.ts +31 -0
- package/dist/src/lib/worker-runner.d.ts.map +1 -0
- package/dist/src/lib/worker-runner.js +735 -0
- package/dist/src/orchestrator.d.ts +1 -1
- package/dist/src/orchestrator.js +42 -106
- package/dist/src/queue-admin.d.ts +3 -2
- package/dist/src/queue-admin.d.ts.map +1 -1
- package/dist/src/queue-admin.js +38 -360
- package/dist/src/worker-fleet.d.ts +1 -1
- package/dist/src/worker-fleet.js +23 -162
- package/dist/src/worker.d.ts +1 -0
- package/dist/src/worker.d.ts.map +1 -1
- package/dist/src/worker.js +33 -702
- package/package.json +28 -4
|
@@ -0,0 +1,735 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Worker Runner — Programmatic API for the remote worker CLI.
|
|
3
|
+
*
|
|
4
|
+
* Encapsulates all global state into the runner function's closure so that
|
|
5
|
+
* multiple workers can be started from the same process (e.g. tests) without
|
|
6
|
+
* leaking state between invocations.
|
|
7
|
+
*/
|
|
8
|
+
import path from 'path';
|
|
9
|
+
import { execSync } from 'child_process';
|
|
10
|
+
import os from 'os';
|
|
11
|
+
import { createOrchestrator, createLogger, } from '@supaku/agentfactory';
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Helpers (stateless)
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
/**
 * Resolve the root directory of the enclosing git repository.
 *
 * Asks git for the top-level checkout directory; when that fails (git not
 * installed, or the process is not running inside a git work tree) it falls
 * back to the current working directory.
 *
 * @returns {string} Absolute path to the git root, or `process.cwd()`.
 */
function getGitRoot() {
    let root;
    try {
        // stdio is fully piped so git's own stderr noise never reaches the
        // worker's console output.
        const stdout = execSync('git rev-parse --show-toplevel', {
            encoding: 'utf-8',
            stdio: ['pipe', 'pipe', 'pipe'],
        });
        root = stdout.trim();
    }
    catch {
        root = process.cwd();
    }
    return root;
}
|
|
26
|
+
// Number of consecutive failed heartbeats tolerated before the worker
// escalates to attempting a full re-registration with the coordinator
// (see sendHeartbeat() inside runWorker).
const MAX_HEARTBEAT_FAILURES = 3;
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Runner
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
/**
|
|
31
|
+
* Run a worker that polls the coordinator for work and executes agents.
|
|
32
|
+
*
|
|
33
|
+
* All state is encapsulated in the function closure. The caller can cancel
|
|
34
|
+
* via the optional {@link AbortSignal}.
|
|
35
|
+
*/
|
|
36
|
+
export async function runWorker(config, signal) {
    // Resolve config with defaults. Only apiUrl/apiKey are taken as-is;
    // everything else has a sensible local default.
    const hostname = config.hostname ?? os.hostname();
    const capacity = config.capacity ?? 3;
    const dryRun = config.dryRun ?? false;
    const gitRoot = config.gitRoot ?? getGitRoot();
    // NOTE(review): linearApiKey is validated here but not referenced again in
    // this function — presumably consumed downstream via the environment by the
    // spawned agents. Confirm before removing the check.
    const linearApiKey = config.linearApiKey ?? process.env.LINEAR_API_KEY;
    if (!linearApiKey) {
        throw new Error('LINEAR_API_KEY is required (pass via config.linearApiKey or set env var)');
    }
    // -----------------------------------------------------------------------
    // State (formerly globals) — all mutable worker state lives in this
    // closure so multiple runWorker() calls in one process stay isolated.
    // -----------------------------------------------------------------------
    let workerId = null;                      // coordinator-assigned ID; null until registered
    let workerShortId = null;                 // abbreviated ID used in log context
    let activeCount = 0;                      // number of executeWork() calls currently in flight
    let running = true;                       // poll-loop gate; flipped false on abort
    let heartbeatTimer = null;                // setInterval handle for heartbeats
    let shutdownInProgress = false;           // guards against double shutdown
    let consecutiveHeartbeatFailures = 0;     // reset on any successful heartbeat
    let reregistrationInProgress = false;     // mutex flag so only one re-registration runs
    // sessionId -> orchestrator, used to forward user prompts to live agents.
    const activeOrchestrators = new Map();
    // Logger — will be re-created after registration with worker context
    let log = createLogger({}, { showTimestamp: true });
    // Internal config object used by API helpers
    const workerConfig = {
        apiUrl: config.apiUrl,
        apiKey: config.apiKey,
        hostname,
        capacity,
        dryRun,
    };
    // -----------------------------------------------------------------------
    // AbortSignal handling — cooperative shutdown: stop the loop, stop the
    // heartbeat, and best-effort deregister.
    // -----------------------------------------------------------------------
    const onAbort = () => {
        if (shutdownInProgress)
            return;
        shutdownInProgress = true;
        log.warn('Shutting down (abort signal)...');
        running = false;
        if (heartbeatTimer)
            clearInterval(heartbeatTimer);
        // Fire and forget — server will clean up via heartbeat timeout
        deregister().catch(() => { });
    };
    signal?.addEventListener('abort', onAbort, { once: true });
    // -----------------------------------------------------------------------
    // API helpers (closures over workerConfig & log)
    // -----------------------------------------------------------------------
    /**
     * Authenticated JSON fetch against the coordinator with exponential
     * backoff on network errors (1s, 2s, 4s, ...). HTTP error statuses are
     * NOT retried — they return immediately with a typed error object.
     * Returns { data, error } where exactly one side is non-null.
     */
    async function apiRequestWithError(apiPath, options = {}, retries = 3) {
        const url = `${workerConfig.apiUrl}${apiPath}`;
        for (let attempt = 1; attempt <= retries; attempt++) {
            try {
                const response = await fetch(url, {
                    ...options,
                    headers: {
                        'Content-Type': 'application/json',
                        Authorization: `Bearer ${workerConfig.apiKey}`,
                        ...options.headers,
                    },
                });
                if (!response.ok) {
                    const errorBody = await response.text();
                    // 404 + specific body text is the coordinator's way of saying our
                    // registration expired; callers react by re-registering.
                    if (response.status === 404 && errorBody.includes('Worker not found')) {
                        log.warn(`Worker not found on server: ${apiPath}`, { status: response.status });
                        return { data: null, error: { type: 'worker_not_found' } };
                    }
                    log.error(`API request failed: ${apiPath}`, { status: response.status, body: errorBody });
                    return { data: null, error: { type: 'server_error', status: response.status, body: errorBody } };
                }
                return { data: (await response.json()), error: null };
            }
            catch (error) {
                const errorMsg = error instanceof Error ? error.message : String(error);
                const isLastAttempt = attempt === retries;
                if (isLastAttempt) {
                    log.error(`API request error: ${apiPath}`, { error: errorMsg, attempts: attempt });
                    return { data: null, error: { type: 'network_error', message: errorMsg } };
                }
                // Exponential backoff: 2^(attempt-1) seconds.
                const delay = Math.pow(2, attempt - 1) * 1000;
                log.warn(`API request failed, retrying in ${delay}ms: ${apiPath}`, {
                    error: errorMsg,
                    attempt,
                    maxRetries: retries,
                });
                await new Promise((resolve) => setTimeout(resolve, delay));
            }
        }
        // Unreachable in practice (loop always returns), kept as a safety net.
        return { data: null, error: { type: 'network_error', message: 'Max retries exceeded' } };
    }
    /** Convenience wrapper: like apiRequestWithError but discards error detail. */
    async function apiRequest(apiPath, options = {}, retries = 3) {
        const result = await apiRequestWithError(apiPath, options, retries);
        return result.data;
    }
    // -----------------------------------------------------------------------
    // Registration
    // -----------------------------------------------------------------------
    /**
     * Register this worker with the coordinator.
     * Returns the registration payload (workerId, heartbeatInterval,
     * pollInterval, ...) or null on failure.
     */
    async function register() {
        log.info('Registering with coordinator', {
            apiUrl: workerConfig.apiUrl,
            hostname: workerConfig.hostname,
            capacity: workerConfig.capacity,
        });
        const result = await apiRequest('/api/workers/register', {
            method: 'POST',
            body: JSON.stringify({
                hostname: workerConfig.hostname,
                capacity: workerConfig.capacity,
                version: '1.0.0',
            }),
        });
        if (result) {
            log.status('registered', `Worker ID: ${result.workerId.substring(0, 8)}`);
        }
        return result;
    }
    /**
     * Ask the coordinator to move a session's ownership from the old worker
     * ID to the new one (used after re-registration). Returns true only if
     * the server confirms the transfer.
     */
    async function transferSessionOwnership(sessionId, newWorkerId, oldWorkerId) {
        const result = await apiRequest(`/api/sessions/${sessionId}/transfer-ownership`, {
            method: 'POST',
            body: JSON.stringify({ newWorkerId, oldWorkerId }),
        });
        if (result?.transferred) {
            log.debug('Session ownership transferred', {
                sessionId: sessionId.substring(0, 8),
                oldWorkerId: oldWorkerId.substring(0, 8),
                newWorkerId: newWorkerId.substring(0, 8),
            });
            return true;
        }
        else {
            log.warn('Failed to transfer session ownership', {
                sessionId: sessionId.substring(0, 8),
                reason: result?.reason,
            });
            return false;
        }
    }
    /**
     * Re-register after the coordinator has forgotten us (e.g. heartbeat
     * timeout on the server side). On success, transfers ownership of all
     * in-flight sessions to the new worker ID and updates each live
     * orchestrator. Guarded by reregistrationInProgress so concurrent
     * heartbeat/poll failures trigger at most one attempt at a time.
     */
    async function attemptReregistration() {
        if (reregistrationInProgress) {
            log.debug('Re-registration already in progress, skipping');
            return false;
        }
        reregistrationInProgress = true;
        const oldWorkerId = workerId;
        log.warn('Worker not found on server - attempting to re-register');
        try {
            const registration = await register();
            if (registration) {
                const newWid = registration.workerId;
                workerId = newWid;
                // NOTE(review): initial registration derives workerShortId with
                // substring(0, 8) but this path uses substring(4, 8) — "Skip 'wkr_'
                // prefix". The two short-ID formats are inconsistent; confirm which
                // is intended.
                workerShortId = newWid.substring(4, 8); // Skip 'wkr_' prefix
                consecutiveHeartbeatFailures = 0;
                log.status('re-registered', `New Worker ID: ${workerShortId}`);
                // Transfer ownership of active sessions to the new worker ID
                if (oldWorkerId && activeOrchestrators.size > 0) {
                    log.info('Transferring ownership of active sessions', {
                        sessionCount: activeOrchestrators.size,
                        oldWorkerId: oldWorkerId.substring(0, 8),
                        newWorkerId: newWid.substring(0, 8),
                    });
                    // Transfers run in parallel; each resolves true/false.
                    const transferPromises = [];
                    for (const sessionId of activeOrchestrators.keys()) {
                        transferPromises.push(transferSessionOwnership(sessionId, newWid, oldWorkerId));
                    }
                    const results = await Promise.all(transferPromises);
                    const successCount = results.filter(Boolean).length;
                    log.info('Session ownership transfer complete', {
                        total: results.length,
                        succeeded: successCount,
                        failed: results.length - successCount,
                    });
                    // Update worker ID in all active orchestrators' activity emitters
                    for (const [sessionId, orchestrator] of activeOrchestrators.entries()) {
                        orchestrator.updateWorkerId(newWid);
                        log.debug('Updated orchestrator worker ID', {
                            sessionId: sessionId.substring(0, 8),
                        });
                    }
                }
                return true;
            }
            log.error('Re-registration failed');
            return false;
        }
        finally {
            reregistrationInProgress = false;
        }
    }
    // -----------------------------------------------------------------------
    // Heartbeat
    // -----------------------------------------------------------------------
    /**
     * POST a heartbeat with current load. Resets the failure counter on
     * success; on 'worker_not_found' re-registers immediately; on other
     * failures re-registers only after MAX_HEARTBEAT_FAILURES in a row.
     */
    async function sendHeartbeat() {
        if (!workerId)
            return;
        const result = await apiRequestWithError(`/api/workers/${workerId}/heartbeat`, {
            method: 'POST',
            body: JSON.stringify({
                activeCount,
                load: {
                    // 1-minute load average and fraction of memory in use.
                    cpu: os.loadavg()[0],
                    memory: 1 - os.freemem() / os.totalmem(),
                },
            }),
        });
        if (result.data) {
            consecutiveHeartbeatFailures = 0;
            log.debug('Heartbeat acknowledged', {
                activeCount,
                pendingWorkCount: result.data.pendingWorkCount,
            });
        }
        else if (result.error?.type === 'worker_not_found') {
            consecutiveHeartbeatFailures++;
            await attemptReregistration();
        }
        else {
            consecutiveHeartbeatFailures++;
            log.warn('Heartbeat failed', {
                consecutiveFailures: consecutiveHeartbeatFailures,
                maxFailures: MAX_HEARTBEAT_FAILURES,
                errorType: result.error?.type,
            });
            if (consecutiveHeartbeatFailures >= MAX_HEARTBEAT_FAILURES) {
                log.error('Multiple heartbeat failures - checking if re-registration needed', {
                    consecutiveFailures: consecutiveHeartbeatFailures,
                });
                await attemptReregistration();
            }
        }
    }
    // -----------------------------------------------------------------------
    // Polling & claiming
    // -----------------------------------------------------------------------
    /**
     * Poll the coordinator for new work and pending user prompts.
     * Always returns a poll-shaped object; failures and the unregistered
     * state yield an empty result rather than throwing.
     */
    async function pollForWork() {
        if (!workerId)
            return { work: [], pendingPrompts: {}, hasPendingPrompts: false };
        const result = await apiRequestWithError(`/api/workers/${workerId}/poll`);
        if (result.error?.type === 'worker_not_found') {
            await attemptReregistration();
            return { work: [], pendingPrompts: {}, hasPendingPrompts: false };
        }
        if (!result.data) {
            return { work: [], pendingPrompts: {}, hasPendingPrompts: false };
        }
        const pollData = result.data;
        if (pollData.hasPendingPrompts) {
            // Summarize incoming prompts for observability before processing.
            const totalPrompts = Object.values(pollData.pendingPrompts).reduce((sum, prompts) => sum + prompts.length, 0);
            log.info('Received pending prompts', {
                sessionCount: Object.keys(pollData.pendingPrompts).length,
                totalPrompts,
                sessions: Object.entries(pollData.pendingPrompts).map(([sessionId, prompts]) => ({
                    sessionId: sessionId.substring(0, 8),
                    promptCount: prompts.length,
                    promptIds: prompts.map((p) => p.id),
                })),
            });
        }
        return pollData;
    }
    /** Attempt to claim a work item; returns the server's claim result or null. */
    async function claimWork(sessionId) {
        if (!workerId)
            return null;
        return apiRequest(`/api/sessions/${sessionId}/claim`, {
            method: 'POST',
            body: JSON.stringify({ workerId }),
        });
    }
    /** Report a session status transition (plus optional extra payload fields). */
    async function reportStatus(sessionId, status, extra) {
        if (!workerId)
            return;
        await apiRequest(`/api/sessions/${sessionId}/status`, {
            method: 'POST',
            body: JSON.stringify({
                workerId,
                status,
                ...extra,
            }),
        });
        log.debug(`Reported status: ${status}`, { sessionId });
    }
    /** Post a human-readable progress milestone for a session. */
    async function postProgress(sessionId, milestone, message) {
        if (!workerId)
            return;
        const result = await apiRequest(`/api/sessions/${sessionId}/progress`, {
            method: 'POST',
            body: JSON.stringify({
                workerId,
                milestone,
                message,
            }),
        });
        if (result?.posted) {
            log.debug(`Progress posted: ${milestone}`, { sessionId });
        }
        else {
            log.warn(`Failed to post progress: ${milestone}`, { reason: result?.reason });
        }
    }
    /** True if the coordinator has marked this session as stopped by a user. */
    async function checkSessionStopped(sessionId) {
        const result = await apiRequest(`/api/sessions/${sessionId}/status`);
        return result?.status === 'stopped';
    }
    /** DELETE our registration; server reports sessions it had to unclaim. */
    async function deregister() {
        if (!workerId)
            return;
        log.info('Deregistering worker');
        const result = await apiRequest(`/api/workers/${workerId}`, {
            method: 'DELETE',
        });
        if (result) {
            log.status('stopped', `Unclaimed sessions: ${result.unclaimedSessions.length}`);
        }
        workerId = null;
    }
    // -----------------------------------------------------------------------
    // Agent logger factory
    // -----------------------------------------------------------------------
    /** Child logger scoped to one issue so agent logs carry the identifier. */
    function createAgentLogger(issueIdentifier) {
        return log.child({ issueIdentifier });
    }
    // -----------------------------------------------------------------------
    // Work execution
    // -----------------------------------------------------------------------
    /**
     * Execute one claimed work item end-to-end: report 'running', keep the
     * issue lock fresh, spawn (or resume) an agent via an orchestrator, watch
     * for a user stop signal, wait for completion, then report the terminal
     * status ('completed' | 'stopped' | 'failed') from the finally block so
     * it is sent exactly once regardless of how execution ended.
     */
    async function executeWork(work) {
        const agentLog = createAgentLogger(work.issueIdentifier);
        const isResume = !!work.claudeSessionId;
        agentLog.section(`${isResume ? 'Resuming' : 'Starting'} work on ${work.issueIdentifier}`);
        agentLog.info('Work details', {
            hasPrompt: !!work.prompt,
            isResume,
            workType: work.workType,
        });
        activeCount++;
        // Two-phase completion: set in try/catch, read in finally
        let finalStatus = 'failed';
        let statusPayload;
        // Issue lock TTL refresher
        let lockRefresher = null;
        try {
            await reportStatus(work.sessionId, 'running');
            // Start lock TTL refresher (refresh every 60s, lock TTL is 2 hours)
            if (work.issueId) {
                lockRefresher = setInterval(async () => {
                    try {
                        const response = await apiRequest(`/api/sessions/${work.sessionId}/lock-refresh`, {
                            method: 'POST',
                            body: JSON.stringify({ workerId, issueId: work.issueId }),
                        });
                        if (response?.refreshed) {
                            agentLog.debug('Issue lock TTL refreshed');
                        }
                    }
                    catch {
                        // Non-fatal — lock has a 2hr TTL so missing one refresh is fine
                    }
                }, 60_000);
            }
            // Post initial progress
            await postProgress(work.sessionId, isResume ? 'resumed' : 'claimed', isResume
                ? `Resuming work on ${work.issueIdentifier}`
                : `Worker claimed ${work.issueIdentifier}. Setting up environment...`);
            // Create orchestrator with API activity proxy
            const orchestrator = createOrchestrator({
                maxConcurrent: 1,
                worktreePath: path.resolve(gitRoot, '.worktrees'),
                apiActivityConfig: {
                    baseUrl: workerConfig.apiUrl,
                    apiKey: workerConfig.apiKey,
                    workerId: workerId,
                },
            }, {
                onIssueSelected: (issue) => {
                    agentLog.info('Issue fetched', {
                        title: issue.title.slice(0, 50),
                        labels: issue.labels.join(', '),
                    });
                },
                onAgentStart: (agent) => {
                    agentLog.status('running', `PID: ${agent.pid}`);
                    agentLog.debug('Agent details', {
                        worktree: agent.worktreePath,
                    });
                    // NOTE(review): these two calls are intentionally not awaited
                    // (callbacks are sync); their rejections are unhandled — confirm
                    // apiRequest's internal handling makes that safe.
                    reportStatus(work.sessionId, 'running', {
                        claudeSessionId: agent.sessionId,
                        worktreePath: agent.worktreePath,
                    });
                    postProgress(work.sessionId, 'started', `Agent started working on ${agent.identifier}`);
                },
                onAgentComplete: (agent) => {
                    agentLog.status('completed', `Exit code: ${agent.exitCode}`);
                },
                onAgentError: (_agent, error) => {
                    agentLog.error('Agent error', { error: error.message });
                },
                onAgentStopped: (_agent) => {
                    agentLog.status('stopped');
                },
                onAgentIncomplete: (agent) => {
                    agentLog.warn('Agent incomplete - worktree preserved', {
                        reason: agent.incompleteReason,
                        worktreePath: agent.worktreePath,
                    });
                },
                onClaudeSessionId: (_linearSessionId, claudeSessionId) => {
                    agentLog.debug('Claude session captured', { claudeSessionId });
                    reportStatus(work.sessionId, 'running', {
                        claudeSessionId,
                    });
                },
            });
            // Store orchestrator for prompt forwarding
            activeOrchestrators.set(work.sessionId, orchestrator);
            agentLog.debug('Orchestrator registered for session', {
                sessionId: work.sessionId.substring(0, 8),
            });
            let spawnedAgent;
            // Retry configuration for "agent already running" conflicts
            const MAX_SPAWN_RETRIES = 6;
            const SPAWN_RETRY_DELAY_MS = 15000;
            if (work.claudeSessionId) {
                // Resume existing Claude session
                agentLog.info('Resuming Claude session', {
                    claudeSessionId: work.claudeSessionId.substring(0, 12),
                });
                const prompt = work.prompt || `Continue work on ${work.issueIdentifier}`;
                const result = await orchestrator.forwardPrompt(work.issueId, work.sessionId, prompt, work.claudeSessionId, work.workType);
                if (!result.forwarded || !result.agent) {
                    throw new Error(`Failed to resume session: ${result.reason || 'unknown error'}`);
                }
                agentLog.success('Session resumed');
                spawnedAgent = result.agent;
            }
            else {
                // Fresh start with retry logic
                agentLog.info('Spawning new agent', { workType: work.workType });
                let lastError = null;
                for (let attempt = 1; attempt <= MAX_SPAWN_RETRIES; attempt++) {
                    try {
                        spawnedAgent = await orchestrator.spawnAgentForIssue(work.issueIdentifier, work.sessionId, work.workType, work.prompt);
                        break;
                    }
                    catch (err) {
                        lastError = err instanceof Error ? err : new Error(String(err));
                        // Retriable conflicts: a previous agent on the same issue is
                        // still winding down, or its branch is checked out elsewhere.
                        const isAgentRunning = lastError.message.includes('Agent already running') ||
                            lastError.message.includes('Agent is still running');
                        const isBranchConflict = lastError.message.includes('already checked out') ||
                            lastError.message.includes('is already checked out at');
                        const isRetriable = isAgentRunning || isBranchConflict;
                        if (isRetriable && attempt < MAX_SPAWN_RETRIES) {
                            const reason = isBranchConflict
                                ? 'Branch in use by another agent'
                                : 'Agent still running';
                            agentLog.warn(`${reason}, waiting to retry (attempt ${attempt}/${MAX_SPAWN_RETRIES})`, { retryInMs: SPAWN_RETRY_DELAY_MS });
                            await postProgress(work.sessionId, 'waiting', `${reason}, waiting to retry (${attempt}/${MAX_SPAWN_RETRIES})...`);
                            await new Promise((resolve) => setTimeout(resolve, SPAWN_RETRY_DELAY_MS));
                        }
                        else {
                            throw lastError;
                        }
                    }
                }
                if (!spawnedAgent) {
                    throw lastError || new Error('Failed to spawn agent after retries');
                }
            }
            agentLog.info('Agent spawned', {
                pid: spawnedAgent.pid,
                status: spawnedAgent.status,
            });
            if (!spawnedAgent.pid) {
                agentLog.warn('Agent has no PID - spawn may have failed');
            }
            // Start a stop signal checker: every 5s ask the server whether the
            // user requested a stop; if so, stop the agent gracefully.
            let stopRequested = false;
            const stopChecker = setInterval(async () => {
                try {
                    if (await checkSessionStopped(work.sessionId)) {
                        agentLog.warn('Stop signal received');
                        stopRequested = true;
                        clearInterval(stopChecker);
                        await orchestrator.stopAgent(work.issueId, false);
                    }
                }
                catch {
                    // Ignore errors in stop checker
                }
            }, 5000);
            // Wait for agent to complete
            agentLog.info('Waiting for agent to complete...');
            const results = await orchestrator.waitForAll();
            const agent = results[0];
            clearInterval(stopChecker);
            // Determine final status. Each branch reports 'finalizing' first;
            // the true terminal status is sent once, in the finally block.
            if (stopRequested || agent?.stopReason === 'user_request') {
                finalStatus = 'stopped';
                await reportStatus(work.sessionId, 'finalizing');
                await postProgress(work.sessionId, 'stopped', `Work stopped by user request`);
                agentLog.status('stopped', 'Work stopped by user request');
            }
            else if (agent?.stopReason === 'timeout') {
                finalStatus = 'failed';
                statusPayload = { error: { message: 'Agent timed out' } };
                await reportStatus(work.sessionId, 'finalizing');
                await postProgress(work.sessionId, 'failed', `Work timed out`);
                agentLog.status('stopped', 'Work timed out');
            }
            else if (agent?.status === 'completed') {
                finalStatus = 'completed';
                statusPayload = {
                    claudeSessionId: agent.sessionId,
                    worktreePath: agent.worktreePath,
                };
                await reportStatus(work.sessionId, 'finalizing');
                await postProgress(work.sessionId, 'completed', `Work completed successfully on ${work.issueIdentifier}`);
                agentLog.success('Work completed successfully');
            }
            else {
                const errorMsg = agent?.error?.message || 'Agent did not complete successfully';
                finalStatus = 'failed';
                statusPayload = { error: { message: errorMsg } };
                await reportStatus(work.sessionId, 'finalizing');
                await postProgress(work.sessionId, 'failed', `Work failed: ${errorMsg}`);
                agentLog.error('Work failed', { error: errorMsg });
            }
        }
        catch (error) {
            const errorMsg = error instanceof Error ? error.message : 'Unknown error';
            agentLog.error('Work execution failed', { error: errorMsg });
            finalStatus = 'failed';
            statusPayload = { error: { message: errorMsg } };
            await reportStatus(work.sessionId, 'finalizing').catch(() => { });
            // NOTE(review): unlike the reportStatus above, this postProgress is
            // not .catch()-guarded — if it rejects, the rejection propagates out
            // of the catch block. Confirm whether that is intended.
            await postProgress(work.sessionId, 'failed', `Work failed: ${errorMsg}`);
        }
        finally {
            if (lockRefresher)
                clearInterval(lockRefresher);
            activeOrchestrators.delete(work.sessionId);
            agentLog.debug('Orchestrator unregistered for session', {
                sessionId: work.sessionId.substring(0, 8),
            });
            activeCount--;
            // Report true terminal status AFTER all cleanup
            await reportStatus(work.sessionId, finalStatus, statusPayload).catch((err) => {
                agentLog.error('Failed to report final status', {
                    error: err instanceof Error ? err.message : String(err),
                });
            });
        }
    }
    // -----------------------------------------------------------------------
    // Main logic: register, heartbeat, then poll for work/prompts until
    // `running` goes false or the abort signal fires.
    // -----------------------------------------------------------------------
    try {
        log.section('AgentFactory Worker');
        log.info('Configuration', {
            apiUrl: workerConfig.apiUrl,
            hostname: workerConfig.hostname,
            capacity: workerConfig.capacity,
            dryRun: workerConfig.dryRun,
        });
        // Register with coordinator
        const registration = await register();
        if (!registration) {
            throw new Error('Failed to register with coordinator');
        }
        workerId = registration.workerId;
        workerShortId = registration.workerId.substring(0, 8);
        // Update logger with worker context
        log = createLogger({ workerId, workerShortId }, { showTimestamp: true });
        // Set up heartbeat — interval is dictated by the server.
        heartbeatTimer = setInterval(() => sendHeartbeat(), registration.heartbeatInterval);
        // Send initial heartbeat
        await sendHeartbeat();
        // Main poll loop
        log.info('Starting poll loop...');
        while (running) {
            if (signal?.aborted)
                break;
            try {
                const availableCapacity = workerConfig.capacity - activeCount;
                const pollResult = await pollForWork();
                // Handle new work items if we have capacity
                if (availableCapacity > 0 && pollResult.work.length > 0) {
                    log.info(`Found ${pollResult.work.length} work item(s)`, {
                        activeCount,
                        availableCapacity,
                    });
                    for (const item of pollResult.work.slice(0, availableCapacity)) {
                        if (!running)
                            break;
                        const claimResult = await claimWork(item.sessionId);
                        if (claimResult?.claimed) {
                            log.status('claimed', item.issueIdentifier);
                            if (workerConfig.dryRun) {
                                log.info(`[DRY RUN] Would execute: ${item.issueIdentifier}`);
                            }
                            else {
                                // Deliberately not awaited: work runs in the background
                                // while the loop keeps polling; failures are logged here.
                                executeWork(item).catch((error) => {
                                    log.error('Background work execution failed', {
                                        error: error instanceof Error ? error.message : String(error),
                                    });
                                });
                            }
                        }
                        else {
                            log.warn(`Failed to claim work: ${item.issueIdentifier}`);
                        }
                    }
                }
                // Handle pending prompts for active sessions
                if (pollResult.hasPendingPrompts) {
                    for (const [sessionId, prompts] of Object.entries(pollResult.pendingPrompts)) {
                        for (const prompt of prompts) {
                            log.info('Processing pending prompt', {
                                sessionId: sessionId.substring(0, 8),
                                promptId: prompt.id,
                                promptLength: prompt.prompt.length,
                                userName: prompt.userName,
                            });
                            const orchestrator = activeOrchestrators.get(sessionId);
                            if (!orchestrator) {
                                // Prompt stays unclaimed on the server; presumably it will
                                // be re-delivered on a later poll or to another worker.
                                log.warn('No active orchestrator found for session', {
                                    sessionId: sessionId.substring(0, 8),
                                    promptId: prompt.id,
                                });
                                continue;
                            }
                            const agent = orchestrator.getAgentBySession(sessionId);
                            const claudeSessionId = agent?.claudeSessionId;
                            log.info('Forwarding prompt to Claude session', {
                                sessionId: sessionId.substring(0, 8),
                                promptId: prompt.id,
                                hasClaudeSession: !!claudeSessionId,
                                agentStatus: agent?.status,
                            });
                            try {
                                const result = await orchestrator.forwardPrompt(prompt.issueId, sessionId, prompt.prompt, claudeSessionId, agent?.workType);
                                if (result.forwarded) {
                                    log.success(result.injected
                                        ? 'Message injected into running session'
                                        : 'Prompt forwarded successfully', {
                                        sessionId: sessionId.substring(0, 8),
                                        promptId: prompt.id,
                                        injected: result.injected ?? false,
                                        resumed: result.resumed,
                                        newAgentPid: result.agent?.pid,
                                    });
                                    // Claim the prompt only after successful forwarding so
                                    // an un-forwarded prompt remains available for retry.
                                    const claimResult = await apiRequest(`/api/sessions/${sessionId}/prompts`, {
                                        method: 'POST',
                                        body: JSON.stringify({ promptId: prompt.id }),
                                    });
                                    if (claimResult?.claimed) {
                                        log.debug('Prompt claimed', { promptId: prompt.id });
                                    }
                                    else {
                                        log.warn('Failed to claim prompt', { promptId: prompt.id });
                                    }
                                }
                                else {
                                    log.error('Failed to forward prompt', {
                                        sessionId: sessionId.substring(0, 8),
                                        promptId: prompt.id,
                                        reason: result.reason,
                                        error: result.error?.message,
                                    });
                                }
                            }
                            catch (error) {
                                log.error('Error forwarding prompt', {
                                    sessionId: sessionId.substring(0, 8),
                                    promptId: prompt.id,
                                    error: error instanceof Error ? error.message : String(error),
                                });
                            }
                        }
                    }
                }
            }
            catch (error) {
                log.error('Poll loop error', {
                    error: error instanceof Error ? error.message : String(error),
                });
            }
            // Wait before next poll
            await new Promise((resolve) => setTimeout(resolve, registration.pollInterval));
        }
    }
    finally {
        signal?.removeEventListener('abort', onAbort);
        // Clean up timers
        if (heartbeatTimer)
            clearInterval(heartbeatTimer);
        // Deregister if we haven't already
        if (workerId && !shutdownInProgress) {
            await deregister().catch(() => { });
        }
        log.status('stopped', 'Shutdown complete');
    }
}
|