@hamp10/agentforge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/agentforge.js +558 -0
- package/package.json +22 -0
- package/src/HampAgentCLI.js +125 -0
- package/src/OllamaAgent.js +415 -0
- package/src/OpenClawCLI.js +1520 -0
- package/src/hampagent/browser.js +185 -0
- package/src/hampagent/runner.js +277 -0
- package/src/hampagent/sessions.js +62 -0
- package/src/hampagent/tools.js +298 -0
- package/src/preview-server.js +260 -0
- package/src/worker.js +1791 -0
- package/templates/agent/AGENTFORGE.md +348 -0
- package/templates/agent/AGENTS.md +212 -0
- package/templates/agent/SOUL.md +36 -0
- package/templates/agent/TOOLS.md +40 -0
package/src/worker.js
ADDED
|
@@ -0,0 +1,1791 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import WebSocket from 'ws';
|
|
4
|
+
import { OpenClawCLI } from './OpenClawCLI.js';
|
|
5
|
+
import { HampAgentCLI } from './HampAgentCLI.js';
|
|
6
|
+
import { OllamaAgent } from './OllamaAgent.js';
|
|
7
|
+
import EventEmitter from 'events';
|
|
8
|
+
import path from 'path';
|
|
9
|
+
import { existsSync, readdirSync, readFileSync, mkdirSync, writeFileSync, copyFileSync, statSync, unlinkSync } from 'fs';
|
|
10
|
+
import { fileURLToPath } from 'url';
|
|
11
|
+
import { homedir, hostname } from 'os';
|
|
12
|
+
import { spawn } from 'child_process';
|
|
13
|
+
|
|
14
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
15
|
+
|
|
16
|
+
export class AgentForgeWorker extends EventEmitter {
|
|
17
|
+
constructor(token, railwayUrl = 'wss://agentforgeai-production.up.railway.app/socket', workerConfig = {}) {
|
|
18
|
+
super();
|
|
19
|
+
this.token = token;
|
|
20
|
+
this.railwayUrl = railwayUrl;
|
|
21
|
+
this.ws = null;
|
|
22
|
+
|
|
23
|
+
// Pick agent backend from config
|
|
24
|
+
if (workerConfig.provider === 'local') {
|
|
25
|
+
const url = workerConfig.localUrl || 'http://localhost:11434';
|
|
26
|
+
const model = workerConfig.localModel || 'llama3.1:8b';
|
|
27
|
+
console.log(`đĻ Using local model backend: ${url} / ${model}`);
|
|
28
|
+
this.cli = new OllamaAgent(url, model);
|
|
29
|
+
} else if (OpenClawCLI.isAvailable()) {
|
|
30
|
+
this.cli = new OpenClawCLI();
|
|
31
|
+
// Wire in the OpenClaw Gateway streaming config so OpenClawCLI can use
|
|
32
|
+
// per-token SSE streaming instead of waiting for subprocess to exit
|
|
33
|
+
try {
|
|
34
|
+
const cfgPath = path.join(homedir(), '.openclaw', 'openclaw.json');
|
|
35
|
+
if (existsSync(cfgPath)) {
|
|
36
|
+
const cfg = JSON.parse(readFileSync(cfgPath, 'utf-8'));
|
|
37
|
+
const port = cfg?.gateway?.port || 18789;
|
|
38
|
+
const token = cfg?.gateway?.auth?.token;
|
|
39
|
+
if (port && token) {
|
|
40
|
+
this.cli.gatewayPort = port;
|
|
41
|
+
this.cli.gatewayToken = token;
|
|
42
|
+
console.log(`đ OpenClaw Gateway streaming enabled (port ${port})`);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
} catch (err) {
|
|
46
|
+
console.warn(`â ī¸ Could not load gateway config for streaming: ${err.message}`);
|
|
47
|
+
}
|
|
48
|
+
} else {
|
|
49
|
+
// openclaw not installed â cannot run without a configured backend
|
|
50
|
+
console.error('');
|
|
51
|
+
console.error('â No AI backend configured.');
|
|
52
|
+
console.error('');
|
|
53
|
+
console.error(' AgentForge needs an AI model to run agents.');
|
|
54
|
+
console.error(' Configure a local model server (Ollama, LM Studio, Jan, etc.):');
|
|
55
|
+
console.error('');
|
|
56
|
+
console.error(' agentforge local --url http://localhost:11434 --model llama3.1:8b');
|
|
57
|
+
console.error('');
|
|
58
|
+
console.error(' Then run: agentforge start');
|
|
59
|
+
console.error('');
|
|
60
|
+
process.exit(1);
|
|
61
|
+
}
|
|
62
|
+
// Hampagent â always available alongside openclaw
|
|
63
|
+
this.hampagent = new HampAgentCLI();
|
|
64
|
+
|
|
65
|
+
this.activeAgents = new Map();
|
|
66
|
+
this.reconnectAttempts = 0;
|
|
67
|
+
this.maxReconnectAttempts = 10;
|
|
68
|
+
|
|
69
|
+
// Per-agent task queues to prevent concurrent openclaw processes
|
|
70
|
+
this.agentQueues = new Map(); // agentId -> array of tasks
|
|
71
|
+
this.agentProcessing = new Map(); // agentId -> boolean (is currently processing)
|
|
72
|
+
this.processingStartTime = new Map(); // agentId -> timestamp when processing started
|
|
73
|
+
this.PROCESSING_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes max for stale processing state (large projects with Opus can be slow)
|
|
74
|
+
|
|
75
|
+
// Track running tasks for cancellation
|
|
76
|
+
this.runningTasks = new Map(); // taskId -> { agentId, cancelled }
|
|
77
|
+
|
|
78
|
+
// Queue for messages that couldn't be sent while disconnected
|
|
79
|
+
this.pendingMessages = [];
|
|
80
|
+
this.maxPendingMessages = 100; // Prevent unbounded growth
|
|
81
|
+
|
|
82
|
+
// Track recently sent completions to prevent duplicates
|
|
83
|
+
this.recentCompletions = new Set();
|
|
84
|
+
this.completionTTL = 30000; // 30 seconds
|
|
85
|
+
|
|
86
|
+
// Track agent activity for stuck detection
|
|
87
|
+
this.lastAgentActivity = new Map(); // agentId -> timestamp
|
|
88
|
+
this.pingsSinceActivity = new Map(); // agentId -> count
|
|
89
|
+
this.STUCK_PING_THRESHOLD = 2; // 2 pings with no activity = stuck (~60s since server pings every 30s)
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
speakTextOutLoud(utterance) {
|
|
93
|
+
if (!utterance || typeof utterance !== 'string') return;
|
|
94
|
+
const text = utterance.trim();
|
|
95
|
+
if (!text) return;
|
|
96
|
+
|
|
97
|
+
if (process.platform !== 'darwin') {
|
|
98
|
+
console.log('đ TTS requested but platform is not macOS; skipping local audio playback.');
|
|
99
|
+
return;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
try {
|
|
103
|
+
const volumeProc = spawn('osascript', ['-e', 'set volume without output muted', '-e', 'set volume output volume 80'], {
|
|
104
|
+
stdio: 'ignore'
|
|
105
|
+
});
|
|
106
|
+
volumeProc.on('error', (err) => console.warn('â ī¸ Unable to adjust volume:', err.message));
|
|
107
|
+
} catch (err) {
|
|
108
|
+
console.warn('â ī¸ Volume adjustment failed:', err.message);
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
try {
|
|
112
|
+
const sayProc = spawn('say', [text], { stdio: 'ignore' });
|
|
113
|
+
sayProc.on('error', (err) => console.warn('â ī¸ "say" command failed:', err.message));
|
|
114
|
+
console.log(`đ Speaking aloud: "${text.slice(0, 80)}${text.length > 80 ? 'âĻ' : ''}"`);
|
|
115
|
+
} catch (err) {
|
|
116
|
+
console.warn('â ī¸ Unable to invoke "say":', err.message);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
extractSpeechText(toolInput) {
|
|
121
|
+
if (!toolInput) return '';
|
|
122
|
+
|
|
123
|
+
const preferredKeys = ['text', 'input', 'message', 'utterance', 'prompt', 'transcript', 'content'];
|
|
124
|
+
const visited = new Set();
|
|
125
|
+
|
|
126
|
+
const readValue = (value) => {
|
|
127
|
+
if (value == null) return '';
|
|
128
|
+
if (typeof value === 'string') {
|
|
129
|
+
const trimmed = value.trim();
|
|
130
|
+
return trimmed.length > 0 ? trimmed : '';
|
|
131
|
+
}
|
|
132
|
+
if (typeof value === 'number') {
|
|
133
|
+
return String(value);
|
|
134
|
+
}
|
|
135
|
+
if (Array.isArray(value)) {
|
|
136
|
+
for (const item of value) {
|
|
137
|
+
const result = readValue(item);
|
|
138
|
+
if (result) return result;
|
|
139
|
+
}
|
|
140
|
+
return '';
|
|
141
|
+
}
|
|
142
|
+
if (typeof value === 'object') {
|
|
143
|
+
if (visited.has(value)) return '';
|
|
144
|
+
visited.add(value);
|
|
145
|
+
|
|
146
|
+
if (typeof value.text === 'string' && value.text.trim()) {
|
|
147
|
+
return value.text.trim();
|
|
148
|
+
}
|
|
149
|
+
if (value.type === 'text' && typeof value.text === 'string' && value.text.trim()) {
|
|
150
|
+
return value.text.trim();
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
for (const key of preferredKeys) {
|
|
154
|
+
if (key in value) {
|
|
155
|
+
const result = readValue(value[key]);
|
|
156
|
+
if (result) return result;
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
for (const nested of Object.values(value)) {
|
|
161
|
+
const result = readValue(nested);
|
|
162
|
+
if (result) return result;
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
return '';
|
|
166
|
+
};
|
|
167
|
+
|
|
168
|
+
return readValue(toolInput);
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
async initialize() {
|
|
172
|
+
this._killOrphanedAgents();
|
|
173
|
+
this.installPreviewServer();
|
|
174
|
+
this._startAutoUpdateCheck();
|
|
175
|
+
console.log('â
Worker initialized');
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
_killOrphanedAgents() {
|
|
179
|
+
// Kill any openclaw agent processes left over from a previous worker session.
|
|
180
|
+
// Without this, orphaned processes reconnect to the gateway and block the task queue.
|
|
181
|
+
for (const name of ['openclaw-agent', 'openclaw-gateway']) {
|
|
182
|
+
try {
|
|
183
|
+
const p = spawn('pkill', ['-f', name], { stdio: 'ignore' });
|
|
184
|
+
p.on('close', (code) => {
|
|
185
|
+
if (code === 0) console.log(`đ§š Killed orphaned ${name} processes`);
|
|
186
|
+
});
|
|
187
|
+
} catch {}
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
/**
|
|
192
|
+
* Guard against bloated OpenClaw sessions causing API failures.
|
|
193
|
+
* OpenClaw sessions accumulate ALL messages (tool calls, results, thinking blocks).
|
|
194
|
+
* Once a session hits ~400KB the Anthropic API starts returning internal server errors
|
|
195
|
+
* and the agent deadlocks â it can't respond or terminate.
|
|
196
|
+
*
|
|
197
|
+
* Strategy: keep the most recent messages from the session JSONL so the agent
|
|
198
|
+
* retains recent context (roughly the last N turns), rather than wiping it entirely.
|
|
199
|
+
* This minimises context loss while preventing unbounded growth.
|
|
200
|
+
*/
|
|
201
|
+
_guardOpenClawSession(agentId) {
|
|
202
|
+
const MAX_SESSION_BYTES = 400_000; // ~400KB â trim above this
|
|
203
|
+
const KEEP_BYTES = 150_000; // keep last ~150KB of messages
|
|
204
|
+
const sessionsDir = path.join(homedir(), '.openclaw', 'agents', agentId, 'sessions');
|
|
205
|
+
if (!existsSync(sessionsDir)) return;
|
|
206
|
+
|
|
207
|
+
let trimmed = false;
|
|
208
|
+
for (const file of readdirSync(sessionsDir)) {
|
|
209
|
+
if (!file.endsWith('.jsonl')) continue;
|
|
210
|
+
const filePath = path.join(sessionsDir, file);
|
|
211
|
+
let size;
|
|
212
|
+
try { size = statSync(filePath).size; } catch { continue; }
|
|
213
|
+
if (size <= MAX_SESSION_BYTES) continue;
|
|
214
|
+
|
|
215
|
+
// Trim: keep the session header line(s) + the tail of the file
|
|
216
|
+
try {
|
|
217
|
+
const raw = readFileSync(filePath, 'utf-8');
|
|
218
|
+
const lines = raw.split('\n').filter(l => l.trim());
|
|
219
|
+
// Always keep the first line (session metadata)
|
|
220
|
+
const header = lines[0] || '';
|
|
221
|
+
// Rebuild from the tail until we have roughly KEEP_BYTES
|
|
222
|
+
const tail = [];
|
|
223
|
+
let kept = 0;
|
|
224
|
+
for (let i = lines.length - 1; i >= 1; i--) {
|
|
225
|
+
kept += Buffer.byteLength(lines[i], 'utf-8') + 1;
|
|
226
|
+
if (kept > KEEP_BYTES) break;
|
|
227
|
+
tail.unshift(lines[i]);
|
|
228
|
+
}
|
|
229
|
+
const trimmedContent = [header, ...tail].join('\n') + '\n';
|
|
230
|
+
writeFileSync(filePath, trimmedContent, 'utf-8');
|
|
231
|
+
console.log(`âī¸ [${agentId}] Trimmed session ${file} from ${Math.round(size/1024)}KB â ${Math.round(Buffer.byteLength(trimmedContent,'utf-8')/1024)}KB (kept last ${tail.length} msgs)`);
|
|
232
|
+
trimmed = true;
|
|
233
|
+
} catch (e) {
|
|
234
|
+
// If trimming fails, delete entirely â a fresh session is better than a deadlock
|
|
235
|
+
try { unlinkSync(filePath); console.log(`đī¸ [${agentId}] Deleted oversized session ${file} (${Math.round(size/1024)}KB) â trim failed`); } catch {}
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
return trimmed;
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
installPreviewServer() {
|
|
242
|
+
try {
|
|
243
|
+
const src = path.join(__dirname, 'preview-server.js');
|
|
244
|
+
const destDir = path.join(homedir(), '.agentforge');
|
|
245
|
+
const dest = path.join(destDir, 'preview-server.js');
|
|
246
|
+
if (!existsSync(destDir)) mkdirSync(destDir, { recursive: true });
|
|
247
|
+
copyFileSync(src, dest);
|
|
248
|
+
console.log('đ Preview server installed at ~/.agentforge/preview-server.js');
|
|
249
|
+
} catch (e) {
|
|
250
|
+
console.warn('â ī¸ Could not install preview server:', e.message);
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
async connect() {
|
|
255
|
+
// Start periodic queue health check (every 10 seconds)
|
|
256
|
+
if (!this.queueHealthInterval) {
|
|
257
|
+
this.queueHealthInterval = setInterval(() => {
|
|
258
|
+
// Log current state
|
|
259
|
+
const processing = [];
|
|
260
|
+
const queued = [];
|
|
261
|
+
for (const [agentId, isProcessing] of this.agentProcessing.entries()) {
|
|
262
|
+
if (isProcessing) {
|
|
263
|
+
const elapsed = Date.now() - (this.processingStartTime.get(agentId) || Date.now());
|
|
264
|
+
processing.push(`${agentId.slice(-8)}(${Math.round(elapsed/1000)}s)`);
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
for (const [agentId, queue] of this.agentQueues.entries()) {
|
|
268
|
+
if (queue.length > 0) {
|
|
269
|
+
queued.push(`${agentId.slice(-8)}:${queue.length}`);
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
if (processing.length > 0 || queued.length > 0) {
|
|
273
|
+
console.log(`đ State: processing=[${processing.join(',')}] queued=[${queued.join(',')}]`);
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
this.resetStaleProcessingStates();
|
|
277
|
+
this.processAllQueues();
|
|
278
|
+
}, 10000);
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
return new Promise((resolve, reject) => {
|
|
282
|
+
console.log(`đ Connecting to ${this.railwayUrl}...`);
|
|
283
|
+
|
|
284
|
+
this.ws = new WebSocket(this.railwayUrl, {
|
|
285
|
+
headers: {
|
|
286
|
+
'Authorization': `Bearer ${this.token}`,
|
|
287
|
+
'X-Worker-Type': 'agentforge'
|
|
288
|
+
}
|
|
289
|
+
});
|
|
290
|
+
|
|
291
|
+
this.ws.on('open', () => {
|
|
292
|
+
console.log('â
Connected to AgentForge');
|
|
293
|
+
const wasReconnect = this.reconnectAttempts > 0;
|
|
294
|
+
this.reconnectAttempts = 0;
|
|
295
|
+
|
|
296
|
+
// Collect any tasks still running on this machine so the server doesn't
|
|
297
|
+
// falsely declare them idle when it processes the reconnect.
|
|
298
|
+
const liveTaskIds = [];
|
|
299
|
+
for (const [taskId, taskInfo] of this.runningTasks.entries()) {
|
|
300
|
+
if (!taskInfo.cancelled) liveTaskIds.push(taskId);
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
// Register worker
|
|
304
|
+
this.send({
|
|
305
|
+
type: 'worker_register',
|
|
306
|
+
capabilities: {
|
|
307
|
+
maxAgents: 100, // API limits support 100+, real limit is local resources
|
|
308
|
+
openclawConfigured: true, // Uses OpenClaw with Claude Max (browser OAuth, no API key needed)
|
|
309
|
+
platform: process.platform,
|
|
310
|
+
arch: process.arch,
|
|
311
|
+
deviceName: homedir().split('/').pop() + '@' + hostname(),
|
|
312
|
+
hostname: hostname(),
|
|
313
|
+
anthropicApiKey: process.env.ANTHROPIC_API_KEY || this.cli?.anthropicApiKey || null
|
|
314
|
+
},
|
|
315
|
+
// Tasks still running locally â server should keep them active, not broadcast idle
|
|
316
|
+
liveTaskIds
|
|
317
|
+
});
|
|
318
|
+
|
|
319
|
+
// On reconnect, reset stale processing states and flush pending messages
|
|
320
|
+
if (wasReconnect) {
|
|
321
|
+
this.resetStaleProcessingStates();
|
|
322
|
+
setTimeout(() => {
|
|
323
|
+
this.flushPendingMessages();
|
|
324
|
+
this.processAllQueues(); // Kick-start any stalled queues
|
|
325
|
+
}, 500);
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
resolve();
|
|
329
|
+
});
|
|
330
|
+
|
|
331
|
+
this.ws.on('message', (data) => {
|
|
332
|
+
try {
|
|
333
|
+
const message = JSON.parse(data.toString());
|
|
334
|
+
this.handleMessage(message);
|
|
335
|
+
} catch (error) {
|
|
336
|
+
console.error('â Failed to parse message:', error);
|
|
337
|
+
}
|
|
338
|
+
});
|
|
339
|
+
|
|
340
|
+
this.ws.on('close', () => {
|
|
341
|
+
console.log('â Disconnected from AgentForge');
|
|
342
|
+
this.handleDisconnect();
|
|
343
|
+
});
|
|
344
|
+
|
|
345
|
+
this.ws.on('error', (error) => {
|
|
346
|
+
console.error('â WebSocket error:', error.message);
|
|
347
|
+
reject(error);
|
|
348
|
+
});
|
|
349
|
+
});
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
handleDisconnect() {
|
|
353
|
+
// Warn about running tasks that might have results queued
|
|
354
|
+
const runningCount = this.runningTasks.size;
|
|
355
|
+
const pendingCount = this.pendingMessages.length;
|
|
356
|
+
if (runningCount > 0 || pendingCount > 0) {
|
|
357
|
+
console.log(`â ī¸ Disconnect with ${runningCount} running tasks, ${pendingCount} pending messages`);
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
if (this.reconnectAttempts < this.maxReconnectAttempts) {
|
|
361
|
+
this.reconnectAttempts++;
|
|
362
|
+
const delay = Math.min(1000 * Math.pow(2, this.reconnectAttempts - 1), 30000);
|
|
363
|
+
console.log(`đ Reconnecting in ${delay / 1000}s (attempt ${this.reconnectAttempts}/${this.maxReconnectAttempts})...`);
|
|
364
|
+
setTimeout(() => this.connect(), delay);
|
|
365
|
+
} else {
|
|
366
|
+
console.error('â Max reconnection attempts reached. Please restart the worker.');
|
|
367
|
+
process.exit(1);
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
async handleMessage(message) {
|
|
372
|
+
// Don't log pings â they flood the terminal every 30s with no useful info
|
|
373
|
+
if (message.type !== 'ping') {
|
|
374
|
+
console.log(`đ¨ Received: ${message.type}`);
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
switch (message.type) {
|
|
378
|
+
case 'task_assign':
|
|
379
|
+
// Inject API key from server if provided â ensures even workers without local key config work
|
|
380
|
+
if (message.anthropicApiKey) {
|
|
381
|
+
if (!process.env.ANTHROPIC_API_KEY) {
|
|
382
|
+
process.env.ANTHROPIC_API_KEY = message.anthropicApiKey;
|
|
383
|
+
console.log('đ Using API key from server task_assign');
|
|
384
|
+
}
|
|
385
|
+
if (this.cli && typeof this.cli === 'object' && 'anthropicApiKey' in this.cli) {
|
|
386
|
+
if (this.cli.anthropicApiKey !== message.anthropicApiKey) {
|
|
387
|
+
this.cli.anthropicApiKey = message.anthropicApiKey;
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
// Also wire key to hampagent
|
|
391
|
+
if (!this.hampagent.anthropicApiKey) {
|
|
392
|
+
this.hampagent.anthropicApiKey = message.anthropicApiKey;
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
await this.executeTask(message);
|
|
396
|
+
break;
|
|
397
|
+
|
|
398
|
+
case 'task_cancel':
|
|
399
|
+
// Support cancellation by taskId or agentId
|
|
400
|
+
console.log(`đ¨ CANCEL REQUEST: taskId=${message.taskId} agentId=${message.agentId}`);
|
|
401
|
+
if (message.taskId) {
|
|
402
|
+
await this.cancelTask(message.taskId);
|
|
403
|
+
} else if (message.agentId) {
|
|
404
|
+
await this.cancelTaskByAgent(message.agentId);
|
|
405
|
+
} else {
|
|
406
|
+
console.log(`â ī¸ task_cancel received without taskId or agentId!`);
|
|
407
|
+
}
|
|
408
|
+
break;
|
|
409
|
+
|
|
410
|
+
case 'ping':
|
|
411
|
+
this.send({ type: 'pong' });
|
|
412
|
+
// Check for stuck agents - if processing but no activity for 2+ pings
|
|
413
|
+
this.checkForStuckAgents();
|
|
414
|
+
break;
|
|
415
|
+
|
|
416
|
+
case 'worker_restart':
|
|
417
|
+
console.log('đ Received worker_restart command â pulling latest code then restarting...');
|
|
418
|
+
this.send({ type: 'worker_restarting' });
|
|
419
|
+
setTimeout(() => this._updateAndRestart(), 200);
|
|
420
|
+
break;
|
|
421
|
+
|
|
422
|
+
case 'read_agent_files':
|
|
423
|
+
await this.handleReadAgentFiles(message);
|
|
424
|
+
break;
|
|
425
|
+
|
|
426
|
+
case 'write_agent_file':
|
|
427
|
+
await this.handleWriteAgentFile(message);
|
|
428
|
+
break;
|
|
429
|
+
|
|
430
|
+
case 'shell_exec': {
|
|
431
|
+
const { execId, command } = message;
|
|
432
|
+
console.log(`đĨī¸ shell_exec [${execId}]: ${command}`);
|
|
433
|
+
try {
|
|
434
|
+
const { execSync } = await import('child_process');
|
|
435
|
+
const output = execSync(command, { encoding: 'utf-8', timeout: 30000 });
|
|
436
|
+
this.send({ type: 'shell_exec_result', execId, success: true, output });
|
|
437
|
+
} catch (e) {
|
|
438
|
+
this.send({ type: 'shell_exec_result', execId, success: false, output: e.message });
|
|
439
|
+
}
|
|
440
|
+
break;
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
default:
|
|
444
|
+
console.log(`â ī¸ Unknown message type: ${message.type}`);
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
async executeTask(taskData) {
|
|
449
|
+
const { agentId, taskId } = taskData;
|
|
450
|
+
console.log(`đŦ Task received: ${taskId} for agent ${agentId}`);
|
|
451
|
+
|
|
452
|
+
// Initialize queue for this agent if it doesn't exist
|
|
453
|
+
if (!this.agentQueues.has(agentId)) {
|
|
454
|
+
this.agentQueues.set(agentId, []);
|
|
455
|
+
this.agentProcessing.set(agentId, false);
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
// Add task to queue
|
|
459
|
+
const queue = this.agentQueues.get(agentId);
|
|
460
|
+
queue.push(taskData);
|
|
461
|
+
|
|
462
|
+
const queueLength = queue.length;
|
|
463
|
+
const isProcessing = this.agentProcessing.get(agentId);
|
|
464
|
+
|
|
465
|
+
if (queueLength > 1 || isProcessing) {
|
|
466
|
+
console.log(`đĨ Queued: ${taskId} (${queueLength} in queue, processing=${isProcessing})`);
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
// Start processing queue if not already processing
|
|
470
|
+
this.processQueue(agentId);
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
async processQueue(agentId) {
|
|
474
|
+
// If already processing, check if it's stale
|
|
475
|
+
if (this.agentProcessing.get(agentId)) {
|
|
476
|
+
const startTime = this.processingStartTime.get(agentId);
|
|
477
|
+
const elapsed = startTime ? Date.now() - startTime : Infinity;
|
|
478
|
+
const queueLen = this.agentQueues.get(agentId)?.length || 0;
|
|
479
|
+
|
|
480
|
+
if (elapsed > this.PROCESSING_TIMEOUT_MS) {
|
|
481
|
+
console.log(`â ī¸ Agent ${agentId} stuck for ${Math.round(elapsed/1000)}s, forcing reset`);
|
|
482
|
+
this.agentProcessing.set(agentId, false);
|
|
483
|
+
this.processingStartTime.delete(agentId);
|
|
484
|
+
// Kill the stuck process
|
|
485
|
+
this.cli.cancelAgent(agentId);
|
|
486
|
+
} else {
|
|
487
|
+
// Log that we're waiting (every 30s to avoid spam)
|
|
488
|
+
if (elapsed > 0 && elapsed % 30000 < 1000 && queueLen > 0) {
|
|
489
|
+
console.log(`âŗ Agent ${agentId} busy (${Math.round(elapsed/1000)}s), ${queueLen} task(s) waiting`);
|
|
490
|
+
}
|
|
491
|
+
return; // Still processing, task will trigger next when done
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
const queue = this.agentQueues.get(agentId);
|
|
496
|
+
if (!queue || queue.length === 0) {
|
|
497
|
+
return; // Queue empty, nothing to do
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
// Mark as processing with timestamp
|
|
501
|
+
this.agentProcessing.set(agentId, true);
|
|
502
|
+
this.processingStartTime.set(agentId, Date.now());
|
|
503
|
+
console.log(`đ Starting task for ${agentId} (${queue.length} in queue)`);
|
|
504
|
+
|
|
505
|
+
// Get next task from queue
|
|
506
|
+
const taskData = queue.shift();
|
|
507
|
+
|
|
508
|
+
// Guard against undefined task data (can happen during reconnection race)
|
|
509
|
+
if (!taskData) {
|
|
510
|
+
console.warn(`â ī¸ Queue shift returned undefined for agent ${agentId}, clearing processing flag`);
|
|
511
|
+
this.agentProcessing.set(agentId, false);
|
|
512
|
+
return;
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
// Wrap execution with a hard timeout that ALWAYS resolves
|
|
516
|
+
// This ensures processQueue ALWAYS completes and clears state
|
|
517
|
+
// Should fire AFTER the task hard timeout but BEFORE the task truly hangs forever
|
|
518
|
+
const QUEUE_TIMEOUT_MS = 31 * 60 * 1000; // 31 minutes (slightly longer than 30 min task timeout)
|
|
519
|
+
|
|
520
|
+
let executeTaskCompleted = false;
|
|
521
|
+
let queueTimeoutFired = false;
|
|
522
|
+
let queueTimeoutTimer = null;
|
|
523
|
+
|
|
524
|
+
try {
|
|
525
|
+
await Promise.race([
|
|
526
|
+
this.executeTaskNow(taskData).then(result => {
|
|
527
|
+
executeTaskCompleted = true;
|
|
528
|
+
// CRITICAL: Clear the queue timeout since task completed successfully
|
|
529
|
+
if (queueTimeoutTimer) {
|
|
530
|
+
clearTimeout(queueTimeoutTimer);
|
|
531
|
+
queueTimeoutTimer = null;
|
|
532
|
+
}
|
|
533
|
+
console.log(`[${agentId}] executeTaskNow completed normally`);
|
|
534
|
+
return result;
|
|
535
|
+
}).catch(error => {
|
|
536
|
+
executeTaskCompleted = true;
|
|
537
|
+
// CRITICAL: Clear the queue timeout since task completed (with error)
|
|
538
|
+
if (queueTimeoutTimer) {
|
|
539
|
+
clearTimeout(queueTimeoutTimer);
|
|
540
|
+
queueTimeoutTimer = null;
|
|
541
|
+
}
|
|
542
|
+
console.log(`[${agentId}] executeTaskNow rejected with error: ${error.message}`);
|
|
543
|
+
throw error;
|
|
544
|
+
}),
|
|
545
|
+
new Promise((resolve) => {
|
|
546
|
+
queueTimeoutTimer = setTimeout(() => {
|
|
547
|
+
// Don't fire if task already completed
|
|
548
|
+
if (executeTaskCompleted) {
|
|
549
|
+
console.log(`â° Queue timeout fired but task already completed, ignoring`);
|
|
550
|
+
return;
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
queueTimeoutFired = true;
|
|
554
|
+
console.log(`â° QUEUE TIMEOUT: Task ${taskData.taskId} exceeded ${QUEUE_TIMEOUT_MS/1000}s`);
|
|
555
|
+
console.log(`â° executeTaskCompleted: ${executeTaskCompleted}`);
|
|
556
|
+
console.log(`â° This indicates executeTaskNow Promise never settled - likely OpenClawCLI.runAgentTask hung`);
|
|
557
|
+
|
|
558
|
+
// Force kill the agent process
|
|
559
|
+
const killed = this.cli.cancelAgent(agentId);
|
|
560
|
+
console.log(`â° cancelAgent result: ${killed}`);
|
|
561
|
+
|
|
562
|
+
// Send debug report for queue timeout (different from task timeout)
|
|
563
|
+
const diagnostics = this.collectDiagnostics(
|
|
564
|
+
agentId,
|
|
565
|
+
taskData.taskId,
|
|
566
|
+
new Error(`Queue timeout - Promise hung for ${QUEUE_TIMEOUT_MS/1000}s after task should have completed`),
|
|
567
|
+
'queue_timeout'
|
|
568
|
+
);
|
|
569
|
+
this.sendDebugReport(diagnostics, `QUEUE TIMEOUT: Task ${taskData.taskId} Promise never resolved. This usually means the OpenClaw process close event didn't fire. cancelAgent: ${killed}`);
|
|
570
|
+
|
|
571
|
+
resolve({ timeout: true });
|
|
572
|
+
}, QUEUE_TIMEOUT_MS);
|
|
573
|
+
})
|
|
574
|
+
]);
|
|
575
|
+
} catch (error) {
|
|
576
|
+
console.error(`â Task execution error for ${taskData.taskId}:`, error);
|
|
577
|
+
} finally {
|
|
578
|
+
// Clear timeout if still pending
|
|
579
|
+
if (queueTimeoutTimer) {
|
|
580
|
+
clearTimeout(queueTimeoutTimer);
|
|
581
|
+
queueTimeoutTimer = null;
|
|
582
|
+
}
|
|
583
|
+
// Log which path we took
|
|
584
|
+
if (queueTimeoutFired && !executeTaskCompleted) {
|
|
585
|
+
console.log(`[${agentId}] â ī¸ Queue timeout won the race - executeTaskNow never completed`);
|
|
586
|
+
}
|
|
587
|
+
// ALWAYS clear processing state - this is critical
|
|
588
|
+
console.log(`đ§š Clearing processing state for ${agentId}`);
|
|
589
|
+
this.agentProcessing.set(agentId, false);
|
|
590
|
+
this.processingStartTime.delete(agentId);
|
|
591
|
+
|
|
592
|
+
// Process next task if queue is not empty
|
|
593
|
+
if (queue.length > 0) {
|
|
594
|
+
if (queueTimeoutFired) {
|
|
595
|
+
// After a force-kill, wait 3s for the dead process to release openclaw's workspace
|
|
596
|
+
// locks before starting the next task â avoids "Agent failed: Unknown error" on restart
|
|
597
|
+
console.log(`đ¤ Waiting 3s for killed process cleanup before next task for ${agentId}...`);
|
|
598
|
+
setTimeout(() => this.processQueue(agentId), 3000);
|
|
599
|
+
} else {
|
|
600
|
+
console.log(`đ¤ Processing next queued task for ${agentId} (${queue.length} remaining)...`);
|
|
601
|
+
setImmediate(() => this.processQueue(agentId));
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
async executeTaskNow({ taskId, agentId, sessionId, message: userMessage, workDir, defaultProjectsPath, image, roomId, roomContext, isMaestro, conversationHistory, browserProfile, agentName, agentEmoji, runnerType }) {
|
|
608
|
+
const isMaestroTask = isMaestro || agentId === 'maestro';
|
|
609
|
+
console.log(`đ¤ Executing task ${taskId} for agent ${agentId}${isMaestroTask ? ' (MAESTRO)' : ''}${browserProfile ? ` [browser: ${browserProfile}]` : ''}`);
|
|
610
|
+
if (sessionId) {
|
|
611
|
+
console.log(` Session: ${sessionId} (resuming conversation)`);
|
|
612
|
+
}
|
|
613
|
+
if (image) {
|
|
614
|
+
console.log(` Image: Provided`);
|
|
615
|
+
}
|
|
616
|
+
if (roomId) {
|
|
617
|
+
console.log(` Room: ${roomId}`);
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
// Track this running task for cancellation support
|
|
621
|
+
this.runningTasks.set(taskId, { agentId, cancelled: false, isMaestro: isMaestroTask });
|
|
622
|
+
|
|
623
|
+
// Hard timeout for entire task - 30 minutes max (large projects with Opus can be slow)
|
|
624
|
+
const TASK_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes hard timeout
|
|
625
|
+
console.log(`âąī¸ Task ${taskId} timeout set for ${TASK_TIMEOUT_MS/1000}s`);
|
|
626
|
+
const taskTimeoutTimer = setTimeout(() => {
|
|
627
|
+
console.log(`â° TIMEOUT FIRED for ${taskId}`);
|
|
628
|
+
const taskInfo = this.runningTasks.get(taskId);
|
|
629
|
+
console.log(`â° taskInfo: ${JSON.stringify(taskInfo)}`);
|
|
630
|
+
if (taskInfo && !taskInfo.cancelled) {
|
|
631
|
+
console.log(`â° Task ${taskId} hit hard timeout (${TASK_TIMEOUT_MS/1000}s), force killing`);
|
|
632
|
+
const killed = this.cli.cancelAgent(agentId);
|
|
633
|
+
console.log(`â° cancelAgent returned: ${killed}`);
|
|
634
|
+
|
|
635
|
+
// Collect diagnostics for debug agent BEFORE cleanup
|
|
636
|
+
const diagnostics = this.collectDiagnostics(
|
|
637
|
+
agentId,
|
|
638
|
+
taskId,
|
|
639
|
+
new Error(`Task exceeded ${TASK_TIMEOUT_MS/1000}s hard timeout`),
|
|
640
|
+
'timeout'
|
|
641
|
+
);
|
|
642
|
+
|
|
643
|
+
// CRITICAL: Reset processing state so queue can continue
|
|
644
|
+
// even if executeTaskNow is still hanging
|
|
645
|
+
this.agentProcessing.set(agentId, false);
|
|
646
|
+
this.processingStartTime.delete(agentId);
|
|
647
|
+
this.runningTasks.delete(taskId);
|
|
648
|
+
console.log(`â° Reset processing state for ${agentId}`);
|
|
649
|
+
taskInfo.cancelled = true;
|
|
650
|
+
this.send({
|
|
651
|
+
type: 'task_failed',
|
|
652
|
+
taskId,
|
|
653
|
+
agentId,
|
|
654
|
+
roomId,
|
|
655
|
+
error: `Task timed out after ${TASK_TIMEOUT_MS/1000} seconds`
|
|
656
|
+
});
|
|
657
|
+
|
|
658
|
+
// Send debug report so debug-agent-system can investigate
|
|
659
|
+
this.sendDebugReport(diagnostics, `Task ${taskId} for agent ${agentId} hit hard timeout (${TASK_TIMEOUT_MS/1000}s). cancelAgent returned: ${killed}`);
|
|
660
|
+
}
|
|
661
|
+
}, TASK_TIMEOUT_MS);
|
|
662
|
+
|
|
663
|
+
// Closure set once wrapped handlers are created; callable from both try and catch paths
|
|
664
|
+
let _cleanup = null;
|
|
665
|
+
|
|
666
|
+
try {
|
|
667
|
+
// Agent workspace: always the dedicated /tmp folder for this agent
|
|
668
|
+
// This is where MEMORY.md, AGENTS.md, .canary, identity files live - never pollute user's projects folder
|
|
669
|
+
const agentWorkspaceDir = workDir || process.cwd();
|
|
670
|
+
|
|
671
|
+
// Task cwd: where the agent actually runs commands and reads/writes files
|
|
672
|
+
// If user configured a defaultProjectsPath in settings AND it exists locally, use that
|
|
673
|
+
// so the agent can see all the user's projects without needing to be told where they are
|
|
674
|
+
let taskCwd = agentWorkspaceDir;
|
|
675
|
+
if (defaultProjectsPath && defaultProjectsPath.trim()) {
|
|
676
|
+
const { existsSync } = await import('fs');
|
|
677
|
+
if (existsSync(defaultProjectsPath)) {
|
|
678
|
+
taskCwd = defaultProjectsPath;
|
|
679
|
+
console.log(`đ Using configured projects path as task cwd: ${taskCwd}`);
|
|
680
|
+
} else {
|
|
681
|
+
console.log(`â ī¸ Configured projects path not found locally (${defaultProjectsPath}), using default cwd: ${taskCwd}`);
|
|
682
|
+
}
|
|
683
|
+
}
|
|
684
|
+
// Keep actualWorkDir pointing to the agent's workspace for backward compat
|
|
685
|
+
const actualWorkDir = agentWorkspaceDir;
|
|
686
|
+
|
|
687
|
+
// Select runner — hampagent or openclaw
|
|
688
|
+
const useHampagent = runnerType === 'hampagent';
|
|
689
|
+
const activeRunner = useHampagent ? this.hampagent : this.cli;
|
|
690
|
+
|
|
691
|
+
if (useHampagent) {
|
|
692
|
+
// Pass identity so system prompt knows the agent's name/emoji
|
|
693
|
+
this.hampagent._agentName = agentName || null;
|
|
694
|
+
this.hampagent._agentEmoji = agentEmoji || null;
|
|
695
|
+
console.log(`đĻ
Using Hampagent runner for ${agentId}`);
|
|
696
|
+
} else {
|
|
697
|
+
// Guard against bloated OpenClaw sessions â trim tail, preserve recent context
|
|
698
|
+
this._guardOpenClawSession(agentId);
|
|
699
|
+
}
|
|
700
|
+
|
|
701
|
+
// Create agent if not exists (always in its dedicated workspace, not the projects folder)
|
|
702
|
+
if (!this.activeAgents.has(agentId)) {
|
|
703
|
+
console.log(`đĻ Creating agent ${agentId} workspace in ${actualWorkDir}`);
|
|
704
|
+
await activeRunner.createAgent(agentId, actualWorkDir);
|
|
705
|
+
this.activeAgents.set(agentId, { workDir: actualWorkDir, taskCwd });
|
|
706
|
+
}
|
|
707
|
+
|
|
708
|
+
// Set up output streaming
|
|
709
|
+
const outputHandler = (data) => {
|
|
710
|
+
if (data.agentId === agentId) {
|
|
711
|
+
// Check if task was cancelled - don't send any more output
|
|
712
|
+
const taskInfo = this.runningTasks.get(taskId);
|
|
713
|
+
if (!taskInfo || taskInfo.cancelled) {
|
|
714
|
+
return; // Task cancelled, drop all output
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
// Record activity to prevent stuck detection from firing
|
|
718
|
+
this.recordAgentActivity(agentId);
|
|
719
|
+
|
|
720
|
+
// Filter out tool error stack traces from room chat
|
|
721
|
+
const text = data.output?.trim();
|
|
722
|
+
if (text && roomId) {
|
|
723
|
+
// Don't broadcast internal tool errors to room chat
|
|
724
|
+
const isToolError = text.includes('tools:') && text.includes('failed stack:') ||
|
|
725
|
+
text.includes('Error:') && text.includes('at file:///') ||
|
|
726
|
+
text.includes('at async') && text.includes('.js:');
|
|
727
|
+
if (isToolError) {
|
|
728
|
+
console.log(` [Filtered tool error from room broadcast]`);
|
|
729
|
+
return;
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
|
|
733
|
+
// Send maestro-specific output for maestro tasks
|
|
734
|
+
if (isMaestroTask) {
|
|
735
|
+
// Detect when maestro spawns agents
|
|
736
|
+
const spawnMatch = text.match(/sessions_spawn\s+--id\s+([^\s]+)\s+--message\s+"([^"]+)"/);
|
|
737
|
+
if (spawnMatch) {
|
|
738
|
+
const spawnedAgentId = spawnMatch[1];
|
|
739
|
+
const spawnedTask = spawnMatch[2];
|
|
740
|
+
console.log(`đŧ Maestro spawned agent: ${spawnedAgentId}`);
|
|
741
|
+
|
|
742
|
+
// Emit agent spawned event
|
|
743
|
+
this.send({
|
|
744
|
+
type: 'maestro_agent_spawned',
|
|
745
|
+
taskId,
|
|
746
|
+
agentId: spawnedAgentId,
|
|
747
|
+
task: spawnedTask
|
|
748
|
+
});
|
|
749
|
+
}
|
|
750
|
+
|
|
751
|
+
this.send({
|
|
752
|
+
type: 'maestro_output',
|
|
753
|
+
taskId,
|
|
754
|
+
agentId,
|
|
755
|
+
output: data.output
|
|
756
|
+
});
|
|
757
|
+
} else {
|
|
758
|
+
this.send({
|
|
759
|
+
type: 'task_progress',
|
|
760
|
+
taskId,
|
|
761
|
+
agentId,
|
|
762
|
+
roomId,
|
|
763
|
+
output: data.output,
|
|
764
|
+
isChunk: true
|
|
765
|
+
});
|
|
766
|
+
}
|
|
767
|
+
}
|
|
768
|
+
};
|
|
769
|
+
|
|
770
|
+
// Handle agent_image events â screenshot the agent wants to send to the user's chat
|
|
771
|
+
const imageHandler = (data) => {
|
|
772
|
+
if (data.agentId === agentId) {
|
|
773
|
+
const taskInfo = this.runningTasks.get(taskId);
|
|
774
|
+
if (!taskInfo || taskInfo.cancelled) return;
|
|
775
|
+
this.send({
|
|
776
|
+
type: 'task_progress',
|
|
777
|
+
taskId,
|
|
778
|
+
agentId,
|
|
779
|
+
roomId,
|
|
780
|
+
output: '',
|
|
781
|
+
image: data.image
|
|
782
|
+
});
|
|
783
|
+
}
|
|
784
|
+
};
|
|
785
|
+
activeRunner.on('agent_image', imageHandler);
|
|
786
|
+
|
|
787
|
+
// Set up error streaming (stderr)
|
|
788
|
+
const errorHandler = (data) => {
|
|
789
|
+
if (data.agentId === agentId) {
|
|
790
|
+
// Check if task was cancelled - don't send any more errors
|
|
791
|
+
const taskInfo = this.runningTasks.get(taskId);
|
|
792
|
+
if (!taskInfo || taskInfo.cancelled) {
|
|
793
|
+
return; // Task cancelled, drop all output
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
// Filter out common non-critical stderr noise
|
|
797
|
+
const text = data.error?.trim();
|
|
798
|
+
if (!text) return;
|
|
799
|
+
|
|
800
|
+
const isNoise = text.includes('[Canary]') ||
|
|
801
|
+
text.includes('Plugin registered') ||
|
|
802
|
+
text.includes('browser/service') ||
|
|
803
|
+
text.includes('Debugger listening') ||
|
|
804
|
+
text.includes('EADDRINUSE') ||
|
|
805
|
+
text.includes('[openclaw]') ||
|
|
806
|
+
text.includes('Unhandled promise rejection') ||
|
|
807
|
+
text.includes('shared_storage_worklet') ||
|
|
808
|
+
text.includes('playwright-core') ||
|
|
809
|
+
text.includes('targetInfo') ||
|
|
810
|
+
text.includes('[diagnostic]') ||
|
|
811
|
+
text.includes('[stderr]') ||
|
|
812
|
+
text.includes('crBrowser') ||
|
|
813
|
+
text.includes('crConnection');
|
|
814
|
+
|
|
815
|
+
if (!isNoise) {
|
|
816
|
+
this.send({
|
|
817
|
+
type: 'task_error',
|
|
818
|
+
taskId,
|
|
819
|
+
agentId,
|
|
820
|
+
roomId,
|
|
821
|
+
error: text
|
|
822
|
+
});
|
|
823
|
+
}
|
|
824
|
+
}
|
|
825
|
+
};
|
|
826
|
+
|
|
827
|
+
// Set up tool activity streaming (shows what tool agent is using)
|
|
828
|
+
const toolActivityHandler = (data) => {
|
|
829
|
+
if (data.agentId === agentId) {
|
|
830
|
+
// Record activity to prevent stuck detection from firing
|
|
831
|
+
this.recordAgentActivity(agentId);
|
|
832
|
+
|
|
833
|
+
// tts tool is handled natively by openclaw â do not intercept or emit anything
|
|
834
|
+
|
|
835
|
+
let toolInputPreview;
|
|
836
|
+
if (data.toolInput) {
|
|
837
|
+
try {
|
|
838
|
+
toolInputPreview = JSON.stringify(data.toolInput).slice(0, 2000);
|
|
839
|
+
} catch {
|
|
840
|
+
toolInputPreview = undefined;
|
|
841
|
+
}
|
|
842
|
+
}
|
|
843
|
+
|
|
844
|
+
this.send({
|
|
845
|
+
type: 'tool_activity',
|
|
846
|
+
taskId,
|
|
847
|
+
agentId,
|
|
848
|
+
roomId,
|
|
849
|
+
event: data.event,
|
|
850
|
+
tool: data.tool,
|
|
851
|
+
description: data.description,
|
|
852
|
+
input: toolInputPreview
|
|
853
|
+
});
|
|
854
|
+
}
|
|
855
|
+
};
|
|
856
|
+
|
|
857
|
+
activeRunner.on('agent_output', outputHandler);
|
|
858
|
+
activeRunner.on('agent_error', errorHandler);
|
|
859
|
+
activeRunner.on('tool_activity', toolActivityHandler);
|
|
860
|
+
|
|
861
|
+
// Listen for raw alive signals (any stdout, even filtered) to prevent false stuck detection
|
|
862
|
+
const aliveHandler = (data) => {
|
|
863
|
+
if (data.agentId === agentId) {
|
|
864
|
+
this.recordAgentActivity(agentId);
|
|
865
|
+
}
|
|
866
|
+
};
|
|
867
|
+
activeRunner.on('agent_alive', aliveHandler);
|
|
868
|
+
|
|
869
|
+
// Inactivity: warn after INACTIVITY_WARN_MS (60s), KILL after INACTIVITY_KILL_MS (2 minutes) of silence (no stdout at all)
|
|
870
|
+
const INACTIVITY_WARN_MS = 60000;
|
|
871
|
+
const INACTIVITY_KILL_MS = 2 * 60 * 1000; // 2 minutes â kill truly hung openclaw
|
|
872
|
+
let lastActivityTime = Date.now();
|
|
873
|
+
let inactivityTimer = null;
|
|
874
|
+
let inactivityKillTimer = null;
|
|
875
|
+
let currentTool = null; // Track which tool is currently running
|
|
876
|
+
let promiseSettled = false; // Prevent kill timer from firing after task completes
|
|
877
|
+
|
|
878
|
+
// Tools that are expected to take a while - don't warn about these
|
|
879
|
+
const QUIET_TOOLS = ['Editing file', 'Writing file', 'Reading file', 'edit', 'write', 'read'];
|
|
880
|
+
|
|
881
|
+
const clearInactivityTimers = () => {
|
|
882
|
+
if (inactivityTimer) { clearTimeout(inactivityTimer); inactivityTimer = null; }
|
|
883
|
+
if (inactivityKillTimer) { clearTimeout(inactivityKillTimer); inactivityKillTimer = null; }
|
|
884
|
+
};
|
|
885
|
+
|
|
886
|
+
const resetInactivityTimer = () => {
|
|
887
|
+
lastActivityTime = Date.now();
|
|
888
|
+
// Also reset the ping-based stuck detector so 300s kill doesn't fire during active work
|
|
889
|
+
this.lastAgentActivity.set(agentId, Date.now());
|
|
890
|
+
this.pingsSinceActivity.set(agentId, 0);
|
|
891
|
+
clearInactivityTimers();
|
|
892
|
+
|
|
893
|
+
// Warn after INACTIVITY_WARN_MS (60s)
|
|
894
|
+
inactivityTimer = setTimeout(() => {
|
|
895
|
+
const taskInfo = this.runningTasks.get(taskId);
|
|
896
|
+
if (taskInfo && !taskInfo.cancelled) {
|
|
897
|
+
if (currentTool && QUIET_TOOLS.some(t => currentTool.toLowerCase().includes(t.toLowerCase()))) {
|
|
898
|
+
resetInactivityTimer(); // quiet tool â just reset and check again later
|
|
899
|
+
return;
|
|
900
|
+
}
|
|
901
|
+
const stuckTool = currentTool ? ` while running "${currentTool}"` : '';
|
|
902
|
+
this.send({
|
|
903
|
+
type: 'task_warning',
|
|
904
|
+
taskId,
|
|
905
|
+
agentId,
|
|
906
|
+
roomId,
|
|
907
|
+
warning: `No activity for ${INACTIVITY_WARN_MS/1000} seconds${stuckTool} - agent may be stuck`,
|
|
908
|
+
lastTool: currentTool
|
|
909
|
+
});
|
|
910
|
+
}
|
|
911
|
+
}, INACTIVITY_WARN_MS);
|
|
912
|
+
|
|
913
|
+
// Kill after INACTIVITY_KILL_MS (2 minutes) — openclaw is definitely hung
|
|
914
|
+
inactivityKillTimer = setTimeout(() => {
|
|
915
|
+
if (promiseSettled) return;
|
|
916
|
+
const taskInfo = this.runningTasks.get(taskId);
|
|
917
|
+
if (taskInfo && !taskInfo.cancelled) {
|
|
918
|
+
console.warn(`[${agentId}] â ī¸ No output for ${INACTIVITY_KILL_MS/1000}s â openclaw hung mid-task, killing`);
|
|
919
|
+
promiseSettled = true;
|
|
920
|
+
// cancelAgent kills the process tree; OpenClawCLI's close handler will reject runAgentTask
|
|
921
|
+
activeRunner.cancelAgent(agentId);
|
|
922
|
+
}
|
|
923
|
+
}, INACTIVITY_KILL_MS);
|
|
924
|
+
};
|
|
925
|
+
|
|
926
|
+
// Track tool lifecycle
|
|
927
|
+
const toolLifecycleHandler = (data) => {
|
|
928
|
+
if (data.agentId === agentId && data.event) {
|
|
929
|
+
if (data.event === 'start') {
|
|
930
|
+
currentTool = data.description || data.tool;
|
|
931
|
+
} else if (data.event === 'complete') {
|
|
932
|
+
currentTool = null;
|
|
933
|
+
}
|
|
934
|
+
}
|
|
935
|
+
};
|
|
936
|
+
activeRunner.on('tool_activity', toolLifecycleHandler);
|
|
937
|
+
|
|
938
|
+
// Reset timer on any activity
|
|
939
|
+
const activityWrapper = (handler) => (data) => {
|
|
940
|
+
resetInactivityTimer();
|
|
941
|
+
handler(data);
|
|
942
|
+
};
|
|
943
|
+
|
|
944
|
+
// Wrap handlers to track activity
|
|
945
|
+
const wrappedOutputHandler = activityWrapper(outputHandler);
|
|
946
|
+
const wrappedToolHandler = activityWrapper(toolActivityHandler);
|
|
947
|
+
const wrappedAliveHandler = activityWrapper(aliveHandler);
|
|
948
|
+
|
|
949
|
+
activeRunner.off('agent_output', outputHandler);
|
|
950
|
+
activeRunner.off('tool_activity', toolActivityHandler);
|
|
951
|
+
activeRunner.off('agent_alive', aliveHandler);
|
|
952
|
+
activeRunner.on('agent_output', wrappedOutputHandler);
|
|
953
|
+
activeRunner.on('tool_activity', wrappedToolHandler);
|
|
954
|
+
activeRunner.on('agent_alive', wrappedAliveHandler);
|
|
955
|
+
|
|
956
|
+
// Capture cleanup as a callable closure so both try and catch paths can use it
|
|
957
|
+
_cleanup = () => {
|
|
958
|
+
promiseSettled = true; // Prevent inactivity kill timer from firing after task ends
|
|
959
|
+
clearInactivityTimers();
|
|
960
|
+
activeRunner.off('agent_output', wrappedOutputHandler);
|
|
961
|
+
activeRunner.off('agent_error', errorHandler);
|
|
962
|
+
activeRunner.off('tool_activity', wrappedToolHandler);
|
|
963
|
+
activeRunner.off('tool_activity', toolLifecycleHandler);
|
|
964
|
+
activeRunner.off('agent_alive', wrappedAliveHandler);
|
|
965
|
+
activeRunner.off('agent_image', imageHandler);
|
|
966
|
+
};
|
|
967
|
+
|
|
968
|
+
resetInactivityTimer(); // Start the timer
|
|
969
|
+
|
|
970
|
+
// Build message with room context if in a room
|
|
971
|
+
let finalMessage = userMessage;
|
|
972
|
+
if (roomContext) {
|
|
973
|
+
// Prepend room context to message
|
|
974
|
+
const contextInfo = [
|
|
975
|
+
`You are in a multi-agent room: ${roomContext.roomName}`,
|
|
976
|
+
`Participants: ${roomContext.participants.map(p => `${p.emoji} ${p.name}${p.isYou ? ' (you)' : ''}`).join(', ')}`,
|
|
977
|
+
roomContext.instructions,
|
|
978
|
+
'',
|
|
979
|
+
`User message: ${userMessage}`
|
|
980
|
+
].join('\n');
|
|
981
|
+
finalMessage = contextInfo;
|
|
982
|
+
}
|
|
983
|
+
|
|
984
|
+
// Inject platform context into EVERY message so the agent always knows:
|
|
985
|
+
// 1. What platform it's running on and its URL
|
|
986
|
+
// 2. Where the user's projects folder is
|
|
987
|
+
// 3. Screenshot capabilities
|
|
988
|
+
const platformContext = [
|
|
989
|
+
`[System context:`,
|
|
990
|
+
`- Platform: AgentForge.ai. Dashboard: https://agentforgeai-production.up.railway.app/dashboard. CRITICAL: Always use the built-in 'browser' tool for ALL web browsing AND web searches â NEVER use the 'web_search' tool (no API keys are configured), NEVER run shell commands like 'open', 'google-chrome', 'chromium', or any OS command to launch a browser. The browser tool connects to AgentForge Browser (port 9223) automatically. To search: use browser to navigate to google.com or perplexity.ai.`,
|
|
991
|
+
`- Your runner: ${useHampagent ? 'Hampagent' : 'OpenClaw'}.`,
|
|
992
|
+
(!conversationHistory || conversationHistory.length === 0)
|
|
993
|
+
? `- This is the first message. When greeting, say: "I'm [your name] â your ${useHampagent ? 'Hampagent' : 'OpenClaw'} agent running on AgentForge." Never say "autonomous AI agent". Never list capabilities in an intro.`
|
|
994
|
+
: `- This is a continuing conversation. Do NOT re-introduce yourself.`,
|
|
995
|
+
agentName
|
|
996
|
+
? `- Your name is "${agentName}"${agentEmoji ? ` ${agentEmoji}` : ''}. This is your AgentForge identity. Do not ask the user who you are or what your name is â you already know.`
|
|
997
|
+
: null,
|
|
998
|
+
taskCwd && taskCwd !== agentWorkspaceDir
|
|
999
|
+
? `- Working directory: "${taskCwd}" â user's projects folder. Check here first for any project by name.`
|
|
1000
|
+
: null,
|
|
1001
|
+
agentWorkspaceDir
|
|
1002
|
+
? `- Screenshots: screencapture -x ${agentWorkspaceDir}/ss1.png && sips -Z 1280 ${agentWorkspaceDir}/ss1.png (MUST resize â API rejects images over 5MB). Send to chat with: echo "AGENTFORGE_IMAGE:${agentWorkspaceDir}/ss1.png". Always screenshot visual work before saying done. NEVER use "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --headless for screenshots â use screencapture only.`
|
|
1003
|
+
: `- Screenshots: screencapture -x /tmp/ss1.png && sips -Z 1280 /tmp/ss1.png (MUST resize â API rejects images over 5MB). Send to chat with: echo "AGENTFORGE_IMAGE:/tmp/ss1.png". Always screenshot visual work before saying done. NEVER use "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --headless for screenshots â use screencapture only.`,
|
|
1004
|
+
`]`
|
|
1005
|
+
].filter(Boolean).join('\n');
|
|
1006
|
+
finalMessage = platformContext + '\n\n' + finalMessage;
|
|
1007
|
+
|
|
1008
|
+
// If conversation history was loaded from DB (e.g. session expired, worker restarted,
|
|
1009
|
+
// or user returning hours later), prepend it so the agent has full context.
|
|
1010
|
+
// Only do this for non-room tasks and when we actually have history.
|
|
1011
|
+
if (!roomContext && conversationHistory && conversationHistory.length > 0) {
|
|
1012
|
+
// When the gateway is configured, openclaw maintains session state natively via the
|
|
1013
|
+
// x-openclaw-session-key header â no need to inject history manually.
|
|
1014
|
+
// Only inject history when gateway is unavailable (subprocess fallback).
|
|
1015
|
+
// Hampagent manages its own session history natively â never inject DB history for it
|
|
1016
|
+
const gatewayActive = !!(this.cli.gatewayPort && this.cli.gatewayToken);
|
|
1017
|
+
const sessionExists = useHampagent || gatewayActive;
|
|
1018
|
+
|
|
1019
|
+
if (!sessionExists) {
|
|
1020
|
+
// Session is gone â inject DB history as context prefix so the agent remembers
|
|
1021
|
+
console.log(`đ Session workspace not found for ${agentId}, injecting ${conversationHistory.length} messages from DB history`);
|
|
1022
|
+
// Strip tool errors and stack traces from stored assistant messages before injecting
|
|
1023
|
+
const stripHistoryNoise = (text) => {
|
|
1024
|
+
if (!text) return text;
|
|
1025
|
+
return text.split('\n').filter(line => {
|
|
1026
|
+
const t = line.trim();
|
|
1027
|
+
return !(
|
|
1028
|
+
t.startsWith('tools:') ||
|
|
1029
|
+
t.includes('failed stack:') ||
|
|
1030
|
+
/^\s*at\s+\S/.test(line) ||
|
|
1031
|
+
t.startsWith('at file:///') || t.startsWith('at async ') || t.startsWith('at Object.')
|
|
1032
|
+
);
|
|
1033
|
+
}).join('\n').trim();
|
|
1034
|
+
};
|
|
1035
|
+
const stripSystemContext = (text) => {
|
|
1036
|
+
if (!text) return text;
|
|
1037
|
+
// Remove stale [System context: ...] blocks injected by older worker builds
|
|
1038
|
+
// (e.g. old Chrome screenshot instructions). Current context is always re-injected fresh.
|
|
1039
|
+
return text.replace(/\[System context:[\s\S]*?\n\]/g, '').trim();
|
|
1040
|
+
};
|
|
1041
|
+
const historyText = conversationHistory
|
|
1042
|
+
.slice(-5) // last 5 messages â keep context small to prevent API hangs
|
|
1043
|
+
.map(msg => {
|
|
1044
|
+
const role = msg.role === 'user' ? 'User' : 'Assistant';
|
|
1045
|
+
const content = msg.role === 'user'
|
|
1046
|
+
? stripSystemContext(msg.content)
|
|
1047
|
+
: stripHistoryNoise(msg.content);
|
|
1048
|
+
return `${role}: ${content}`;
|
|
1049
|
+
})
|
|
1050
|
+
.join('\n\n');
|
|
1051
|
+
// Prepend history to finalMessage (which already contains platform context + userMessage)
|
|
1052
|
+
finalMessage = `[Conversation history â you are resuming a prior session]\n\n${historyText}\n\n[End of history]\n\n${finalMessage}`;
|
|
1053
|
+
} else {
|
|
1054
|
+
console.log(`đ Session workspace found for ${agentId}, openclaw will use its own session memory`);
|
|
1055
|
+
}
|
|
1056
|
+
}
|
|
1057
|
+
|
|
1058
|
+
// ââ Skills: load matching skill files from agent workspace ââââââââââââââ
|
|
1059
|
+
// Skills are .md files in {workspace}/skills/ with YAML frontmatter.
|
|
1060
|
+
// Matching ones are prepended so the agent has relevant context upfront.
|
|
1061
|
+
if (!isMaestroTask && !roomContext) {
|
|
1062
|
+
const skillContext = this.loadMatchingSkills(actualWorkDir, userMessage);
|
|
1063
|
+
if (skillContext) {
|
|
1064
|
+
console.log(`[${taskId}] đ Injecting matching skill(s)`);
|
|
1065
|
+
finalMessage = `[Relevant skill from your skills library]\n${skillContext}\n\n---\n\n${finalMessage}`;
|
|
1066
|
+
}
|
|
1067
|
+
|
|
1068
|
+
// No verification protocol â it adds tokens and causes openclaw to hang on large contexts
|
|
1069
|
+
}
|
|
1070
|
+
|
|
1071
|
+
// ââ Iteration loop: run up to 3 times, refining based on agent feedback â
|
|
1072
|
+
const MAX_ITERATIONS = isMaestroTask || roomContext ? 1 : 3;
|
|
1073
|
+
let iteration = 0;
|
|
1074
|
+
let taskResult;
|
|
1075
|
+
let iterationMessage = finalMessage;
|
|
1076
|
+
|
|
1077
|
+
while (iteration < MAX_ITERATIONS) {
|
|
1078
|
+
iteration++;
|
|
1079
|
+
|
|
1080
|
+
if (iteration > 1) {
|
|
1081
|
+
console.log(`[${taskId}] đ Iteration ${iteration}/${MAX_ITERATIONS}`);
|
|
1082
|
+
this.send({
|
|
1083
|
+
type: 'task_iteration',
|
|
1084
|
+
taskId,
|
|
1085
|
+
agentId,
|
|
1086
|
+
roomId,
|
|
1087
|
+
iteration,
|
|
1088
|
+
maxIterations: MAX_ITERATIONS
|
|
1089
|
+
});
|
|
1090
|
+
}
|
|
1091
|
+
|
|
1092
|
+
console.log(`[${taskId}] đ Runner: ${useHampagent ? '⥠HAMPAGENT' : 'đ§ OPENCLAW'} â agent ${agentId} iteration ${iteration}`);
|
|
1093
|
+
const runAgentStart = Date.now();
|
|
1094
|
+
taskResult = await activeRunner.runAgentTask(
|
|
1095
|
+
agentId, iterationMessage, taskCwd, sessionId, iteration === 1 ? image : null, browserProfile, actualWorkDir
|
|
1096
|
+
);
|
|
1097
|
+
const runAgentDuration = Date.now() - runAgentStart;
|
|
1098
|
+
console.log(`[${taskId}] runAgentTask iteration ${iteration} returned after ${runAgentDuration}ms, success=${taskResult?.success}`);
|
|
1099
|
+
|
|
1100
|
+
const output = taskResult?.result?.output || '';
|
|
1101
|
+
|
|
1102
|
+
if (output.includes('â TASK_COMPLETE')) {
|
|
1103
|
+
if (iteration > 1) {
|
|
1104
|
+
// Save a skill stub so future similar tasks start better
|
|
1105
|
+
this.saveIteratedSkill(agentId, actualWorkDir, userMessage, iteration).catch(() => {});
|
|
1106
|
+
this.send({
|
|
1107
|
+
type: 'task_skill_saved',
|
|
1108
|
+
taskId,
|
|
1109
|
+
agentId,
|
|
1110
|
+
roomId,
|
|
1111
|
+
iterations: iteration
|
|
1112
|
+
});
|
|
1113
|
+
}
|
|
1114
|
+
break;
|
|
1115
|
+
}
|
|
1116
|
+
|
|
1117
|
+
// Check for iterate signal â if present and iterations remain, continue
|
|
1118
|
+
const iterateMatch = output.match(/âģ ITERATE:\s*(.+?)(?:\n|$)/);
|
|
1119
|
+
if (iterateMatch && iteration < MAX_ITERATIONS) {
|
|
1120
|
+
const feedback = iterateMatch[1].trim();
|
|
1121
|
+
console.log(`[${taskId}] Agent self-correcting: ${feedback}`);
|
|
1122
|
+
iterationMessage = `Your previous attempt had this issue: "${feedback}"\n\nPlease revise your work to fix it. When done, end with â TASK_COMPLETE or âģ ITERATE: [remaining issue].`;
|
|
1123
|
+
} else {
|
|
1124
|
+
break; // No iterate signal or max iterations reached
|
|
1125
|
+
}
|
|
1126
|
+
}
|
|
1127
|
+
|
|
1128
|
+
// Clean up listeners and timer
|
|
1129
|
+
if (_cleanup) { _cleanup(); _cleanup = null; }
|
|
1130
|
+
|
|
1131
|
+
// Check if task was cancelled or already handled by timeout
|
|
1132
|
+
const taskInfo = this.runningTasks.get(taskId);
|
|
1133
|
+
if (!taskInfo || taskInfo.cancelled) {
|
|
1134
|
+
console.log(`â ī¸ Task ${taskId} was already handled (cancelled/timeout), skipping`);
|
|
1135
|
+
return;
|
|
1136
|
+
}
|
|
1137
|
+
|
|
1138
|
+
// Use AgentForge-assigned identity when provided (from task_assign).
|
|
1139
|
+
// Fall back to querying OpenClaw only when no name was supplied.
|
|
1140
|
+
let identity = { identityName: agentId, identityEmoji: 'đ¤' };
|
|
1141
|
+
if (agentName) {
|
|
1142
|
+
identity = { identityName: agentName, identityEmoji: agentEmoji || 'đ¤' };
|
|
1143
|
+
console.log(`[${taskId}] Using AgentForge identity: ${identity.identityName}`);
|
|
1144
|
+
} else {
|
|
1145
|
+
console.log(`[${taskId}] Getting agent identity from OpenClaw...`);
|
|
1146
|
+
const identityStart = Date.now();
|
|
1147
|
+
try {
|
|
1148
|
+
const identityPromise = this.cli.getAgentIdentity(agentId);
|
|
1149
|
+
const timeoutPromise = new Promise(r => setTimeout(() => r(null), 5000)); // 5s max
|
|
1150
|
+
const result = await Promise.race([identityPromise, timeoutPromise]);
|
|
1151
|
+
if (result) {
|
|
1152
|
+
identity = result;
|
|
1153
|
+
} else {
|
|
1154
|
+
console.log(`[${taskId}] â ī¸ getAgentIdentity timed out after ${Date.now() - identityStart}ms`);
|
|
1155
|
+
}
|
|
1156
|
+
} catch (e) {
|
|
1157
|
+
console.log(`[${taskId}] â ī¸ getAgentIdentity failed after ${Date.now() - identityStart}ms: ${e.message}`);
|
|
1158
|
+
}
|
|
1159
|
+
console.log(`[${taskId}] Got identity in ${Date.now() - identityStart}ms: ${identity.identityName}`);
|
|
1160
|
+
}
|
|
1161
|
+
|
|
1162
|
+
// Send completion with identity info, final response text, and sessionId for maestro
|
|
1163
|
+
// Filter openclaw's "No reply from agent." placeholder â it appears when the agent only
|
|
1164
|
+
// used tools with no text response (e.g. TTS-only tasks). If we send it, the browser's
|
|
1165
|
+
// filterAgentMarkers will strip it, leaving a blank bubble. Instead, leave response
|
|
1166
|
+
// undefined so the server falls back to the accumulated task_progress text (e.g. "đ Spoke aloud: ...").
|
|
1167
|
+
const rawOutput = taskResult?.result?.output || '';
|
|
1168
|
+
console.log(`[${taskId}] đ taskResult.result.output (${rawOutput.length} chars): "${rawOutput.slice(0, 200)}"`);
|
|
1169
|
+
let finalOutput = rawOutput.trim();
|
|
1170
|
+
if (/^no reply from agent\.?$/i.test(finalOutput)) {
|
|
1171
|
+
console.log(`[${taskId}] đ Filtered "No reply from agent." from finalOutput`);
|
|
1172
|
+
finalOutput = '';
|
|
1173
|
+
}
|
|
1174
|
+
// If the output is just an error stack trace / tool failure message, clear it
|
|
1175
|
+
// so the server falls back to accumulated task_progress text instead
|
|
1176
|
+
if (finalOutput &&
|
|
1177
|
+
(finalOutput.startsWith('tools:') || finalOutput.startsWith('Error:') ||
|
|
1178
|
+
finalOutput.includes('failed stack:') || finalOutput.includes('at file:///') ||
|
|
1179
|
+
/^\s*at\s+/.test(finalOutput))) {
|
|
1180
|
+
console.log(`[${taskId}] đ Filtered error-only output from finalOutput: "${finalOutput.slice(0,100)}"`);
|
|
1181
|
+
finalOutput = '';
|
|
1182
|
+
}
|
|
1183
|
+
// If the task succeeded but produced no text, emit a minimal completion token
|
|
1184
|
+
// so the browser never shows "No response received" for a successful task.
|
|
1185
|
+
if (!finalOutput && taskResult?.success) {
|
|
1186
|
+
finalOutput = 'â Done.';
|
|
1187
|
+
console.log(`[${taskId}] âšī¸ Task succeeded with no text output â using default completion message`);
|
|
1188
|
+
}
|
|
1189
|
+
console.log(`[${taskId}] đ¤ finalOutput="${finalOutput.slice(0,100)}" response=${finalOutput ? `"${finalOutput.slice(0,80)}"` : 'undefined'}`);
|
|
1190
|
+
const completionMessage = {
|
|
1191
|
+
type: 'task_complete',
|
|
1192
|
+
taskId,
|
|
1193
|
+
agentId,
|
|
1194
|
+
roomId,
|
|
1195
|
+
identity,
|
|
1196
|
+
response: finalOutput || undefined
|
|
1197
|
+
};
|
|
1198
|
+
|
|
1199
|
+
// Include sessionId for maestro to maintain conversation
|
|
1200
|
+
if (isMaestroTask && sessionId) {
|
|
1201
|
+
completionMessage.sessionId = sessionId;
|
|
1202
|
+
}
|
|
1203
|
+
|
|
1204
|
+
console.log(`[${taskId}] Sending completion message...`);
|
|
1205
|
+
this.send(completionMessage);
|
|
1206
|
+
|
|
1207
|
+
console.log(`â
Task ${taskId} completed (${identity.identityName})`);
|
|
1208
|
+
|
|
1209
|
+
// Clear hard timeout and clean up
|
|
1210
|
+
console.log(`[${taskId}] Clearing timeout and task tracking...`);
|
|
1211
|
+
clearTimeout(taskTimeoutTimer);
|
|
1212
|
+
this.runningTasks.delete(taskId);
|
|
1213
|
+
console.log(`[${taskId}] executeTaskNow DONE - returning normally`);
|
|
1214
|
+
} catch (error) {
|
|
1215
|
+
// Always clean up listeners â this path was previously missing cleanup,
|
|
1216
|
+
// causing stacked listeners across multiple task runs
|
|
1217
|
+
if (_cleanup) { _cleanup(); _cleanup = null; }
|
|
1218
|
+
|
|
1219
|
+
// Clear hard timeout
|
|
1220
|
+
clearTimeout(taskTimeoutTimer);
|
|
1221
|
+
|
|
1222
|
+
// Check if this was a cancellation or timeout
|
|
1223
|
+
// If taskInfo is missing, timeout already handled it
|
|
1224
|
+
const taskInfo = this.runningTasks.get(taskId);
|
|
1225
|
+
if (!taskInfo || taskInfo.cancelled) {
|
|
1226
|
+
console.log(`â ī¸ Task ${taskId} was already cancelled/timed out, ignoring error`);
|
|
1227
|
+
return;
|
|
1228
|
+
}
|
|
1229
|
+
|
|
1230
|
+
console.error(`â Task ${taskId} failed:`, error);
|
|
1231
|
+
|
|
1232
|
+
// Collect detailed diagnostics
|
|
1233
|
+
const diagnostics = this.collectDiagnostics(agentId, taskId, error, 'error');
|
|
1234
|
+
|
|
1235
|
+
// Kill the agent process â try both runners (only one will be active)
|
|
1236
|
+
this.cli.cancelAgent(agentId);
|
|
1237
|
+
this.hampagent?.cancelAgent(agentId);
|
|
1238
|
+
|
|
1239
|
+
this.send({
|
|
1240
|
+
type: 'task_failed',
|
|
1241
|
+
taskId,
|
|
1242
|
+
agentId,
|
|
1243
|
+
roomId,
|
|
1244
|
+
error: error.message,
|
|
1245
|
+
stack: error.stack
|
|
1246
|
+
});
|
|
1247
|
+
|
|
1248
|
+
// Send debug report for investigation
|
|
1249
|
+
this.sendDebugReport(diagnostics, `Agent ${agentId} failed during task ${taskId}: ${error.message}`);
|
|
1250
|
+
|
|
1251
|
+
// Clean up task tracking
|
|
1252
|
+
this.runningTasks.delete(taskId);
|
|
1253
|
+
}
|
|
1254
|
+
}
|
|
1255
|
+
|
|
1256
|
+
async cancelTask(taskId) {
|
|
1257
|
+
console.log(`đ CANCEL RECEIVED for task ${taskId}`);
|
|
1258
|
+
|
|
1259
|
+
const taskInfo = this.runningTasks.get(taskId);
|
|
1260
|
+
if (!taskInfo) {
|
|
1261
|
+
console.log(`â ī¸ Task ${taskId} not found in runningTasks (size: ${this.runningTasks.size})`);
|
|
1262
|
+
console.log(` runningTasks keys: ${[...this.runningTasks.keys()].join(', ')}`);
|
|
1263
|
+
// Still send cancelled message to browser to clear UI state
|
|
1264
|
+
this.send({
|
|
1265
|
+
type: 'task_cancelled',
|
|
1266
|
+
taskId
|
|
1267
|
+
});
|
|
1268
|
+
return;
|
|
1269
|
+
}
|
|
1270
|
+
|
|
1271
|
+
const { agentId } = taskInfo;
|
|
1272
|
+
console.log(`đ Found task for agent ${agentId}, cancelling...`);
|
|
1273
|
+
|
|
1274
|
+
// Mark as cancelled so completion handler knows
|
|
1275
|
+
taskInfo.cancelled = true;
|
|
1276
|
+
|
|
1277
|
+
// Kill the running process â try both runners (only one will be active)
|
|
1278
|
+
console.log(`đ Calling cancelAgent for ${agentId}...`);
|
|
1279
|
+
const killed = this.cli.cancelAgent(agentId) || this.hampagent?.cancelAgent(agentId) || false;
|
|
1280
|
+
console.log(`đ cancelAgent returned: ${killed}`);
|
|
1281
|
+
|
|
1282
|
+
// Always clear state and notify browser, regardless of kill success
|
|
1283
|
+
this.agentQueues.set(agentId, []);
|
|
1284
|
+
this.agentProcessing.set(agentId, false);
|
|
1285
|
+
this.processingStartTime.delete(agentId);
|
|
1286
|
+
|
|
1287
|
+
// Notify the server that the task was cancelled
|
|
1288
|
+
this.send({
|
|
1289
|
+
type: 'task_cancelled',
|
|
1290
|
+
taskId,
|
|
1291
|
+
agentId
|
|
1292
|
+
});
|
|
1293
|
+
|
|
1294
|
+
console.log(`â
Task ${taskId} cancelled, kill result: ${killed}`);
|
|
1295
|
+
|
|
1296
|
+
// Clean up
|
|
1297
|
+
this.runningTasks.delete(taskId);
|
|
1298
|
+
}
|
|
1299
|
+
|
|
1300
|
+
async cancelTaskByAgent(agentId) {
|
|
1301
|
+
console.log(`đ Cancelling task for agent ${agentId}`);
|
|
1302
|
+
|
|
1303
|
+
// Find the running task for this agent
|
|
1304
|
+
let taskId = null;
|
|
1305
|
+
for (const [tid, info] of this.runningTasks.entries()) {
|
|
1306
|
+
if (info.agentId === agentId) {
|
|
1307
|
+
taskId = tid;
|
|
1308
|
+
break;
|
|
1309
|
+
}
|
|
1310
|
+
}
|
|
1311
|
+
|
|
1312
|
+
if (taskId) {
|
|
1313
|
+
await this.cancelTask(taskId);
|
|
1314
|
+
} else {
|
|
1315
|
+
// Still try to kill the process even if we don't have a taskId â try both runners
|
|
1316
|
+
const killed = this.cli.cancelAgent(agentId) || this.hampagent?.cancelAgent(agentId) || false;
|
|
1317
|
+
|
|
1318
|
+
// Always clear the queue and state for this agent
|
|
1319
|
+
this.agentQueues.set(agentId, []);
|
|
1320
|
+
this.agentProcessing.set(agentId, false);
|
|
1321
|
+
this.processingStartTime.delete(agentId);
|
|
1322
|
+
|
|
1323
|
+
if (killed) {
|
|
1324
|
+
console.log(`â
Agent ${agentId} task cancelled successfully`);
|
|
1325
|
+
} else {
|
|
1326
|
+
console.log(`â ī¸ No running task found for agent ${agentId}, clearing state anyway`);
|
|
1327
|
+
}
|
|
1328
|
+
|
|
1329
|
+
// ALWAYS send task_cancelled to clear browser UI state, regardless of kill success
|
|
1330
|
+
this.send({
|
|
1331
|
+
type: 'task_cancelled',
|
|
1332
|
+
agentId
|
|
1333
|
+
});
|
|
1334
|
+
}
|
|
1335
|
+
}
|
|
1336
|
+
|
|
1337
|
+
// ── Skills System ──────────────────────────────────────────────────────────
|
|
1338
|
+
|
|
1339
|
+
/**
|
|
1340
|
+
* Load skill files from agent workspace that match the current task.
|
|
1341
|
+
* Skills are .md files in {agentWorkspaceDir}/skills/ with YAML frontmatter.
|
|
1342
|
+
* Matching: description field must share 2+ words (>4 chars) with the task.
|
|
1343
|
+
*/
|
|
1344
|
+
loadMatchingSkills(agentWorkspaceDir, task) {
|
|
1345
|
+
try {
|
|
1346
|
+
const skillsDir = path.join(agentWorkspaceDir, 'skills');
|
|
1347
|
+
if (!existsSync(skillsDir)) return '';
|
|
1348
|
+
|
|
1349
|
+
const files = readdirSync(skillsDir).filter(f => f.endsWith('.md'));
|
|
1350
|
+
if (files.length === 0) return '';
|
|
1351
|
+
|
|
1352
|
+
const taskWords = new Set(
|
|
1353
|
+
task.toLowerCase().split(/\W+/).filter(w => w.length > 4)
|
|
1354
|
+
);
|
|
1355
|
+
const matched = [];
|
|
1356
|
+
|
|
1357
|
+
for (const file of files) {
|
|
1358
|
+
try {
|
|
1359
|
+
const content = readFileSync(path.join(skillsDir, file), 'utf-8');
|
|
1360
|
+
const descMatch = content.match(/^---[\s\S]*?\ndescription:\s*["']?(.+?)["']?\s*\n[\s\S]*?---/m);
|
|
1361
|
+
if (descMatch) {
|
|
1362
|
+
const descWords = descMatch[1].toLowerCase().split(/\W+/).filter(w => w.length > 4);
|
|
1363
|
+
const overlap = descWords.filter(w => taskWords.has(w)).length;
|
|
1364
|
+
if (overlap >= 2) {
|
|
1365
|
+
matched.push(content.replace(/^---[\s\S]*?---\n/, '').trim());
|
|
1366
|
+
}
|
|
1367
|
+
}
|
|
1368
|
+
} catch { /* skip unreadable files */ }
|
|
1369
|
+
}
|
|
1370
|
+
|
|
1371
|
+
return matched.join('\n\n---\n\n');
|
|
1372
|
+
} catch {
|
|
1373
|
+
return '';
|
|
1374
|
+
}
|
|
1375
|
+
}
|
|
1376
|
+
|
|
1377
|
+
/**
|
|
1378
|
+
* Generate a short kebab-case name from a task description.
|
|
1379
|
+
*/
|
|
1380
|
+
generateSkillName(task) {
|
|
1381
|
+
const stopwords = new Set(['that', 'this', 'with', 'from', 'have', 'been', 'your', 'will', 'would', 'could', 'should', 'please', 'make', 'create', 'build', 'system', 'context']);
|
|
1382
|
+
const words = task.toLowerCase()
|
|
1383
|
+
.replace(/[^a-z0-9\s]/g, '')
|
|
1384
|
+
.split(/\s+/)
|
|
1385
|
+
.filter(w => w.length > 3 && !stopwords.has(w))
|
|
1386
|
+
.slice(0, 3);
|
|
1387
|
+
return words.length > 0 ? words.join('-') : 'task-skill';
|
|
1388
|
+
}
|
|
1389
|
+
|
|
1390
|
+
/**
|
|
1391
|
+
* After a task that required >1 iteration succeeds, write a skill stub
|
|
1392
|
+
* to the agent workspace so future similar tasks start from a better position.
|
|
1393
|
+
*/
|
|
1394
|
+
async saveIteratedSkill(agentId, agentWorkspaceDir, originalTask, iterations) {
|
|
1395
|
+
try {
|
|
1396
|
+
const skillsDir = path.join(agentWorkspaceDir, 'skills');
|
|
1397
|
+
mkdirSync(skillsDir, { recursive: true });
|
|
1398
|
+
|
|
1399
|
+
const skillName = this.generateSkillName(originalTask);
|
|
1400
|
+
const skillPath = path.join(skillsDir, `${skillName}.md`);
|
|
1401
|
+
if (existsSync(skillPath)) return; // Don't overwrite existing skill
|
|
1402
|
+
|
|
1403
|
+
// Strip injected context markers from the saved task description
|
|
1404
|
+
const taskPreview = originalTask
|
|
1405
|
+
.replace(/\[System context[\s\S]*?\]/g, '')
|
|
1406
|
+
.replace(/\[Relevant skill\][\s\S]*?---\n\n/g, '')
|
|
1407
|
+
.replace(/---\n\[VERIFICATION PROTOCOL\][\s\S]*$/m, '')
|
|
1408
|
+
.trim()
|
|
1409
|
+
.slice(0, 300);
|
|
1410
|
+
|
|
1411
|
+
const content = `---
|
|
1412
|
+
name: ${skillName}
|
|
1413
|
+
description: "Use for tasks involving: ${taskPreview.slice(0, 120).replace(/"/g, "'")}"
|
|
1414
|
+
---
|
|
1415
|
+
|
|
1416
|
+
## Task That Triggered This Skill
|
|
1417
|
+
${taskPreview}
|
|
1418
|
+
|
|
1419
|
+
## Notes
|
|
1420
|
+
This skill was auto-generated after the task required ${iterations} iterations.
|
|
1421
|
+
Review and add specific steps, pitfalls, and patterns that helped succeed.
|
|
1422
|
+
`;
|
|
1423
|
+
writeFileSync(skillPath, content, 'utf-8');
|
|
1424
|
+
console.log(`[${agentId}] đ Auto-saved skill: skills/${skillName}.md`);
|
|
1425
|
+
} catch (e) {
|
|
1426
|
+
console.log(`â ī¸ Skill save failed: ${e.message}`);
|
|
1427
|
+
}
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1430
|
+
// ── End Skills System ──────────────────────────────────────────────────────
|
|
1431
|
+
|
|
1432
|
+
// ── Designer file relay ────────────────────────────────────────────────────
|
|
1433
|
+
|
|
1434
|
+
async handleReadAgentFiles(message) {
|
|
1435
|
+
const { requestId, agentId } = message;
|
|
1436
|
+
const workDir = `/tmp/agentforge/agents/${agentId}`;
|
|
1437
|
+
const DEFAULT_FILES = ['IDENTITY.md', 'SOUL.md', 'AGENTS.md', 'MEMORY.md', 'AGENTFORGE.md', 'BOOTSTRAP.md', 'HEARTBEAT.md', 'TOOLS.md', 'USER.md'];
|
|
1438
|
+
const results = {};
|
|
1439
|
+
|
|
1440
|
+
for (const filename of DEFAULT_FILES) {
|
|
1441
|
+
try {
|
|
1442
|
+
results[filename] = readFileSync(path.join(workDir, filename), 'utf8');
|
|
1443
|
+
} catch {
|
|
1444
|
+
results[filename] = null;
|
|
1445
|
+
}
|
|
1446
|
+
}
|
|
1447
|
+
|
|
1448
|
+
// Also pick up any extra .md files in the workspace
|
|
1449
|
+
try {
|
|
1450
|
+
const entries = readdirSync(workDir).filter(e => e.endsWith('.md') && !DEFAULT_FILES.includes(e));
|
|
1451
|
+
for (const entry of entries) {
|
|
1452
|
+
try {
|
|
1453
|
+
results[entry] = readFileSync(path.join(workDir, entry), 'utf8');
|
|
1454
|
+
} catch {
|
|
1455
|
+
results[entry] = null;
|
|
1456
|
+
}
|
|
1457
|
+
}
|
|
1458
|
+
} catch { /* workspace may not exist yet */ }
|
|
1459
|
+
|
|
1460
|
+
this.send({ type: 'agent_files_result', requestId, files: results });
|
|
1461
|
+
}
|
|
1462
|
+
|
|
1463
|
+
async handleWriteAgentFile(message) {
|
|
1464
|
+
const { requestId, agentId, filename, content } = message;
|
|
1465
|
+
const workDir = `/tmp/agentforge/agents/${agentId}`;
|
|
1466
|
+
try {
|
|
1467
|
+
if (!existsSync(workDir)) mkdirSync(workDir, { recursive: true });
|
|
1468
|
+
writeFileSync(path.join(workDir, filename), content, 'utf8');
|
|
1469
|
+
this.send({ type: 'write_agent_file_result', requestId, success: true });
|
|
1470
|
+
} catch (err) {
|
|
1471
|
+
this.send({ type: 'write_agent_file_result', requestId, success: false, error: err.message });
|
|
1472
|
+
}
|
|
1473
|
+
}
|
|
1474
|
+
|
|
1475
|
+
send(message) {
|
|
1476
|
+
// Deduplicate task completions to prevent double-sends
|
|
1477
|
+
if (message.type === 'task_complete' && message.taskId) {
|
|
1478
|
+
if (this.recentCompletions.has(message.taskId)) {
|
|
1479
|
+
console.log(`â ī¸ Skipping duplicate task_complete for ${message.taskId}`);
|
|
1480
|
+
return;
|
|
1481
|
+
}
|
|
1482
|
+
this.recentCompletions.add(message.taskId);
|
|
1483
|
+
// Clean up after TTL
|
|
1484
|
+
setTimeout(() => this.recentCompletions.delete(message.taskId), this.completionTTL);
|
|
1485
|
+
}
|
|
1486
|
+
|
|
1487
|
+
if (this.ws && this.ws.readyState === WebSocket.OPEN) {
|
|
1488
|
+
this.ws.send(JSON.stringify(message));
|
|
1489
|
+
} else {
|
|
1490
|
+
// Queue important messages (completions, failures) for retry after reconnect
|
|
1491
|
+
const importantTypes = ['task_complete', 'task_failed', 'task_cancelled'];
|
|
1492
|
+
if (importantTypes.includes(message.type)) {
|
|
1493
|
+
if (this.pendingMessages.length < this.maxPendingMessages) {
|
|
1494
|
+
console.log(`đĻ Queuing ${message.type} for ${message.taskId || message.agentId} (websocket disconnected)`);
|
|
1495
|
+
this.pendingMessages.push(message);
|
|
1496
|
+
} else {
|
|
1497
|
+
console.warn(`â ī¸ Pending message queue full, dropping ${message.type}`);
|
|
1498
|
+
}
|
|
1499
|
+
}
|
|
1500
|
+
}
|
|
1501
|
+
}
|
|
1502
|
+
|
|
1503
|
+
resetStaleProcessingStates() {
|
|
1504
|
+
const now = Date.now();
|
|
1505
|
+
let resetCount = 0;
|
|
1506
|
+
|
|
1507
|
+
for (const [agentId, isProcessing] of this.agentProcessing.entries()) {
|
|
1508
|
+
if (isProcessing) {
|
|
1509
|
+
const startTime = this.processingStartTime.get(agentId);
|
|
1510
|
+
const elapsed = startTime ? now - startTime : Infinity;
|
|
1511
|
+
|
|
1512
|
+
// Reset if processing for too long OR if we just reconnected (previous task is dead)
|
|
1513
|
+
if (elapsed > this.PROCESSING_TIMEOUT_MS || !startTime) {
|
|
1514
|
+
console.log(`đ Resetting stale processing state for ${agentId} (was stuck for ${Math.round(elapsed/1000)}s)`);
|
|
1515
|
+
this.agentProcessing.set(agentId, false);
|
|
1516
|
+
this.processingStartTime.delete(agentId);
|
|
1517
|
+
resetCount++;
|
|
1518
|
+
}
|
|
1519
|
+
}
|
|
1520
|
+
}
|
|
1521
|
+
|
|
1522
|
+
if (resetCount > 0) {
|
|
1523
|
+
console.log(`đ Reset ${resetCount} stale processing states`);
|
|
1524
|
+
}
|
|
1525
|
+
}
|
|
1526
|
+
|
|
1527
|
+
processAllQueues() {
|
|
1528
|
+
// Kick-start processing for any queues that have pending tasks
|
|
1529
|
+
for (const [agentId, queue] of this.agentQueues.entries()) {
|
|
1530
|
+
if (queue.length > 0 && !this.agentProcessing.get(agentId)) {
|
|
1531
|
+
console.log(`đ¤ Resuming queue for ${agentId} (${queue.length} tasks waiting)`);
|
|
1532
|
+
this.processQueue(agentId);
|
|
1533
|
+
}
|
|
1534
|
+
}
|
|
1535
|
+
}
|
|
1536
|
+
|
|
1537
|
+
flushPendingMessages() {
|
|
1538
|
+
if (this.pendingMessages.length === 0) return;
|
|
1539
|
+
|
|
1540
|
+
console.log(`đ¤ Flushing ${this.pendingMessages.length} pending messages...`);
|
|
1541
|
+
const messages = [...this.pendingMessages];
|
|
1542
|
+
this.pendingMessages = [];
|
|
1543
|
+
|
|
1544
|
+
for (const message of messages) {
|
|
1545
|
+
if (this.ws && this.ws.readyState === WebSocket.OPEN) {
|
|
1546
|
+
console.log(` â Sending queued ${message.type} for ${message.taskId || message.agentId}`);
|
|
1547
|
+
this.ws.send(JSON.stringify(message));
|
|
1548
|
+
} else {
|
|
1549
|
+
// Still not connected, re-queue
|
|
1550
|
+
this.pendingMessages.push(message);
|
|
1551
|
+
break;
|
|
1552
|
+
}
|
|
1553
|
+
}
|
|
1554
|
+
}
|
|
1555
|
+
|
|
1556
|
+
// Record that an agent produced output (reset stuck detection)
|
|
1557
|
+
recordAgentActivity(agentId) {
|
|
1558
|
+
this.lastAgentActivity.set(agentId, Date.now());
|
|
1559
|
+
this.pingsSinceActivity.set(agentId, 0);
|
|
1560
|
+
}
|
|
1561
|
+
|
|
1562
|
+
// Collect detailed diagnostics for debug agent
|
|
1563
|
+
collectDiagnostics(agentId, taskId, error, reason) {
  // Builds a plain-object snapshot of what the worker knows about an agent and
  // task: the error (if any), activity timing, tracked task info and whether
  // the CLI runner still holds a process for the agent.
  const trackedTask = this.runningTasks.get(taskId);
  const lastSeen = this.lastAgentActivity.get(agentId);
  const startedAt = this.processingStartTime.get(agentId);
  const activeAgents = this.cli.activeAgents;

  let errorDetails = null;
  if (error) {
    errorDetails = {
      message: error.message,
      stack: error.stack,
      name: error.name
    };
  }

  let taskDetails = null;
  if (trackedTask) {
    taskDetails = {
      agentId: trackedTask.agentId,
      roomId: trackedTask.roomId,
      cancelled: trackedTask.cancelled
    };
  }

  return {
    timestamp: new Date().toISOString(),
    reason, // 'error', 'stuck', 'timeout'
    agentId,
    taskId,
    error: errorDetails,
    activity: {
      lastActivityTime: lastSeen,
      timeSinceActivity: lastSeen ? Date.now() - lastSeen : null,
      pingsSinceActivity: this.pingsSinceActivity.get(agentId) || 0,
      processingStartTime: startedAt,
      processingDuration: startedAt ? Date.now() - startedAt : null
    },
    task: taskDetails,
    process: {
      hasProcess: activeAgents ? activeAgents.has(agentId) : false,
      pid: activeAgents ? activeAgents.get(agentId)?.proc?.pid : null
    }
  };
}
|
|
1597
|
+
|
|
1598
|
+
// Send debug report to server for investigation by debug agent
|
|
1599
|
+
sendDebugReport(diagnostics, userMessage) {
|
|
1600
|
+
this.send({
|
|
1601
|
+
type: 'debug_report',
|
|
1602
|
+
diagnostics,
|
|
1603
|
+
userMessage,
|
|
1604
|
+
timestamp: new Date().toISOString()
|
|
1605
|
+
});
|
|
1606
|
+
}
|
|
1607
|
+
|
|
1608
|
+
// Check for stuck agents on each ping
|
|
1609
|
+
checkForStuckAgents() {
|
|
1610
|
+
for (const [agentId, isProcessing] of this.agentProcessing.entries()) {
|
|
1611
|
+
if (isProcessing) {
|
|
1612
|
+
// First, check if the process is still alive - if so, it's probably just thinking
|
|
1613
|
+
const agentInfo = this.cli.activeAgents?.get(agentId);
|
|
1614
|
+
const pid = agentInfo?.proc?.pid;
|
|
1615
|
+
if (pid) {
|
|
1616
|
+
try {
|
|
1617
|
+
// process.kill(pid, 0) checks if process exists without killing it
|
|
1618
|
+
process.kill(pid, 0);
|
|
1619
|
+
// Process is alive - record activity to prevent false stuck detection
|
|
1620
|
+
// This handles cases where the CLI is blocking on API calls with no stdout
|
|
1621
|
+
this.recordAgentActivity(agentId);
|
|
1622
|
+
} catch (e) {
|
|
1623
|
+
// Process is dead - let stuck detection proceed
|
|
1624
|
+
console.log(`â ī¸ Agent ${agentId} process (PID ${pid}) appears dead`);
|
|
1625
|
+
}
|
|
1626
|
+
}
|
|
1627
|
+
|
|
1628
|
+
// Increment ping counter for this agent
|
|
1629
|
+
const pings = (this.pingsSinceActivity.get(agentId) || 0) + 1;
|
|
1630
|
+
this.pingsSinceActivity.set(agentId, pings);
|
|
1631
|
+
|
|
1632
|
+
// Check if there's an active task for this agent
|
|
1633
|
+
let hasActiveTask = false;
|
|
1634
|
+
for (const [taskId, taskInfo] of this.runningTasks.entries()) {
|
|
1635
|
+
if (taskInfo.agentId === agentId && !taskInfo.cancelled) {
|
|
1636
|
+
hasActiveTask = true;
|
|
1637
|
+
break;
|
|
1638
|
+
}
|
|
1639
|
+
}
|
|
1640
|
+
|
|
1641
|
+
// Use very long threshold if task is active (10 pings = 300s / 5 min)
|
|
1642
|
+
// OpenClaw embedded agents can spend 2-3+ minutes on complex reasoning
|
|
1643
|
+
// Only mark as stuck if truly unresponsive (no output for 5+ minutes)
|
|
1644
|
+
const threshold = hasActiveTask ? 10 : this.STUCK_PING_THRESHOLD;
|
|
1645
|
+
|
|
1646
|
+
// Log warning when agent is quiet but not yet stuck (helps with debugging)
|
|
1647
|
+
if (pings >= this.STUCK_PING_THRESHOLD && pings < threshold) {
|
|
1648
|
+
console.log(`â ī¸ Agent ${agentId} quiet for ${pings} pings (${Math.round((Date.now() - this.lastAgentActivity.get(agentId)) / 1000)}s), but task is active - waiting...`);
|
|
1649
|
+
}
|
|
1650
|
+
|
|
1651
|
+
if (pings >= threshold) {
|
|
1652
|
+
const lastActivity = this.lastAgentActivity.get(agentId);
|
|
1653
|
+
const elapsed = lastActivity ? Math.round((Date.now() - lastActivity) / 1000) : '?';
|
|
1654
|
+
const reason = hasActiveTask ? 'no output for 300s+ AND process dead' : 'no active task';
|
|
1655
|
+
console.log(`đ¨ STUCK DETECTED: Agent ${agentId} has had ${pings} pings with no activity (${reason}, last activity: ${elapsed}s ago)`);
|
|
1656
|
+
console.log(`đ¨ Force resetting agent ${agentId} to accept new tasks`);
|
|
1657
|
+
|
|
1658
|
+
// Find the task for diagnostics
|
|
1659
|
+
let stuckTaskId = null;
|
|
1660
|
+
for (const [taskId, taskInfo] of this.runningTasks.entries()) {
|
|
1661
|
+
if (taskInfo.agentId === agentId && !taskInfo.cancelled) {
|
|
1662
|
+
stuckTaskId = taskId;
|
|
1663
|
+
break;
|
|
1664
|
+
}
|
|
1665
|
+
}
|
|
1666
|
+
|
|
1667
|
+
// Collect diagnostics before cleanup
|
|
1668
|
+
const diagnostics = this.collectDiagnostics(
|
|
1669
|
+
agentId,
|
|
1670
|
+
stuckTaskId,
|
|
1671
|
+
new Error(`Agent unresponsive for ${elapsed}s`),
|
|
1672
|
+
'stuck'
|
|
1673
|
+
);
|
|
1674
|
+
|
|
1675
|
+
// Force kill the process â try both runners
|
|
1676
|
+
this.cli.cancelAgent(agentId);
|
|
1677
|
+
this.hampagent?.cancelAgent(agentId);
|
|
1678
|
+
|
|
1679
|
+
// Clear all state for this agent
|
|
1680
|
+
this.agentProcessing.set(agentId, false);
|
|
1681
|
+
this.processingStartTime.delete(agentId);
|
|
1682
|
+
this.pingsSinceActivity.set(agentId, 0);
|
|
1683
|
+
|
|
1684
|
+
// Find and cancel any running task for this agent
|
|
1685
|
+
for (const [taskId, taskInfo] of this.runningTasks.entries()) {
|
|
1686
|
+
if (taskInfo.agentId === agentId && !taskInfo.cancelled) {
|
|
1687
|
+
taskInfo.cancelled = true;
|
|
1688
|
+
this.runningTasks.delete(taskId);
|
|
1689
|
+
this.send({
|
|
1690
|
+
type: 'task_failed',
|
|
1691
|
+
taskId,
|
|
1692
|
+
agentId,
|
|
1693
|
+
error: 'Agent became unresponsive (stuck detection triggered)'
|
|
1694
|
+
});
|
|
1695
|
+
}
|
|
1696
|
+
}
|
|
1697
|
+
|
|
1698
|
+
// Send debug report
|
|
1699
|
+
this.sendDebugReport(diagnostics, `Agent ${agentId} became unresponsive after ${elapsed}s with no activity`);
|
|
1700
|
+
|
|
1701
|
+
// Process any queued tasks
|
|
1702
|
+
const queue = this.agentQueues.get(agentId);
|
|
1703
|
+
if (queue && queue.length > 0) {
|
|
1704
|
+
console.log(`đ¤ Processing ${queue.length} queued tasks after stuck recovery`);
|
|
1705
|
+
setImmediate(() => this.processQueue(agentId));
|
|
1706
|
+
}
|
|
1707
|
+
}
|
|
1708
|
+
}
|
|
1709
|
+
}
|
|
1710
|
+
}
|
|
1711
|
+
|
|
1712
|
+
async shutdown() {
|
|
1713
|
+
console.log('đ Shutting down worker...');
|
|
1714
|
+
// Kill all active agent processes so they don't become orphans on restart
|
|
1715
|
+
if (this.cli && typeof this.cli.cancelAgent === 'function') {
|
|
1716
|
+
for (const agentId of this.agentProcessing.keys()) {
|
|
1717
|
+
if (this.agentProcessing.get(agentId)) {
|
|
1718
|
+
console.log(`đĒ Killing agent process: ${agentId}`);
|
|
1719
|
+
try { this.cli.cancelAgent(agentId); } catch (e) { /* already dead */ }
|
|
1720
|
+
}
|
|
1721
|
+
}
|
|
1722
|
+
}
|
|
1723
|
+
if (this.ws) {
|
|
1724
|
+
this.ws.close();
|
|
1725
|
+
}
|
|
1726
|
+
process.exit(0);
|
|
1727
|
+
}
|
|
1728
|
+
|
|
1729
|
+
// Find the AgentForge git repo root, regardless of whether worker is globally installed or run from source
|
|
1730
|
+
_findRepoRoot() {
|
|
1731
|
+
const home = homedir();
|
|
1732
|
+
const candidates = [
|
|
1733
|
+
path.resolve(__dirname, '../../..'), // running from source
|
|
1734
|
+
path.join(home, 'Desktop', 'Projects', 'AgentForge.ai'),
|
|
1735
|
+
path.join(home, 'Desktop', 'projects', 'AgentForge.ai'),
|
|
1736
|
+
path.join(home, 'Projects', 'AgentForge.ai'),
|
|
1737
|
+
path.join(home, 'projects', 'AgentForge.ai'),
|
|
1738
|
+
];
|
|
1739
|
+
for (const candidate of candidates) {
|
|
1740
|
+
if (existsSync(path.join(candidate, '.git'))) return candidate;
|
|
1741
|
+
}
|
|
1742
|
+
return null;
|
|
1743
|
+
}
|
|
1744
|
+
|
|
1745
|
+
// Auto-update: git pull the repo, reinstall the global package, then exit so process manager restarts
|
|
1746
|
+
async _updateAndRestart() {
|
|
1747
|
+
const { execSync } = await import('child_process');
|
|
1748
|
+
const repoRoot = this._findRepoRoot();
|
|
1749
|
+
if (repoRoot) {
|
|
1750
|
+
try {
|
|
1751
|
+
console.log(`đĻ git pull in ${repoRoot}...`);
|
|
1752
|
+
const out = execSync('git pull', { cwd: repoRoot, encoding: 'utf-8', timeout: 30000 });
|
|
1753
|
+
console.log(out.trim() || '(already up to date)');
|
|
1754
|
+
// Reinstall global package so updated source is picked up on next start
|
|
1755
|
+
try {
|
|
1756
|
+
console.log('đĻ Reinstalling agentforge package...');
|
|
1757
|
+
execSync('npm install -g ./packages/worker', { cwd: repoRoot, encoding: 'utf-8', timeout: 60000, stdio: 'pipe' });
|
|
1758
|
+
console.log('â
Package reinstalled');
|
|
1759
|
+
} catch (e) {
|
|
1760
|
+
console.warn('â ī¸ npm install failed (will restart anyway):', e.message);
|
|
1761
|
+
}
|
|
1762
|
+
} catch (e) {
|
|
1763
|
+
console.warn('â ī¸ git pull failed (will restart anyway):', e.message);
|
|
1764
|
+
}
|
|
1765
|
+
} else {
|
|
1766
|
+
console.log('âšī¸ No git repo found â restarting with current code');
|
|
1767
|
+
}
|
|
1768
|
+
console.log('đ Auto-update complete â exiting so you can restart: agentforge start');
|
|
1769
|
+
process.exit(0);
|
|
1770
|
+
}
|
|
1771
|
+
|
|
1772
|
+
// Periodically check for updates and restart if new commits are available
|
|
1773
|
+
_startAutoUpdateCheck(intervalMs = 10 * 60 * 1000) {
|
|
1774
|
+
setInterval(async () => {
|
|
1775
|
+
const { execSync } = await import('child_process');
|
|
1776
|
+
const repoRoot = this._findRepoRoot();
|
|
1777
|
+
if (!repoRoot) return;
|
|
1778
|
+
try {
|
|
1779
|
+
execSync('git fetch --quiet', { cwd: repoRoot, timeout: 15000 });
|
|
1780
|
+
const status = execSync('git rev-list HEAD..origin/master --count', { cwd: repoRoot, encoding: 'utf-8', timeout: 5000 }).trim();
|
|
1781
|
+
if (parseInt(status) > 0) {
|
|
1782
|
+
console.log(`đ ${status} new commit(s) on origin/master â auto-updating (worker will restart)...`);
|
|
1783
|
+
if (this.ws?.readyState === 1) this.send({ type: 'worker_restarting' });
|
|
1784
|
+
setTimeout(() => this._updateAndRestart(), 300);
|
|
1785
|
+
}
|
|
1786
|
+
} catch (e) {
|
|
1787
|
+
// Non-fatal â network may be down, skip this check
|
|
1788
|
+
}
|
|
1789
|
+
}, intervalMs);
|
|
1790
|
+
}
|
|
1791
|
+
}
|