@hamp10/agentforge 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/agentforge.js +390 -44
- package/package.json +1 -1
- package/src/OpenClawCLI.js +204 -46
- package/src/resolveOpenclaw.js +105 -0
- package/src/selfUpdate.js +66 -0
- package/src/supervisor.js +128 -0
- package/src/worker.js +265 -227
- package/templates/agent/AGENTFORGE.md +148 -56
- package/templates/agent/AGENTS.md +0 -212
- package/templates/agent/SOUL.md +0 -36
- package/templates/agent/TOOLS.md +0 -40
package/src/worker.js
CHANGED
|
@@ -4,9 +4,10 @@ import WebSocket from 'ws';
|
|
|
4
4
|
import { OpenClawCLI } from './OpenClawCLI.js';
|
|
5
5
|
import { HampAgentCLI } from './HampAgentCLI.js';
|
|
6
6
|
import { OllamaAgent } from './OllamaAgent.js';
|
|
7
|
+
import { resolveOpenclawModule } from './resolveOpenclaw.js';
|
|
7
8
|
import EventEmitter from 'events';
|
|
8
9
|
import path from 'path';
|
|
9
|
-
import { existsSync, readdirSync, readFileSync, mkdirSync, writeFileSync, copyFileSync, statSync, unlinkSync } from 'fs';
|
|
10
|
+
import { existsSync, readdirSync, readFileSync, mkdirSync, writeFileSync, copyFileSync, statSync, unlinkSync, openSync } from 'fs';
|
|
10
11
|
import { fileURLToPath } from 'url';
|
|
11
12
|
import { homedir, hostname } from 'os';
|
|
12
13
|
import { spawn } from 'child_process';
|
|
@@ -71,9 +72,17 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
71
72
|
this.agentProcessing = new Map(); // agentId -> boolean (is currently processing)
|
|
72
73
|
this.processingStartTime = new Map(); // agentId -> timestamp when processing started
|
|
73
74
|
this.PROCESSING_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes max for stale processing state (large projects with Opus can be slow)
|
|
75
|
+
|
|
76
|
+
// Browser mutex — only one agent can use the browser (CDP) at a time.
|
|
77
|
+
// Others wait in a queue. Prevents parallel agents from conflicting on port 9223.
|
|
78
|
+
this._browserQueue = [];
|
|
79
|
+
this._browserBusy = false;
|
|
74
80
|
|
|
75
81
|
// Track running tasks for cancellation
|
|
76
82
|
this.runningTasks = new Map(); // taskId -> { agentId, cancelled }
|
|
83
|
+
|
|
84
|
+
// Track last output time per agent — used to detect broken gateway streams after reconnect
|
|
85
|
+
this.lastOutputTime = new Map(); // agentId -> timestamp
|
|
77
86
|
|
|
78
87
|
// Queue for messages that couldn't be sent while disconnected
|
|
79
88
|
this.pendingMessages = [];
|
|
@@ -83,10 +92,6 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
83
92
|
this.recentCompletions = new Set();
|
|
84
93
|
this.completionTTL = 30000; // 30 seconds
|
|
85
94
|
|
|
86
|
-
// Track agent activity for stuck detection
|
|
87
|
-
this.lastAgentActivity = new Map(); // agentId -> timestamp
|
|
88
|
-
this.pingsSinceActivity = new Map(); // agentId -> count
|
|
89
|
-
this.STUCK_PING_THRESHOLD = 2; // 2 pings with no activity = stuck (~60s since server pings every 30s)
|
|
90
95
|
}
|
|
91
96
|
|
|
92
97
|
speakTextOutLoud(utterance) {
|
|
@@ -170,17 +175,45 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
170
175
|
|
|
171
176
|
async initialize() {
|
|
172
177
|
this._killOrphanedAgents();
|
|
178
|
+
await this._startGateway();
|
|
173
179
|
this.installPreviewServer();
|
|
174
180
|
this._startAutoUpdateCheck();
|
|
175
181
|
console.log('✅ Worker initialized');
|
|
176
182
|
}
|
|
177
183
|
|
|
184
|
+
async _startGateway() {
|
|
185
|
+
// Spawn openclaw-gateway as a child of this worker process.
|
|
186
|
+
// When the worker exits (Ctrl+C in terminal), the gateway dies with it.
|
|
187
|
+
// No LaunchAgent needed — the terminal session owns everything.
|
|
188
|
+
const openclaw = resolveOpenclawModule();
|
|
189
|
+
if (!openclaw) {
|
|
190
|
+
console.warn('⚠️ openclaw not found — browser tools unavailable. Install with: npm install -g openclaw');
|
|
191
|
+
return;
|
|
192
|
+
}
|
|
193
|
+
const port = 18789;
|
|
194
|
+
const logDir = path.join(homedir(), '.openclaw', 'logs');
|
|
195
|
+
mkdirSync(logDir, { recursive: true });
|
|
196
|
+
const logOut = openSync(path.join(logDir, 'gateway.log'), 'a');
|
|
197
|
+
const logErr = openSync(path.join(logDir, 'gateway.err.log'), 'a');
|
|
198
|
+
const gw = spawn(process.execPath, [openclaw, 'gateway', '--port', String(port)], {
|
|
199
|
+
stdio: ['ignore', logOut, logErr],
|
|
200
|
+
env: { ...process.env, NODE_EXTRA_CA_CERTS: '/etc/ssl/cert.pem' },
|
|
201
|
+
detached: false
|
|
202
|
+
});
|
|
203
|
+
gw.on('exit', (code) => {
|
|
204
|
+
if (code !== null) console.warn(`⚠️ openclaw-gateway exited (code ${code}) — browser tools will fail until worker restarts`);
|
|
205
|
+
});
|
|
206
|
+
console.log(`🌐 OpenClaw Gateway started (PID: ${gw.pid}, port: ${port})`);
|
|
207
|
+
// Brief pause so gateway is listening before first agent task
|
|
208
|
+
await new Promise(r => setTimeout(r, 1500));
|
|
209
|
+
}
|
|
210
|
+
|
|
178
211
|
_killOrphanedAgents() {
|
|
179
212
|
// Kill any openclaw agent processes left over from a previous worker session.
|
|
180
213
|
// Without this, orphaned processes reconnect to the gateway and block the task queue.
|
|
181
214
|
for (const name of ['openclaw-agent', 'openclaw-gateway']) {
|
|
182
215
|
try {
|
|
183
|
-
const p = spawn('pkill', ['-f', name], { stdio: 'ignore' });
|
|
216
|
+
const p = spawn('pkill', ['-9', '-f', name], { stdio: 'ignore' });
|
|
184
217
|
p.on('close', (code) => {
|
|
185
218
|
if (code === 0) console.log(`🧹 Killed orphaned ${name} processes`);
|
|
186
219
|
});
|
|
@@ -323,6 +356,37 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
323
356
|
this.flushPendingMessages();
|
|
324
357
|
this.processAllQueues(); // Kick-start any stalled queues
|
|
325
358
|
}, 500);
|
|
359
|
+
|
|
360
|
+
// After STALE_STREAM_TIMEOUT_MS, cancel any task that was running at reconnect
// time and has produced zero output since the reconnect.
// This catches broken gateway streams that went dark during the disconnect.
// A healthy agent mid-build produces output continuously (tool calls, writes, etc.)
// and never goes this long silent. Only broken/frozen streams stay silent this long.
const STALE_STREAM_TIMEOUT_MS = 90000;
// Snapshot: only tasks already running at reconnect are candidates.
const tasksAtReconnect = new Map(this.runningTasks);
const reconnectTime = Date.now();
setTimeout(() => {
  for (const [tid, taskInfo] of tasksAtReconnect.entries()) {
    if (taskInfo.cancelled) continue;
    const current = this.runningTasks.get(tid);
    if (!current || current.cancelled) continue; // task finished normally
    const lastOut = this.lastOutputTime.get(taskInfo.agentId) || 0;
    if (lastOut >= reconnectTime) continue; // produced output since reconnect — stream is alive

    // No output since the reconnect — stream is dead
    console.log(`⚠️ Agent ${taskInfo.agentId} produced no output in ${STALE_STREAM_TIMEOUT_MS / 1000}s after reconnect — cancelling (broken stream)`);
    // Try both runners, mirroring the task_cancel handler.
    // NOTE(review): assumes this.hampagent is the alternate runner instance — confirm.
    this.cli.cancelAgent(taskInfo.agentId) || this.hampagent?.cancelAgent(taskInfo.agentId);
    // Mark the record cancelled before dropping it, as task_cancel does.
    current.cancelled = true;
    this.runningTasks.delete(tid);
    this.agentProcessing.set(taskInfo.agentId, false);
    this.processingStartTime.delete(taskInfo.agentId);
    this.send({
      type: 'task_progress',
      taskId: tid,
      agentId: taskInfo.agentId,
      output: '⚠️ Task was interrupted by a connection issue. Please resend your message to continue.',
      isChunk: true
    });
    this.send({ type: 'task_cancelled', taskId: tid, agentId: taskInfo.agentId });
  }
}, STALE_STREAM_TIMEOUT_MS);
|
|
326
390
|
}
|
|
327
391
|
|
|
328
392
|
resolve();
|
|
@@ -395,22 +459,34 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
395
459
|
await this.executeTask(message);
|
|
396
460
|
break;
|
|
397
461
|
|
|
398
|
-
case 'task_cancel':
|
|
399
|
-
|
|
400
|
-
console.log(`📨 CANCEL REQUEST: taskId=${
|
|
401
|
-
if (
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
462
|
+
case 'task_cancel': {
|
|
463
|
+
const { agentId: cancelAgentId, taskId: cancelTaskId } = message;
|
|
464
|
+
console.log(`📨 CANCEL REQUEST: taskId=${cancelTaskId} agentId=${cancelAgentId}`);
|
|
465
|
+
if (!cancelAgentId) {
|
|
466
|
+
console.log(`⚠️ task_cancel received without agentId!`);
|
|
467
|
+
break;
|
|
468
|
+
}
|
|
469
|
+
// Always kill the process immediately by agentId — don't wait for task lookup
|
|
470
|
+
// Task lookup can fail if taskId is stale (agent moved to a queued message)
|
|
471
|
+
const killed = this.cli.cancelAgent(cancelAgentId) || this.hampagent?.cancelAgent(cancelAgentId) || false;
|
|
472
|
+
console.log(`🛑 Direct kill for agent ${cancelAgentId}: ${killed}`);
|
|
473
|
+
// Clean up all state for this agent
|
|
474
|
+
this.agentQueues.set(cancelAgentId, []);
|
|
475
|
+
this.agentProcessing.set(cancelAgentId, false);
|
|
476
|
+
this.processingStartTime.delete(cancelAgentId);
|
|
477
|
+
// Mark any tracked tasks for this agent as cancelled
|
|
478
|
+
for (const [tid, info] of this.runningTasks.entries()) {
|
|
479
|
+
if (info.agentId === cancelAgentId) {
|
|
480
|
+
info.cancelled = true;
|
|
481
|
+
this.runningTasks.delete(tid);
|
|
482
|
+
}
|
|
407
483
|
}
|
|
484
|
+
this.send({ type: 'task_cancelled', taskId: cancelTaskId, agentId: cancelAgentId });
|
|
408
485
|
break;
|
|
486
|
+
}
|
|
409
487
|
|
|
410
488
|
case 'ping':
|
|
411
489
|
this.send({ type: 'pong' });
|
|
412
|
-
// Check for stuck agents - if processing but no activity for 2+ pings
|
|
413
|
-
this.checkForStuckAgents();
|
|
414
490
|
break;
|
|
415
491
|
|
|
416
492
|
case 'worker_restart':
|
|
@@ -440,6 +516,47 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
440
516
|
break;
|
|
441
517
|
}
|
|
442
518
|
|
|
519
|
+
case 'get_models': {
|
|
520
|
+
// Return locally available Ollama models + openclaw.json catalog
|
|
521
|
+
let ollamaModels = [];
|
|
522
|
+
let catalogModels = {};
|
|
523
|
+
try {
|
|
524
|
+
const resp = await fetch('http://localhost:11434/api/tags', {
|
|
525
|
+
signal: AbortSignal.timeout(3000),
|
|
526
|
+
});
|
|
527
|
+
if (resp.ok) {
|
|
528
|
+
const data = await resp.json();
|
|
529
|
+
ollamaModels = (data.models || []).map(m => ({
|
|
530
|
+
id: `ollama/${m.name}`,
|
|
531
|
+
name: m.name,
|
|
532
|
+
size: m.size,
|
|
533
|
+
tier: 'local',
|
|
534
|
+
}));
|
|
535
|
+
}
|
|
536
|
+
} catch {
|
|
537
|
+
// Ollama not running — that's fine
|
|
538
|
+
}
|
|
539
|
+
let primaryModel = null;
|
|
540
|
+
try {
|
|
541
|
+
const cfgPath = path.join(homedir(), '.openclaw', 'openclaw.json');
|
|
542
|
+
if (existsSync(cfgPath)) {
|
|
543
|
+
const cfg = JSON.parse(readFileSync(cfgPath, 'utf-8'));
|
|
544
|
+
catalogModels = cfg?.agents?.defaults?.models || {};
|
|
545
|
+
primaryModel = cfg?.agents?.defaults?.model?.primary || null;
|
|
546
|
+
}
|
|
547
|
+
} catch {
|
|
548
|
+
// ignore
|
|
549
|
+
}
|
|
550
|
+
this.send({
|
|
551
|
+
type: 'get_models_result',
|
|
552
|
+
requestId: message.requestId,
|
|
553
|
+
ollamaModels,
|
|
554
|
+
catalogModels,
|
|
555
|
+
primaryModel,
|
|
556
|
+
});
|
|
557
|
+
break;
|
|
558
|
+
}
|
|
559
|
+
|
|
443
560
|
default:
|
|
444
561
|
console.log(`⚠️ Unknown message type: ${message.type}`);
|
|
445
562
|
}
|
|
@@ -470,6 +587,31 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
470
587
|
this.processQueue(agentId);
|
|
471
588
|
}
|
|
472
589
|
|
|
590
|
+
// Browser mutex — serialises all browser tool access across parallel agents.
|
|
591
|
+
// Returns a release function; call it when the agent is done with the browser.
|
|
592
|
+
acquireBrowser(agentId) {
|
|
593
|
+
return new Promise((resolve) => {
|
|
594
|
+
const attempt = () => {
|
|
595
|
+
if (!this._browserBusy) {
|
|
596
|
+
this._browserBusy = true;
|
|
597
|
+
console.log(`🌐 [${agentId}] Acquired browser`);
|
|
598
|
+
resolve(() => {
|
|
599
|
+
console.log(`🌐 [${agentId}] Released browser`);
|
|
600
|
+
this._browserBusy = false;
|
|
601
|
+
if (this._browserQueue.length > 0) {
|
|
602
|
+
const next = this._browserQueue.shift();
|
|
603
|
+
next();
|
|
604
|
+
}
|
|
605
|
+
});
|
|
606
|
+
} else {
|
|
607
|
+
console.log(`🌐 [${agentId}] Waiting for browser (${this._browserQueue.length + 1} in queue)`);
|
|
608
|
+
this._browserQueue.push(attempt);
|
|
609
|
+
}
|
|
610
|
+
};
|
|
611
|
+
attempt();
|
|
612
|
+
});
|
|
613
|
+
}
|
|
614
|
+
|
|
473
615
|
async processQueue(agentId) {
|
|
474
616
|
// If already processing, check if it's stale
|
|
475
617
|
if (this.agentProcessing.get(agentId)) {
|
|
@@ -498,8 +640,9 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
498
640
|
}
|
|
499
641
|
|
|
500
642
|
// Mark as processing with timestamp
|
|
643
|
+
const myStartTime = Date.now();
|
|
501
644
|
this.agentProcessing.set(agentId, true);
|
|
502
|
-
this.processingStartTime.set(agentId,
|
|
645
|
+
this.processingStartTime.set(agentId, myStartTime);
|
|
503
646
|
console.log(`🚀 Starting task for ${agentId} (${queue.length} in queue)`);
|
|
504
647
|
|
|
505
648
|
// Get next task from queue
|
|
@@ -584,10 +727,14 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
584
727
|
if (queueTimeoutFired && !executeTaskCompleted) {
|
|
585
728
|
console.log(`[${agentId}] ⚠️ Queue timeout won the race - executeTaskNow never completed`);
|
|
586
729
|
}
|
|
587
|
-
//
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
730
|
+
// Only clear processing state if we still own it (a newer task may have taken over)
|
|
731
|
+
if (this.processingStartTime.get(agentId) === myStartTime) {
|
|
732
|
+
console.log(`🧹 Clearing processing state for ${agentId}`);
|
|
733
|
+
this.agentProcessing.set(agentId, false);
|
|
734
|
+
this.processingStartTime.delete(agentId);
|
|
735
|
+
} else {
|
|
736
|
+
console.log(`🧹 Skipping processing state clear for ${agentId} — newer task owns it`);
|
|
737
|
+
}
|
|
591
738
|
|
|
592
739
|
// Process next task if queue is not empty
|
|
593
740
|
if (queue.length > 0) {
|
|
@@ -604,7 +751,7 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
604
751
|
}
|
|
605
752
|
}
|
|
606
753
|
|
|
607
|
-
async executeTaskNow({ taskId, agentId, sessionId, message: userMessage, workDir, defaultProjectsPath, image, roomId, roomContext, isMaestro, conversationHistory, browserProfile, agentName, agentEmoji, runnerType }) {
|
|
754
|
+
async executeTaskNow({ taskId, agentId, sessionId, message: userMessage, workDir, defaultProjectsPath, image, roomId, roomContext, isMaestro, conversationHistory, browserProfile, agentName, agentEmoji, runnerType, agentModel }) {
|
|
608
755
|
const isMaestroTask = isMaestro || agentId === 'maestro';
|
|
609
756
|
console.log(`🤖 Executing task ${taskId} for agent ${agentId}${isMaestroTask ? ' (MAESTRO)' : ''}${browserProfile ? ` [browser: ${browserProfile}]` : ''}`);
|
|
610
757
|
if (sessionId) {
|
|
@@ -713,9 +860,8 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
713
860
|
if (!taskInfo || taskInfo.cancelled) {
|
|
714
861
|
return; // Task cancelled, drop all output
|
|
715
862
|
}
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
this.recordAgentActivity(agentId);
|
|
863
|
+
// Track last output time for broken-stream detection after reconnect
|
|
864
|
+
this.lastOutputTime.set(agentId, Date.now());
|
|
719
865
|
|
|
720
866
|
// Filter out tool error stack traces from room chat
|
|
721
867
|
const text = data.output?.trim();
|
|
@@ -827,11 +973,8 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
827
973
|
// Set up tool activity streaming (shows what tool agent is using)
|
|
828
974
|
const toolActivityHandler = (data) => {
|
|
829
975
|
if (data.agentId === agentId) {
|
|
830
|
-
// Record activity to prevent stuck detection from firing
|
|
831
|
-
this.recordAgentActivity(agentId);
|
|
832
|
-
|
|
833
976
|
// tts tool is handled natively by openclaw — do not intercept or emit anything
|
|
834
|
-
|
|
977
|
+
|
|
835
978
|
let toolInputPreview;
|
|
836
979
|
if (data.toolInput) {
|
|
837
980
|
try {
|
|
@@ -841,6 +984,11 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
841
984
|
}
|
|
842
985
|
}
|
|
843
986
|
|
|
987
|
+
// Log tool calls to worker.log for always-on visibility
|
|
988
|
+
if ((data.event === 'tool_start' || data.event === 'start') && data.description) {
|
|
989
|
+
console.log(`[${agentId}] 🔧 ${data.description}${toolInputPreview ? ` — ${toolInputPreview.slice(0, 120)}` : ''}`);
|
|
990
|
+
}
|
|
991
|
+
|
|
844
992
|
this.send({
|
|
845
993
|
type: 'tool_activity',
|
|
846
994
|
taskId,
|
|
@@ -858,69 +1006,10 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
858
1006
|
activeRunner.on('agent_error', errorHandler);
|
|
859
1007
|
activeRunner.on('tool_activity', toolActivityHandler);
|
|
860
1008
|
|
|
861
|
-
// Listen for raw alive signals (any stdout, even filtered) to prevent false stuck detection
|
|
862
|
-
const aliveHandler = (data) => {
|
|
863
|
-
if (data.agentId === agentId) {
|
|
864
|
-
this.recordAgentActivity(agentId);
|
|
865
|
-
}
|
|
866
|
-
};
|
|
867
|
-
activeRunner.on('agent_alive', aliveHandler);
|
|
868
|
-
|
|
869
|
-
// Inactivity: warn at 60s, KILL at 10 minutes of silence (no stdout at all)
|
|
870
|
-
const INACTIVITY_WARN_MS = 60000;
|
|
871
|
-
const INACTIVITY_KILL_MS = 2 * 60 * 1000; // 2 minutes — kill truly hung openclaw
|
|
872
|
-
let lastActivityTime = Date.now();
|
|
873
|
-
let inactivityTimer = null;
|
|
874
|
-
let inactivityKillTimer = null;
|
|
875
1009
|
let currentTool = null; // Track which tool is currently running
|
|
876
|
-
let promiseSettled = false;
|
|
877
|
-
|
|
878
|
-
// Tools that are expected to take a while - don't warn about these
|
|
879
|
-
const QUIET_TOOLS = ['Editing file', 'Writing file', 'Reading file', 'edit', 'write', 'read'];
|
|
880
|
-
|
|
881
|
-
const clearInactivityTimers = () => {
|
|
882
|
-
if (inactivityTimer) { clearTimeout(inactivityTimer); inactivityTimer = null; }
|
|
883
|
-
if (inactivityKillTimer) { clearTimeout(inactivityKillTimer); inactivityKillTimer = null; }
|
|
884
|
-
};
|
|
1010
|
+
let promiseSettled = false;
|
|
885
1011
|
|
|
886
1012
|
const resetInactivityTimer = () => {
|
|
887
|
-
lastActivityTime = Date.now();
|
|
888
|
-
// Also reset the ping-based stuck detector so 300s kill doesn't fire during active work
|
|
889
|
-
this.lastAgentActivity.set(agentId, Date.now());
|
|
890
|
-
this.pingsSinceActivity.set(agentId, 0);
|
|
891
|
-
clearInactivityTimers();
|
|
892
|
-
|
|
893
|
-
// Warn at 30s
|
|
894
|
-
inactivityTimer = setTimeout(() => {
|
|
895
|
-
const taskInfo = this.runningTasks.get(taskId);
|
|
896
|
-
if (taskInfo && !taskInfo.cancelled) {
|
|
897
|
-
if (currentTool && QUIET_TOOLS.some(t => currentTool.toLowerCase().includes(t.toLowerCase()))) {
|
|
898
|
-
resetInactivityTimer(); // quiet tool — just reset and check again later
|
|
899
|
-
return;
|
|
900
|
-
}
|
|
901
|
-
const stuckTool = currentTool ? ` while running "${currentTool}"` : '';
|
|
902
|
-
this.send({
|
|
903
|
-
type: 'task_warning',
|
|
904
|
-
taskId,
|
|
905
|
-
agentId,
|
|
906
|
-
roomId,
|
|
907
|
-
warning: `No activity for ${INACTIVITY_WARN_MS/1000} seconds${stuckTool} - agent may be stuck`,
|
|
908
|
-
lastTool: currentTool
|
|
909
|
-
});
|
|
910
|
-
}
|
|
911
|
-
}, INACTIVITY_WARN_MS);
|
|
912
|
-
|
|
913
|
-
// Kill at 10 minutes — openclaw is definitely hung
|
|
914
|
-
inactivityKillTimer = setTimeout(() => {
|
|
915
|
-
if (promiseSettled) return;
|
|
916
|
-
const taskInfo = this.runningTasks.get(taskId);
|
|
917
|
-
if (taskInfo && !taskInfo.cancelled) {
|
|
918
|
-
console.warn(`[${agentId}] ⚠️ No output for ${INACTIVITY_KILL_MS/1000}s — openclaw hung mid-task, killing`);
|
|
919
|
-
promiseSettled = true;
|
|
920
|
-
// cancelAgent kills the process tree; OpenClawCLI's close handler will reject runAgentTask
|
|
921
|
-
activeRunner.cancelAgent(agentId);
|
|
922
|
-
}
|
|
923
|
-
}, INACTIVITY_KILL_MS);
|
|
924
1013
|
};
|
|
925
1014
|
|
|
926
1015
|
// Track tool lifecycle
|
|
@@ -944,24 +1033,19 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
944
1033
|
// Wrap handlers to track activity
|
|
945
1034
|
const wrappedOutputHandler = activityWrapper(outputHandler);
|
|
946
1035
|
const wrappedToolHandler = activityWrapper(toolActivityHandler);
|
|
947
|
-
|
|
948
|
-
|
|
1036
|
+
|
|
949
1037
|
activeRunner.off('agent_output', outputHandler);
|
|
950
1038
|
activeRunner.off('tool_activity', toolActivityHandler);
|
|
951
|
-
activeRunner.off('agent_alive', aliveHandler);
|
|
952
1039
|
activeRunner.on('agent_output', wrappedOutputHandler);
|
|
953
1040
|
activeRunner.on('tool_activity', wrappedToolHandler);
|
|
954
|
-
activeRunner.on('agent_alive', wrappedAliveHandler);
|
|
955
1041
|
|
|
956
1042
|
// Capture cleanup as a callable closure so both try and catch paths can use it
|
|
957
1043
|
_cleanup = () => {
|
|
958
|
-
promiseSettled = true;
|
|
959
|
-
clearInactivityTimers();
|
|
1044
|
+
promiseSettled = true;
|
|
960
1045
|
activeRunner.off('agent_output', wrappedOutputHandler);
|
|
961
1046
|
activeRunner.off('agent_error', errorHandler);
|
|
962
1047
|
activeRunner.off('tool_activity', wrappedToolHandler);
|
|
963
1048
|
activeRunner.off('tool_activity', toolLifecycleHandler);
|
|
964
|
-
activeRunner.off('agent_alive', wrappedAliveHandler);
|
|
965
1049
|
activeRunner.off('agent_image', imageHandler);
|
|
966
1050
|
};
|
|
967
1051
|
|
|
@@ -981,14 +1065,74 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
981
1065
|
finalMessage = contextInfo;
|
|
982
1066
|
}
|
|
983
1067
|
|
|
1068
|
+
// If taskCwd is set, scan projects folder and auto-inject context for any project
// mentioned by name in the user message — so the agent never needs to ask "where is it?"
// At most two matched projects are inspected, and every file read is truncated,
// to keep the injected context small. All filesystem errors are non-fatal.
let projectContext = null;
if (taskCwd && taskCwd !== agentWorkspaceDir) {
  try {
    // True only for existing directories; never throws.
    const isDir = (p) => {
      try { return statSync(p).isDirectory(); } catch { return false; }
    };
    const projects = readdirSync(taskCwd).filter(e => !e.startsWith('.'));
    // Find project dirs whose name appears in the user message (case-insensitive)
    const msg = userMessage.toLowerCase();
    const matched = projects.filter(p => msg.includes(p.toLowerCase()));
    const snippets = [];
    for (const proj of matched.slice(0, 2)) {
      const projPath = path.join(taskCwd, proj);
      // Snippets for THIS project only, so one project's hit does not
      // suppress the nested lookup for the next project.
      const found = [];
      // Read package.json or first README for stack/description
      for (const fname of ['package.json', 'README.md', 'readme.md']) {
        const fpath = path.join(projPath, fname);
        if (existsSync(fpath)) {
          try {
            const raw = readFileSync(fpath, 'utf-8').slice(0, 800);
            found.push(`--- ${proj}/${fname} ---\n${raw}`);
            break;
          } catch { /* skip */ }
        }
      }
      // If nested (e.g. "Faith Guide/Faith Guide App"), go one level deeper
      if (found.length === 0 && isDir(projPath)) {
        try {
          const sub = readdirSync(projPath).filter(e => !e.startsWith('.'));
          for (const s of sub.slice(0, 4)) {
            const subPath = path.join(projPath, s);
            // Plain files cannot be listed; skip them instead of aborting the scan.
            if (!isDir(subPath)) continue;
            // Try package.json first
            const pkgPath = path.join(subPath, 'package.json');
            if (existsSync(pkgPath)) {
              try { found.push(`--- ${proj}/${s}/package.json ---\n${readFileSync(pkgPath, 'utf-8').slice(0, 600)}`); break; } catch { /* skip */ }
            }
            // Detect Swift/iOS (Xcode project)
            const subEntries = readdirSync(subPath);
            const xcode = subEntries.find(f => f.endsWith('.xcodeproj'));
            if (xcode) {
              // Read any .md file in this folder for context
              const md = subEntries.find(f => f.endsWith('.md'));
              const mdContent = md ? readFileSync(path.join(subPath, md), 'utf-8').slice(0, 600) : '';
              const subContents = subEntries.filter(e => !e.startsWith('.')).join(', ');
              found.push(`--- ${proj}/${s} --- (Swift/iOS Xcode project)\nContents: ${subContents}${mdContent ? '\n\n' + md + ':\n' + mdContent : ''}`);
              break;
            }
          }
          // Nothing recognisable one level down — at least list what is there.
          if (found.length === 0) found.push(`--- ${proj}/ ---\nSubfolders: ${sub.join(', ')}`);
        } catch { /* skip */ }
      }
      snippets.push(...found);
    }
    if (snippets.length > 0) {
      projectContext = `[Project context pre-loaded — do NOT ask the user for this info:\n${snippets.join('\n\n')}\n]`;
    }
  } catch { /* non-fatal */ }
}
|
|
1124
|
+
|
|
984
1125
|
// Inject platform context into EVERY message so the agent always knows:
|
|
985
1126
|
// 1. What platform it's running on and its URL
|
|
986
1127
|
// 2. Where the user's projects folder is
|
|
987
1128
|
// 3. Screenshot capabilities
|
|
988
1129
|
const platformContext = [
|
|
989
1130
|
`[System context:`,
|
|
990
|
-
`- Platform: AgentForge.ai. Dashboard: https://agentforgeai-production.up.railway.app/dashboard. CRITICAL: Always use the built-in 'browser' tool for ALL web browsing AND web searches — NEVER use the 'web_search' tool (no API keys are configured), NEVER run shell commands like 'open', 'google-chrome', 'chromium', or any OS command to launch a browser. The browser tool connects to AgentForge Browser (port 9223) automatically. To search: use browser to navigate to google.com
|
|
991
|
-
`-
|
|
1131
|
+
`- Platform: AgentForge.ai. Dashboard: https://agentforgeai-production.up.railway.app/dashboard. CRITICAL: Always use the built-in 'browser' tool for ALL web browsing AND web searches — NEVER use the 'web_search' tool (no API keys are configured), NEVER run shell commands like 'open', 'google-chrome', 'chromium', or any OS command to launch a browser. The browser tool connects to AgentForge Browser (port 9223) automatically. To search: use browser to navigate to google.com.`,
|
|
1132
|
+
`- VIEWING/TESTING A WEB APP: Always check for a deployed URL first — look in the project for railway.toml, vercel.json, netlify.toml, .env, README.md, or package.json for a live URL. Open the deployed app in the browser. Only spin up a local server if there is genuinely no deployed version. Never default to localhost when a live URL might exist.`,
|
|
1133
|
+
`- LOCAL SERVERS: If you must use localhost, try http://127.0.0.1:PORT if http://localhost:PORT fails. Do not stop and ask — just try both.`,
|
|
1134
|
+
`- CREATING AGENTS AND CHATTING WITH THEM: Open the browser to https://agentforgeai-production.up.railway.app/dashboard. Click + to create a new agent. Click into that new agent's chat panel. Type a message into the chat input and send it — IN THE BROWSER, not via sessions_send. NEVER use sessions_send or sessions_spawn for this — those do not open a visible chat. The entire interaction happens inside the browser UI, start to finish, exactly like a human clicking around the dashboard.`,
|
|
1135
|
+
`- Your runner: ${useHampagent ? 'Hampagent' : 'OpenClaw'}. Running on: ${homedir().split('/').pop()}@${hostname()}.`,
|
|
992
1136
|
(!conversationHistory || conversationHistory.length === 0)
|
|
993
1137
|
? `- This is the first message. When greeting, say: "I'm [your name] — your ${useHampagent ? 'Hampagent' : 'OpenClaw'} agent running on AgentForge." Never say "autonomous AI agent". Never list capabilities in an intro.`
|
|
994
1138
|
: `- This is a continuing conversation. Do NOT re-introduce yourself.`,
|
|
@@ -996,14 +1140,28 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
996
1140
|
? `- Your name is "${agentName}"${agentEmoji ? ` ${agentEmoji}` : ''}. This is your AgentForge identity. Do not ask the user who you are or what your name is — you already know.`
|
|
997
1141
|
: null,
|
|
998
1142
|
taskCwd && taskCwd !== agentWorkspaceDir
|
|
999
|
-
?
|
|
1143
|
+
? (() => {
|
|
1144
|
+
try {
|
|
1145
|
+
const entries = readdirSync(taskCwd).filter(e => !e.startsWith('.')).sort();
|
|
1146
|
+
return `- Projects folder: "${taskCwd}"\n Available projects: ${entries.join(', ')}\n To work on a project, go to "${taskCwd}/<project name>". Folder names may contain spaces — quote paths. Do NOT ask the user where code is or what stack it uses — the project list is above, find it and read the code.`;
|
|
1147
|
+
} catch {
|
|
1148
|
+
return `- Projects folder: "${taskCwd}". Find projects with ls. Do NOT ask the user where code is.`;
|
|
1149
|
+
}
|
|
1150
|
+
})()
|
|
1000
1151
|
: null,
|
|
1001
1152
|
agentWorkspaceDir
|
|
1002
1153
|
? `- Screenshots: screencapture -x ${agentWorkspaceDir}/ss1.png && sips -Z 1280 ${agentWorkspaceDir}/ss1.png (MUST resize — API rejects images over 5MB). Send to chat with: echo "AGENTFORGE_IMAGE:${agentWorkspaceDir}/ss1.png". Always screenshot visual work before saying done. NEVER use "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --headless for screenshots — use screencapture only.`
|
|
1003
1154
|
: `- Screenshots: screencapture -x /tmp/ss1.png && sips -Z 1280 /tmp/ss1.png (MUST resize — API rejects images over 5MB). Send to chat with: echo "AGENTFORGE_IMAGE:/tmp/ss1.png". Always screenshot visual work before saying done. NEVER use "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --headless for screenshots — use screencapture only.`,
|
|
1155
|
+
`- QUALITY STANDARD FOR VISUAL WORK: Screenshot everything and critique harshly. HARD FAILS: (1) any unreadable contrast. (2) NEVER default to dark background + purple/indigo/blue-gray accent + rounded cards — that specific combination is AI-generated slop. Dark themes are fine when intentional; the ban is on lazy defaults, not dark aesthetics. (3) NEVER use the rounded-square block logo with a centered symbol — teal/orange/blue square with a snowflake, asterisk, key, or geometric shape inside is the single most overused AI logo pattern and is immediately recognizable as LLM output. If the app needs a logo or icon, design something that actually reflects the product concept: wordmarks, custom letterforms, logotypes, or illustrated marks — not a colored square with a centered glyph. Every UI needs a deliberate distinct visual identity: a dominant color that fits the app, strong typographic scale, and one signature visual element that makes it look like a real product. Use actual Google Search for competitors and design research — use \`openclaw browser open "https://www.google.com/search?q=best+[app+type]+app"\` to open a VISIBLE browser tab the user can watch. Do NOT use any internal search command — it must open as a real browser tab. Once loaded: (1) screenshot the full results page including the AI overview at the top, (2) also open \`https://www.google.com/search?q=best+[app+type]+app&tbm=isch\` in another tab to see Google Images of real UI designs and screenshot that too, (3) keep the search tab open while you work — do not close it immediately, (4) then click through to the top-ranked modern products from the actual results. DO NOT pick sites from training data. DO NOT visit Wikipedia, old forums, or anything that is not a live modern product. Trust the search results on screen. NEVER seed or generate fake data in code — no generateSampleHistory(), no sampleData arrays, no initialWorkouts, no seed scripts, no "preloaded" content of any kind. 
When localStorage/DB is empty, show an empty state. "Realistic sample data pre-loaded" is not a feature, it is a bug. To test the app, open it in the browser and add data through the UI as a real user would.`,
|
|
1156
|
+
`- NARRATE YOUR WORK IN REAL TIME — THIS IS CRITICAL: The user is watching and sees nothing while you work. You must send short chat messages as you go so they know what is happening. Do NOT work silently for minutes then dump everything at the end. After every significant action, write a brief update: what you just did, what you found, what you are doing next. Examples: "Opening Obsidian and Roam to study the UI patterns..." / "Found it — Craft uses warm cream + serif type, very distinct. Stealing that direction." / "Building the graph view now, using D3 force-directed layout." / "Graph is working. Testing bi-directional links next." One or two sentences is enough — just keep the user in the loop continuously. Think of it like pair programming where you narrate out loud. Never go more than 60 seconds without sending an update.`,
|
|
1157
|
+
`- RESEARCH STANDARD: When asked to research before building, visit MINIMUM 5 sources. Navigate to actual live apps and take screenshots — reading a description is not research. Extract specific named UI patterns you are borrowing. Shallow research = shallow output.`,
|
|
1158
|
+
`- BROWSER PROFILE — ALWAYS VISIBLE: ALWAYS use profile="agentforge" on every single browser call: \`browser(action="...", profile="agentforge")\`. NEVER use the default headless openclaw browser — it is invisible to the user. The agentforge profile runs on port 9223 and is a real Chrome window the user can watch. If you omit profile="agentforge", the user cannot see what you are doing.`,
|
|
1159
|
+
`- BROWSER TAB RULE: \`openclaw browser navigate <url>\` loads the URL into the currently focused tab — which is the user's live AgentForge dashboard. Calling navigate with an external URL DESTROYS the user's chat view. Always use \`openclaw browser open <url>\` for any external site — it opens a new tab and leaves the dashboard untouched. When done researching, close those tabs with \`openclaw browser close <id>\` and restore the dashboard with \`openclaw browser focus <id>\`.`,
|
|
1160
|
+
`- CURRENT YEAR IS ${new Date().getFullYear()}. Never hardcode 2024 or 2025 in footers, copyright notices, or anywhere else. Always use ${new Date().getFullYear()}.`,
|
|
1161
|
+
`- BUILD QUALITY — NO LAZY ONE-FILE SETUPS: When asked to build something, build it properly. A real app has a real structure: separate files for server, frontend, styles, and logic. No single-file HTML dumps with everything jammed together. No "simple version" shortcuts. If the user asks for a web app, build a web app — with a backend (Node/Express or equivalent), a real frontend, a database or persistent storage, and proper file organization. The only exception is if the user explicitly asks for a quick prototype or single-file output. When in doubt, build the real thing.`,
|
|
1004
1162
|
`]`
|
|
1005
1163
|
].filter(Boolean).join('\n');
|
|
1006
|
-
finalMessage = platformContext + '\n\n' + finalMessage;
|
|
1164
|
+
finalMessage = platformContext + '\n\n' + (projectContext ? projectContext + '\n\n' : '') + finalMessage;
|
|
1007
1165
|
|
|
1008
1166
|
// If conversation history was loaded from DB (e.g. session expired, worker restarted,
|
|
1009
1167
|
// or user returning hours later), prepend it so the agent has full context.
|
|
@@ -1039,7 +1197,7 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
1039
1197
|
return text.replace(/\[System context:[\s\S]*?\n\]/g, '').trim();
|
|
1040
1198
|
};
|
|
1041
1199
|
const historyText = conversationHistory
|
|
1042
|
-
.slice(-
|
|
1200
|
+
.slice(-50) // last 50 messages of conversation history
|
|
1043
1201
|
.map(msg => {
|
|
1044
1202
|
const role = msg.role === 'user' ? 'User' : 'Assistant';
|
|
1045
1203
|
const content = msg.role === 'user'
|
|
@@ -1074,6 +1232,9 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
1074
1232
|
let taskResult;
|
|
1075
1233
|
let iterationMessage = finalMessage;
|
|
1076
1234
|
|
|
1235
|
+
// Agents use the browser concurrently — each tracks its own tab IDs.
|
|
1236
|
+
// No global lock; locking serialized all work behind any single browsing agent.
|
|
1237
|
+
|
|
1077
1238
|
while (iteration < MAX_ITERATIONS) {
|
|
1078
1239
|
iteration++;
|
|
1079
1240
|
|
|
@@ -1092,7 +1253,7 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
1092
1253
|
console.log(`[${taskId}] 🏃 Runner: ${useHampagent ? '⚡ HAMPAGENT' : '🔧 OPENCLAW'} — agent ${agentId} iteration ${iteration}`);
|
|
1093
1254
|
const runAgentStart = Date.now();
|
|
1094
1255
|
taskResult = await activeRunner.runAgentTask(
|
|
1095
|
-
agentId, iterationMessage, taskCwd, sessionId, iteration === 1 ? image : null, browserProfile, actualWorkDir
|
|
1256
|
+
agentId, iterationMessage, taskCwd, sessionId, iteration === 1 ? image : null, browserProfile, actualWorkDir, agentModel || null
|
|
1096
1257
|
);
|
|
1097
1258
|
const runAgentDuration = Date.now() - runAgentStart;
|
|
1098
1259
|
console.log(`[${taskId}] runAgentTask iteration ${iteration} returned after ${runAgentDuration}ms, success=${taskResult?.success}`);
|
|
@@ -1159,6 +1320,7 @@ export class AgentForgeWorker extends EventEmitter {
|
|
|
1159
1320
|
console.log(`[${taskId}] Got identity in ${Date.now() - identityStart}ms: ${identity.identityName}`);
|
|
1160
1321
|
}
|
|
1161
1322
|
|
|
1323
|
+
|
|
1162
1324
|
// Send completion with identity info, final response text, and sessionId for maestro
|
|
1163
1325
|
// Filter openclaw's "No reply from agent." placeholder — it appears when the agent only
|
|
1164
1326
|
// used tools with no text response (e.g. TTS-only tasks). If we send it, the browser's
|
|
@@ -1553,17 +1715,9 @@ Review and add specific steps, pitfalls, and patterns that helped succeed.
|
|
|
1553
1715
|
}
|
|
1554
1716
|
}
|
|
1555
1717
|
|
|
1556
|
-
// Record that an agent produced output (reset stuck detection)
|
|
1557
|
-
recordAgentActivity(agentId) {
|
|
1558
|
-
this.lastAgentActivity.set(agentId, Date.now());
|
|
1559
|
-
this.pingsSinceActivity.set(agentId, 0);
|
|
1560
|
-
}
|
|
1561
|
-
|
|
1562
1718
|
// Collect detailed diagnostics for debug agent
|
|
1563
1719
|
collectDiagnostics(agentId, taskId, error, reason) {
|
|
1564
1720
|
const taskInfo = this.runningTasks.get(taskId);
|
|
1565
|
-
const lastActivity = this.lastAgentActivity.get(agentId);
|
|
1566
|
-
const pings = this.pingsSinceActivity.get(agentId) || 0;
|
|
1567
1721
|
const processingTime = this.processingStartTime.get(agentId);
|
|
1568
1722
|
|
|
1569
1723
|
return {
|
|
@@ -1577,9 +1731,6 @@ Review and add specific steps, pitfalls, and patterns that helped succeed.
|
|
|
1577
1731
|
name: error.name
|
|
1578
1732
|
} : null,
|
|
1579
1733
|
activity: {
|
|
1580
|
-
lastActivityTime: lastActivity,
|
|
1581
|
-
timeSinceActivity: lastActivity ? Date.now() - lastActivity : null,
|
|
1582
|
-
pingsSinceActivity: pings,
|
|
1583
1734
|
processingStartTime: processingTime,
|
|
1584
1735
|
processingDuration: processingTime ? Date.now() - processingTime : null
|
|
1585
1736
|
},
|
|
@@ -1605,121 +1756,8 @@ Review and add specific steps, pitfalls, and patterns that helped succeed.
|
|
|
1605
1756
|
});
|
|
1606
1757
|
}
|
|
1607
1758
|
|
|
1608
|
-
// Check for stuck agents on each ping
|
|
1609
|
-
checkForStuckAgents() {
|
|
1610
|
-
for (const [agentId, isProcessing] of this.agentProcessing.entries()) {
|
|
1611
|
-
if (isProcessing) {
|
|
1612
|
-
// First, check if the process is still alive - if so, it's probably just thinking
|
|
1613
|
-
const agentInfo = this.cli.activeAgents?.get(agentId);
|
|
1614
|
-
const pid = agentInfo?.proc?.pid;
|
|
1615
|
-
if (pid) {
|
|
1616
|
-
try {
|
|
1617
|
-
// process.kill(pid, 0) checks if process exists without killing it
|
|
1618
|
-
process.kill(pid, 0);
|
|
1619
|
-
// Process is alive - record activity to prevent false stuck detection
|
|
1620
|
-
// This handles cases where the CLI is blocking on API calls with no stdout
|
|
1621
|
-
this.recordAgentActivity(agentId);
|
|
1622
|
-
} catch (e) {
|
|
1623
|
-
// Process is dead - let stuck detection proceed
|
|
1624
|
-
console.log(`⚠️ Agent ${agentId} process (PID ${pid}) appears dead`);
|
|
1625
|
-
}
|
|
1626
|
-
}
|
|
1627
|
-
|
|
1628
|
-
// Increment ping counter for this agent
|
|
1629
|
-
const pings = (this.pingsSinceActivity.get(agentId) || 0) + 1;
|
|
1630
|
-
this.pingsSinceActivity.set(agentId, pings);
|
|
1631
|
-
|
|
1632
|
-
// Check if there's an active task for this agent
|
|
1633
|
-
let hasActiveTask = false;
|
|
1634
|
-
for (const [taskId, taskInfo] of this.runningTasks.entries()) {
|
|
1635
|
-
if (taskInfo.agentId === agentId && !taskInfo.cancelled) {
|
|
1636
|
-
hasActiveTask = true;
|
|
1637
|
-
break;
|
|
1638
|
-
}
|
|
1639
|
-
}
|
|
1640
|
-
|
|
1641
|
-
// Use very long threshold if task is active (10 pings = 300s / 5 min)
|
|
1642
|
-
// OpenClaw embedded agents can spend 2-3+ minutes on complex reasoning
|
|
1643
|
-
// Only mark as stuck if truly unresponsive (no output for 5+ minutes)
|
|
1644
|
-
const threshold = hasActiveTask ? 10 : this.STUCK_PING_THRESHOLD;
|
|
1645
|
-
|
|
1646
|
-
// Log warning when agent is quiet but not yet stuck (helps with debugging)
|
|
1647
|
-
if (pings >= this.STUCK_PING_THRESHOLD && pings < threshold) {
|
|
1648
|
-
console.log(`⚠️ Agent ${agentId} quiet for ${pings} pings (${Math.round((Date.now() - this.lastAgentActivity.get(agentId)) / 1000)}s), but task is active - waiting...`);
|
|
1649
|
-
}
|
|
1650
|
-
|
|
1651
|
-
if (pings >= threshold) {
|
|
1652
|
-
const lastActivity = this.lastAgentActivity.get(agentId);
|
|
1653
|
-
const elapsed = lastActivity ? Math.round((Date.now() - lastActivity) / 1000) : '?';
|
|
1654
|
-
const reason = hasActiveTask ? 'no output for 300s+ AND process dead' : 'no active task';
|
|
1655
|
-
console.log(`🚨 STUCK DETECTED: Agent ${agentId} has had ${pings} pings with no activity (${reason}, last activity: ${elapsed}s ago)`);
|
|
1656
|
-
console.log(`🚨 Force resetting agent ${agentId} to accept new tasks`);
|
|
1657
|
-
|
|
1658
|
-
// Find the task for diagnostics
|
|
1659
|
-
let stuckTaskId = null;
|
|
1660
|
-
for (const [taskId, taskInfo] of this.runningTasks.entries()) {
|
|
1661
|
-
if (taskInfo.agentId === agentId && !taskInfo.cancelled) {
|
|
1662
|
-
stuckTaskId = taskId;
|
|
1663
|
-
break;
|
|
1664
|
-
}
|
|
1665
|
-
}
|
|
1666
|
-
|
|
1667
|
-
// Collect diagnostics before cleanup
|
|
1668
|
-
const diagnostics = this.collectDiagnostics(
|
|
1669
|
-
agentId,
|
|
1670
|
-
stuckTaskId,
|
|
1671
|
-
new Error(`Agent unresponsive for ${elapsed}s`),
|
|
1672
|
-
'stuck'
|
|
1673
|
-
);
|
|
1674
|
-
|
|
1675
|
-
// Force kill the process — try both runners
|
|
1676
|
-
this.cli.cancelAgent(agentId);
|
|
1677
|
-
this.hampagent?.cancelAgent(agentId);
|
|
1678
|
-
|
|
1679
|
-
// Clear all state for this agent
|
|
1680
|
-
this.agentProcessing.set(agentId, false);
|
|
1681
|
-
this.processingStartTime.delete(agentId);
|
|
1682
|
-
this.pingsSinceActivity.set(agentId, 0);
|
|
1683
|
-
|
|
1684
|
-
// Find and cancel any running task for this agent
|
|
1685
|
-
for (const [taskId, taskInfo] of this.runningTasks.entries()) {
|
|
1686
|
-
if (taskInfo.agentId === agentId && !taskInfo.cancelled) {
|
|
1687
|
-
taskInfo.cancelled = true;
|
|
1688
|
-
this.runningTasks.delete(taskId);
|
|
1689
|
-
this.send({
|
|
1690
|
-
type: 'task_failed',
|
|
1691
|
-
taskId,
|
|
1692
|
-
agentId,
|
|
1693
|
-
error: 'Agent became unresponsive (stuck detection triggered)'
|
|
1694
|
-
});
|
|
1695
|
-
}
|
|
1696
|
-
}
|
|
1697
|
-
|
|
1698
|
-
// Send debug report
|
|
1699
|
-
this.sendDebugReport(diagnostics, `Agent ${agentId} became unresponsive after ${elapsed}s with no activity`);
|
|
1700
|
-
|
|
1701
|
-
// Process any queued tasks
|
|
1702
|
-
const queue = this.agentQueues.get(agentId);
|
|
1703
|
-
if (queue && queue.length > 0) {
|
|
1704
|
-
console.log(`📤 Processing ${queue.length} queued tasks after stuck recovery`);
|
|
1705
|
-
setImmediate(() => this.processQueue(agentId));
|
|
1706
|
-
}
|
|
1707
|
-
}
|
|
1708
|
-
}
|
|
1709
|
-
}
|
|
1710
|
-
}
|
|
1711
|
-
|
|
1712
1759
|
async shutdown() {
|
|
1713
1760
|
console.log('🛑 Shutting down worker...');
|
|
1714
|
-
// Kill all active agent processes so they don't become orphans on restart
|
|
1715
|
-
if (this.cli && typeof this.cli.cancelAgent === 'function') {
|
|
1716
|
-
for (const agentId of this.agentProcessing.keys()) {
|
|
1717
|
-
if (this.agentProcessing.get(agentId)) {
|
|
1718
|
-
console.log(`🔪 Killing agent process: ${agentId}`);
|
|
1719
|
-
try { this.cli.cancelAgent(agentId); } catch (e) { /* already dead */ }
|
|
1720
|
-
}
|
|
1721
|
-
}
|
|
1722
|
-
}
|
|
1723
1761
|
if (this.ws) {
|
|
1724
1762
|
this.ws.close();
|
|
1725
1763
|
}
|