aether-mcp-server 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/aether-memory-store.js +431 -0
- package/dist/agent.js +1046 -0
- package/dist/captcha-solver.js +303 -0
- package/dist/cdp-bridge.js +2317 -0
- package/dist/cdp-client.js +1602 -0
- package/dist/index.js +71 -0
- package/dist/locator-engine.js +326 -0
- package/dist/mcp-responses.js +20 -0
- package/dist/mcp-server.js +1236 -0
- package/dist/mcp-task-memory.js +36 -0
- package/dist/page-snapshot-cache.js +75 -0
- package/dist/policy-client.js +104 -0
- package/dist/stealth.js +137 -0
- package/dist/trace-recorder.js +46 -0
- package/dist/ws-server.js +134 -0
- package/package.json +35 -0
package/dist/agent.js
ADDED
|
@@ -0,0 +1,1046 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.aetherNavigationAgent = exports.aetherTools = exports.activeTasks = void 0;
|
|
37
|
+
exports.getTaskEventLog = getTaskEventLog;
|
|
38
|
+
exports.updateScratchpad = updateScratchpad;
|
|
39
|
+
exports.saveTaskState = saveTaskState;
|
|
40
|
+
exports.restoreUnfinishedTasks = restoreUnfinishedTasks;
|
|
41
|
+
exports.recordExtensionFailure = recordExtensionFailure;
|
|
42
|
+
exports.recordExtensionSuccess = recordExtensionSuccess;
|
|
43
|
+
exports.isCircuitBreakerOpen = isCircuitBreakerOpen;
|
|
44
|
+
exports.getCircuitBreakerStatus = getCircuitBreakerStatus;
|
|
45
|
+
exports.purgeCompletedTasks = purgeCompletedTasks;
|
|
46
|
+
exports.delegateToBrowser = delegateToBrowser;
|
|
47
|
+
const langgraph_1 = require("@langchain/langgraph");
|
|
48
|
+
const google_genai_1 = require("@langchain/google-genai");
|
|
49
|
+
const messages_1 = require("@langchain/core/messages");
|
|
50
|
+
const tools_1 = require("@langchain/core/tools");
|
|
51
|
+
const prebuilt_1 = require("@langchain/langgraph/prebuilt");
|
|
52
|
+
const zod_1 = require("zod");
|
|
53
|
+
const dotenv = __importStar(require("dotenv"));
|
|
54
|
+
const fs = __importStar(require("fs"));
|
|
55
|
+
const path = __importStar(require("path"));
|
|
56
|
+
const ws_server_1 = require("./ws-server");
|
|
57
|
+
dotenv.config();
|
|
58
|
+
// ============================================================================
|
|
59
|
+
// CONSTANTS
|
|
60
|
+
// ============================================================================
|
|
61
|
+
// --- Per-task limits ---------------------------------------------------------
// Back-to-back tool errors tolerated before a task is force-failed.
const MAX_CONSECUTIVE_ERRORS = 5;
// Wall-clock budget for a single task: 10 minutes.
const TASK_TIMEOUT_MS = 600_000;

// --- Retry schedules (milliseconds, doubling each attempt) --------------------
// Waits between tool retries.
const RETRY_DELAYS_MS = [1_000, 2_000, 4_000, 8_000, 16_000];
const MAX_TOOL_RETRIES = 5;
// Waits between LLM API retries.
const LLM_RETRY_DELAYS_MS = [2_000, 4_000, 8_000];
const MAX_LLM_RETRIES = 3;

// --- Memory bounds -----------------------------------------------------------
// Cap on EVENT_LOG entries across all tasks combined.
const MAX_EVENT_LOG_TOTAL = 5_000;
// Cap on tracked tasks, to keep memory bounded.
const MAX_ACTIVE_TASKS = 100;
// Age (5 minutes) after which finished tasks become eligible for purging.
const COMPLETED_TASK_TTL_MS = 300_000;

// --- Extension circuit breaker -----------------------------------------------
// Consecutive extension failures that open the circuit.
const CIRCUIT_BREAKER_THRESHOLD = 10;
// Time (1 minute) the circuit stays open before a test call is allowed.
const CIRCUIT_BREAKER_COOLDOWN_MS = 60_000;
|
|
72
|
+
// In-memory event log shared by every task, oldest entry first.
const EVENT_LOG = [];
// Maximum number of events retained per task (the newest ones are kept).
const MAX_EVENT_LOG_SIZE = 500;
/**
 * Record an agent event and broadcast it.
 *
 * The event is appended to EVENT_LOG and emitted on the ws-server
 * `taskEvents` bus as "agent_event". Before appending, the OLDEST events for
 * the same task are dropped so that, including the new event, the task holds
 * at most MAX_EVENT_LOG_SIZE entries.
 *
 * @param {{taskId: string}} event - Event to record; only `taskId` is read here.
 */
function emitAgentEvent(event) {
    // Events to evict for this task so the incoming one still fits the cap.
    let excess = 1 - MAX_EVENT_LOG_SIZE;
    for (const logged of EVENT_LOG) {
        if (logged.taskId === event.taskId) {
            excess++;
        }
    }
    // Walk from the front: the log is append-only, so the lowest indexes are
    // the oldest entries. Only advance when nothing was removed at `i`.
    for (let i = 0; excess > 0 && i < EVENT_LOG.length;) {
        if (EVENT_LOG[i].taskId === event.taskId) {
            EVENT_LOG.splice(i, 1);
            excess--;
        }
        else {
            i++;
        }
    }
    EVENT_LOG.push(event);
    // Also emit to the taskEvents bus for SSE consumers
    ws_server_1.taskEvents.emit("agent_event", event);
}
|
|
91
|
+
/**
 * Collect the logged events that belong to one task.
 *
 * @param {string} taskId - Task whose events are wanted.
 * @returns {Array<object>} A new array of matching EVENT_LOG entries, in log order.
 */
function getTaskEventLog(taskId) {
    const matches = [];
    for (const entry of EVENT_LOG) {
        if (entry.taskId === taskId) {
            matches.push(entry);
        }
    }
    return matches;
}
|
|
94
|
+
// ============================================================================
|
|
95
|
+
// GLOBAL TASK TRACKER & SCRATCHPAD LOGGER
|
|
96
|
+
// ============================================================================
|
|
97
|
+
// Registry of tracked tasks, keyed by task ID.
exports.activeTasks = new Map();
// Scratchpads and state files live in ".aether_tasks", one level above the
// process working directory.
const SCRATCHPAD_DIR = path.join(process.cwd(), "..", ".aether_tasks");
// Create the directory at module load so later writes have somewhere to go.
if (fs.existsSync(SCRATCHPAD_DIR) === false) {
    fs.mkdirSync(SCRATCHPAD_DIR, { recursive: true });
}
|
|
102
|
+
/**
 * Write a timestamped entry to a task's Markdown scratchpad
 * (`<SCRATCHPAD_DIR>/<taskId>.md`) using async file I/O.
 *
 * Write failures are logged to stderr and never thrown, so callers may fire
 * and forget.
 *
 * @param {string} taskId - Task the entry belongs to.
 * @param {string} content - Body of the entry.
 * @param {boolean} [append=true] - Append to the file when true; otherwise
 *   overwrite it, starting with a `# Task: <taskId>` heading.
 * @returns {Promise<void>}
 */
async function updateScratchpad(taskId, content, append = true) {
    // Task IDs can arrive from tool-call input. Keep only the last path
    // segment so an ID containing separators ("../x") cannot place the file
    // outside SCRATCHPAD_DIR. IDs without separators are unaffected.
    const safeName = path.basename(String(taskId));
    const filePath = path.join(SCRATCHPAD_DIR, `${safeName}.md`);
    const timestamp = new Date().toISOString();
    const formattedContent = `\n## [${timestamp}]\n${content}\n`;
    try {
        if (append) {
            await fs.promises.appendFile(filePath, formattedContent, "utf-8");
        }
        else {
            await fs.promises.writeFile(filePath, `# Task: ${taskId}\n${formattedContent}`, "utf-8");
        }
    }
    catch (err) {
        // Best effort: scratchpad logging must not break the calling tool.
        console.error(`[Agent] Scratchpad write error for task ${taskId}: ${err.message}`);
    }
}
|
|
119
|
+
/**
 * Persist a task's state to `<SCRATCHPAD_DIR>/<taskId>.state.json`, merging
 * with any state already on disk.
 *
 * `objective` and `startedAt` are kept from the existing file when present.
 * `errorCount` / `stepCount` come from `extra`, then the existing file, then 0.
 * `finishedAt` is stamped whenever `status` is anything other than "running".
 * A failed write is logged to stderr and not thrown.
 *
 * @param {string} taskId - Task being saved.
 * @param {string} objective - Objective, used only if none is stored yet.
 * @param {string} status - New status to record.
 * @param {{errorCount?: number, stepCount?: number}} [extra] - Optional counters.
 * @returns {Promise<void>}
 */
async function saveTaskState(taskId, objective, status, extra) {
    // Keep only the last path segment so an ID containing separators cannot
    // place the file outside SCRATCHPAD_DIR. Ordinary IDs are unaffected.
    const safeName = path.basename(String(taskId));
    const filePath = path.join(SCRATCHPAD_DIR, `${safeName}.state.json`);
    let existing = {};
    try {
        const parsed = JSON.parse(await fs.promises.readFile(filePath, "utf-8"));
        // Valid JSON is not necessarily an object (e.g. `null`); ignore
        // anything that cannot carry the fields read below.
        if (parsed !== null && typeof parsed === "object") {
            existing = parsed;
        }
    }
    catch { /* no readable previous state — start fresh */ }
    const state = {
        taskId,
        objective: existing.objective ?? objective,
        status,
        startedAt: existing.startedAt ?? new Date().toISOString(),
        errorCount: extra?.errorCount ?? existing.errorCount ?? 0,
        stepCount: extra?.stepCount ?? existing.stepCount ?? 0,
        ...(status !== "running" ? { finishedAt: new Date().toISOString() } : {}),
    };
    try {
        await fs.promises.writeFile(filePath, JSON.stringify(state, null, 2), "utf-8");
    }
    catch (err) {
        console.error(`[Agent] Failed to persist task state for ${taskId}: ${err.message}`);
    }
}
|
|
142
|
+
/**
 * Re-register tasks whose persisted state still says "running".
 *
 * Intended for server startup (the caller is outside this file's visible
 * part): such tasks were interrupted by a restart, so each one is added to
 * `activeTasks` with a "Crashed (server restarted)" status and its state file
 * is rewritten with status "crashed". Files that cannot be read or parsed are
 * logged and skipped; an unreadable scratchpad directory ends the scan quietly.
 *
 * @returns {Promise<void>}
 */
async function restoreUnfinishedTasks() {
    let entries;
    try {
        entries = await fs.promises.readdir(SCRATCHPAD_DIR);
    }
    catch {
        return;
    }
    const stateFiles = entries.filter((name) => name.endsWith(".state.json"));
    for (const file of stateFiles) {
        try {
            const contents = await fs.promises.readFile(path.join(SCRATCHPAD_DIR, file), "utf-8");
            const state = JSON.parse(contents);
            if (state.status !== "running") {
                continue;
            }
            console.error(`[Agent] Restored crashed task: ${state.taskId}`);
            const restored = {
                objective: state.objective,
                status: "Crashed (server restarted)",
                lastObservation: "Server was restarted while this task was in progress.",
                shouldInterrupt: false,
                // Approximation: one minute before now, not the persisted start time.
                startedAt: Date.now() - 60000,
                stepCount: state.stepCount || 0,
                consecutiveErrors: 0,
                toolCallHistory: [],
            };
            exports.activeTasks.set(state.taskId, restored);
            await saveTaskState(state.taskId, state.objective, "crashed");
        }
        catch (e) {
            console.error(`[Agent] Could not restore state from ${file}: ${e.message}`);
        }
    }
}
|
|
180
|
+
/**
 * Apply a status change to a tracked task and announce it.
 *
 * When the task is registered in `activeTasks`, its `status` is overwritten
 * and `stepCount` / `consecutiveErrors` are copied from `extra` if supplied.
 * Whether or not the task is known, a "task_update" is emitted on the
 * taskEvents bus and a "state_change" agent event is recorded.
 *
 * @param {string} taskId - Task being updated.
 * @param {string} status - New status value.
 * @param {object} [extra={}] - Extra fields merged into both emitted payloads.
 */
function emitTaskUpdate(taskId, status, extra = {}) {
    const tracked = exports.activeTasks.get(taskId);
    if (tracked) {
        const { stepCount, consecutiveErrors } = extra;
        tracked.status = status;
        if (stepCount !== undefined) {
            tracked.stepCount = stepCount;
        }
        if (consecutiveErrors !== undefined) {
            tracked.consecutiveErrors = consecutiveErrors;
        }
    }
    const busPayload = { taskId, status, timestamp: Date.now(), ...extra };
    ws_server_1.taskEvents.emit("task_update", busPayload);
    const stateChange = {
        taskId,
        timestamp: new Date().toISOString(),
        eventType: "state_change",
        data: { status, ...extra },
    };
    emitAgentEvent(stateChange);
}
|
|
199
|
+
// ============================================================================
|
|
200
|
+
// TIMEOUT WRAPPER WITH RETRY
|
|
201
|
+
// ============================================================================
|
|
202
|
+
/**
 * Settle with `promise`, or reject if it takes longer than `ms`.
 *
 * The timer is cleared once the race settles, whichever side wins. The
 * wrapped promise itself is not cancelled.
 *
 * @param {Promise<*>} promise - Work to wait for.
 * @param {number} [ms=30000] - Time limit in milliseconds.
 * @param {string} [label="Action"] - Name used in the timeout error message.
 * @returns {Promise<*>} The value `promise` resolves with.
 * @throws {Error} On timeout, or whatever `promise` rejects with.
 */
async function withTimeout(promise, ms = 30000, label = "Action") {
    let timer;
    const deadline = new Promise((_resolve, reject) => {
        const onExpire = () => reject(new Error(`${label} timed out after ${ms}ms. The browser might be hanging.`));
        timer = setTimeout(onExpire, ms);
    });
    try {
        return await Promise.race([promise, deadline]);
    }
    finally {
        clearTimeout(timer);
    }
}
|
|
209
|
+
/**
 * Run `fn`, retrying after failures with the RETRY_DELAYS_MS backoff schedule.
 *
 * Every failed attempt records a "tool_retry" agent event. While attempts
 * remain, a scratchpad note is written (not awaited) and the next attempt
 * waits for the scheduled delay; once the schedule runs out its last entry
 * is reused.
 *
 * @param {() => Promise<*>} fn - Operation to attempt.
 * @param {string} taskId - Task the attempts are logged under.
 * @param {string} toolName - Tool name used in events and notes.
 * @param {number} [maxRetries=MAX_TOOL_RETRIES] - Retries after the first attempt.
 * @returns {Promise<*>} The first successful result of `fn`.
 * @throws The error from the final failed attempt.
 */
async function withRetry(fn, taskId, toolName, maxRetries = MAX_TOOL_RETRIES) {
    let failure = null;
    let attempt = 0;
    while (attempt <= maxRetries) {
        try {
            return await fn();
        }
        catch (err) {
            failure = err;
            emitAgentEvent({
                taskId,
                timestamp: new Date().toISOString(),
                eventType: "tool_retry",
                data: { tool: toolName, attempt: attempt + 1, error: err.message },
            });
            const retriesRemain = attempt < maxRetries;
            if (retriesRemain) {
                const fallbackDelay = RETRY_DELAYS_MS[RETRY_DELAYS_MS.length - 1];
                const delay = RETRY_DELAYS_MS[attempt] ?? fallbackDelay;
                void updateScratchpad(taskId, `⚠️ ${toolName} failed (attempt ${attempt + 1}/${maxRetries + 1}). Retrying in ${delay}ms...`);
                await new Promise((wake) => setTimeout(wake, delay));
            }
        }
        attempt++;
    }
    throw failure;
}
|
|
235
|
+
// Shared circuit-breaker state for extension calls.
// `state` takes the values "closed", "open" and "half-open".
const circuitBreaker = {
    state: "closed",
    failureCount: 0,
    lastFailureAt: 0,
    lastStateChangeAt: Date.now(),
};
/**
 * Count one extension failure and open the circuit once the running count
 * reaches CIRCUIT_BREAKER_THRESHOLD.
 *
 * Opening is reported once per transition: it is logged to stderr and
 * recorded as a "critical_error" agent event under the "system" task ID.
 */
function recordExtensionFailure() {
    circuitBreaker.failureCount++;
    circuitBreaker.lastFailureAt = Date.now();
    const alreadyOpen = circuitBreaker.state === "open";
    const thresholdReached = circuitBreaker.failureCount >= CIRCUIT_BREAKER_THRESHOLD;
    if (alreadyOpen || !thresholdReached) {
        return;
    }
    circuitBreaker.state = "open";
    circuitBreaker.lastStateChangeAt = Date.now();
    console.error(`[Agent] Circuit breaker OPENED after ${circuitBreaker.failureCount} consecutive extension failures`);
    emitAgentEvent({
        taskId: "system",
        timestamp: new Date().toISOString(),
        eventType: "critical_error",
        data: { circuitBreaker: "opened", failureCount: circuitBreaker.failureCount },
    });
}
|
|
256
|
+
/**
 * Note a successful extension call.
 *
 * From "half-open" the breaker closes, its failure count is cleared and the
 * transition is logged. While "closed" only the failure count is cleared.
 * An "open" breaker is left untouched.
 */
function recordExtensionSuccess() {
    switch (circuitBreaker.state) {
        case "half-open":
            circuitBreaker.state = "closed";
            circuitBreaker.failureCount = 0;
            circuitBreaker.lastStateChangeAt = Date.now();
            console.error("[Agent] Circuit breaker CLOSED after successful extension call");
            break;
        case "closed":
            circuitBreaker.failureCount = 0;
            break;
        default:
            break;
    }
}
|
|
267
|
+
/**
 * Report whether extension calls are currently blocked by the breaker.
 *
 * Has a side effect: once an open breaker has waited
 * CIRCUIT_BREAKER_COOLDOWN_MS, this call moves it to "half-open" and returns
 * false so a test call can go through.
 *
 * @returns {boolean} true only while the breaker is open and still cooling down.
 */
function isCircuitBreakerOpen() {
    if (circuitBreaker.state !== "open") {
        return false;
    }
    const sinceOpened = Date.now() - circuitBreaker.lastStateChangeAt;
    const cooledDown = sinceOpened >= CIRCUIT_BREAKER_COOLDOWN_MS;
    if (!cooledDown) {
        return true;
    }
    circuitBreaker.state = "half-open";
    circuitBreaker.lastStateChangeAt = Date.now();
    console.error("[Agent] Circuit breaker HALF-OPEN — allowing test call");
    return false;
}
|
|
281
|
+
/**
 * Summarise the breaker for display.
 *
 * @returns {string} Text of the form "<state> (failures: <count>)".
 */
function getCircuitBreakerStatus() {
    const { state, failureCount } = circuitBreaker;
    return `${state} (failures: ${failureCount})`;
}
|
|
284
|
+
// ============================================================================
|
|
285
|
+
// MEMORY MANAGEMENT — purge old completed tasks and trim event log
|
|
286
|
+
// ============================================================================
|
|
287
|
+
/**
 * Housekeeping pass that bounds in-memory state.
 *
 * 1. Removes tasks from `activeTasks` whose status is terminal ("success",
 *    "failed", "crashed", "interrupted", "timeout", or starting with
 *    "Crashed") and whose `startedAt` is older than COMPLETED_TASK_TTL_MS.
 * 2. Reports the remaining task count through ws-server's setActiveTaskCount.
 * 3. Drops the oldest EVENT_LOG entries beyond MAX_EVENT_LOG_TOTAL.
 *
 * NOTE(review): the TTL is measured from `startedAt`, not from when the task
 * finished, so a long-running task can be purged soon after it completes —
 * confirm this is intended.
 */
function purgeCompletedTasks() {
    const terminalStatuses = ["success", "failed", "crashed", "interrupted", "timeout"];
    const now = Date.now();
    // Deleting the entry currently being visited is safe during Map iteration.
    exports.activeTasks.forEach((task, taskId) => {
        const finished = terminalStatuses.includes(task.status) || task.status.startsWith("Crashed");
        if (!finished) {
            return;
        }
        const age = now - task.startedAt;
        if (age > COMPLETED_TASK_TTL_MS) {
            exports.activeTasks.delete(taskId);
        }
    });
    // Keep the health endpoint's task count current.
    const { setActiveTaskCount } = ws_server_1;
    setActiveTaskCount(exports.activeTasks.size);
    // Enforce the global event-log cap by discarding from the front (oldest).
    const overflow = EVENT_LOG.length - MAX_EVENT_LOG_TOTAL;
    if (overflow > 0) {
        EVENT_LOG.splice(0, overflow);
    }
}
|
|
307
|
+
// Run the purge pass once a minute for the lifetime of the module.
const purgeInterval = setInterval(purgeCompletedTasks, 60_000);
// Where the timer handle offers unref(), detach it so this interval alone
// does not keep the process alive.
purgeInterval.unref?.();
|
|
313
|
+
// ============================================================================
|
|
314
|
+
// LLM API RETRY — resilient LLM invocation
|
|
315
|
+
// ============================================================================
|
|
316
|
+
/**
 * Call `llm.invoke(messages)`, retrying failed calls with the
 * LLM_RETRY_DELAYS_MS backoff schedule.
 *
 * Each attempt records an "llm_call" event; success records "llm_response"
 * with the number of tool calls in the reply. A failed attempt records a
 * "tool_retry" event (tool "llm_invoke") and, while attempts remain, writes a
 * scratchpad note (not awaited) and waits before trying again.
 *
 * @param {Array<*>} messages - Conversation passed to the model unchanged.
 * @param {string} taskId - Task the events and notes are logged under.
 * @returns {Promise<*>} The model response.
 * @throws The error from the final failed attempt.
 */
async function invokeLLMWithRetry(messages, taskId) {
    // Local shorthand that stamps every event with the task and current time.
    const record = (eventType, data) => emitAgentEvent({
        taskId,
        timestamp: new Date().toISOString(),
        eventType,
        data,
    });
    let failure = null;
    let attempt = 0;
    while (attempt <= MAX_LLM_RETRIES) {
        try {
            record("llm_call", { attempt: attempt + 1, messageCount: messages.length });
            const reply = await llm.invoke(messages);
            record("llm_response", { toolCalls: reply.tool_calls?.length ?? 0 });
            return reply;
        }
        catch (err) {
            failure = err;
            record("tool_retry", { tool: "llm_invoke", attempt: attempt + 1, error: err.message });
            if (attempt < MAX_LLM_RETRIES) {
                const fallbackDelay = LLM_RETRY_DELAYS_MS[LLM_RETRY_DELAYS_MS.length - 1];
                const delay = LLM_RETRY_DELAYS_MS[attempt] ?? fallbackDelay;
                void updateScratchpad(taskId, `⚠️ LLM API call failed (attempt ${attempt + 1}/${MAX_LLM_RETRIES + 1}). Retrying in ${delay}ms...`);
                await new Promise((wake) => setTimeout(wake, delay));
            }
        }
        attempt++;
    }
    throw failure;
}
|
|
352
|
+
// Reducer: take the incoming value unless it is null or undefined.
const latestOrPrevious = (previous, incoming) => incoming ?? previous;
// Reducer: take the incoming value unless it is undefined (null is accepted).
const definedOrPrevious = (previous, incoming) => (incoming !== undefined ? incoming : previous);
// Per-field state definitions: `value` merges an update into the current
// value and `default` supplies the initial one. Presumably consumed as the
// agent graph's state channels — the consumer is outside this view.
const graphState = {
    // Messages accumulate; every other field is replaced by its latest update.
    messages: { value: (existing, added) => existing.concat(added), default: () => [] },
    objective: { value: latestOrPrevious, default: () => "" },
    taskId: { value: latestOrPrevious, default: () => `task-${Date.now()}` },
    parentId: { value: latestOrPrevious, default: () => undefined },
    finalResult: { value: latestOrPrevious, default: () => null },
    status: { value: latestOrPrevious, default: () => "pending" },
    errorCount: { value: definedOrPrevious, default: () => 0 },
    stepCount: { value: definedOrPrevious, default: () => 0 },
};
|
|
362
|
+
// ============================================================================
|
|
363
|
+
// 2. SYSTEM PROMPT
|
|
364
|
+
// ============================================================================
|
|
365
|
+
const AETHER_SYSTEM_PROMPT = `You are the Aether Navigation Agent — an elite browser automation specialist with Total Browser Command.
|
|
366
|
+
Your mission: Achieve the user's objective with maximum reliability and efficiency.
|
|
367
|
+
|
|
368
|
+
═══════════════════════════════════════════════════════════
|
|
369
|
+
🧠 CORE OPERATING PRINCIPLES
|
|
370
|
+
═══════════════════════════════════════════════════════════
|
|
371
|
+
|
|
372
|
+
1. NEVER GIVE UP — Every obstacle is a puzzle, not a dead end.
|
|
373
|
+
- If a selector fails → try a different selector
|
|
374
|
+
- If an element ID is stale → call get_state to refresh
|
|
375
|
+
- If a page won't load → try navigate again, or check URL
|
|
376
|
+
- If all DOM methods fail → use coordinate clicking
|
|
377
|
+
- If coordinates fail → try scrolling first, then retry
|
|
378
|
+
|
|
379
|
+
2. ALWAYS KNOW YOUR STATE — Never act blind.
|
|
380
|
+
- Call get_state BEFORE any interaction if you're uncertain about the current page
|
|
381
|
+
- After navigation, ALWAYS call get_state to confirm you arrived at the right page
|
|
382
|
+
- After clicking, call get_state to verify the click had the expected effect
|
|
383
|
+
- If get_state returns unexpected results, re-orient before proceeding
|
|
384
|
+
|
|
385
|
+
3. BE METHODICAL — Rushing causes mistakes.
|
|
386
|
+
- Break complex objectives into small, verifiable steps
|
|
387
|
+
- Verify each step succeeded before moving to the next
|
|
388
|
+
- Save intermediate findings with write_to_scratchpad
|
|
389
|
+
- If a multi-step flow breaks, use get_state to diagnose where you are
|
|
390
|
+
|
|
391
|
+
═══════════════════════════════════════════════════════════
|
|
392
|
+
🎯 INTERACTION HIERARCHY (use in order)
|
|
393
|
+
═══════════════════════════════════════════════════════════
|
|
394
|
+
|
|
395
|
+
TIER 1 — Aether IDs (Most Reliable):
|
|
396
|
+
- Elements marked with @ID (e.g., @12, @45) are the most precise handles
|
|
397
|
+
- Use: act(action="click", elementId="@12")
|
|
398
|
+
- These are stable across page re-renders
|
|
399
|
+
|
|
400
|
+
TIER 2 — Semantic Selectors:
|
|
401
|
+
- CSS selectors: act(action="click", selector="button[type='submit']")
|
|
402
|
+
- Text-based: act(action="click", selector="button:has-text('Sign In')")
|
|
403
|
+
- Attribute-based: act(action="type", selector="input[name='email']", value="test@test.com")
|
|
404
|
+
|
|
405
|
+
TIER 3 — Coordinate Clicking:
|
|
406
|
+
- Use computer_20241022 for pixel-perfect clicks when DOM methods fail
|
|
407
|
+
- Get coordinates from get_state interactive elements list
|
|
408
|
+
- Always scroll the element into view first if needed
|
|
409
|
+
|
|
410
|
+
TIER 4 — JavaScript Injection:
|
|
411
|
+
- Use execute_script for complex DOM manipulation
|
|
412
|
+
- Use cdp_command for protocol-level browser control
|
|
413
|
+
- This is your escape hatch for anything the other tools can't handle
|
|
414
|
+
|
|
415
|
+
═══════════════════════════════════════════════════════════
|
|
416
|
+
⚡ CRITICAL RULES
|
|
417
|
+
═══════════════════════════════════════════════════════════
|
|
418
|
+
|
|
419
|
+
✅ DO:
|
|
420
|
+
- Call get_state first to understand the current page
|
|
421
|
+
- Pass taskId to EVERY tool call for proper tracking
|
|
422
|
+
- Use write_to_scratchpad to save important findings
|
|
423
|
+
- Handle popups, cookies banners, and overlays immediately
|
|
424
|
+
- Wait for page to settle after navigation (use act(action="wait"))
|
|
425
|
+
- Verify each action succeeded before proceeding
|
|
426
|
+
- Try alternative approaches when something fails
|
|
427
|
+
|
|
428
|
+
❌ DON'T:
|
|
429
|
+
- Don't assume the page state — always verify with get_state
|
|
430
|
+
- Don't repeat the same failing action more than once
|
|
431
|
+
- Don't skip steps in a multi-step flow
|
|
432
|
+
- Don't forget to call complete_task when done
|
|
433
|
+
- Don't hallucinate element IDs — only use IDs from get_state output
|
|
434
|
+
- Don't proceed if the page clearly shows an error
|
|
435
|
+
|
|
436
|
+
═══════════════════════════════════════════════════════════
|
|
437
|
+
🔄 ERROR RECOVERY PROTOCOL
|
|
438
|
+
═══════════════════════════════════════════════════════════
|
|
439
|
+
|
|
440
|
+
When any tool returns an error:
|
|
441
|
+
1. READ the error message carefully
|
|
442
|
+
2. CALL get_state to understand current browser state
|
|
443
|
+
3. IDENTIFY what went wrong (element not found? page changed? timeout?)
|
|
444
|
+
4. CHOOSE an alternative approach from the interaction hierarchy
|
|
445
|
+
5. RETRY with the new approach
|
|
446
|
+
6. If all tiers fail, use execute_script as a last resort
|
|
447
|
+
|
|
448
|
+
═══════════════════════════════════════════════════════════
|
|
449
|
+
📋 TASK COMPLETION
|
|
450
|
+
═══════════════════════════════════════════════════════════
|
|
451
|
+
|
|
452
|
+
When the objective is FULLY achieved:
|
|
453
|
+
1. Save the result with write_to_scratchpad if needed
|
|
454
|
+
2. Call complete_task with:
|
|
455
|
+
- success: true
|
|
456
|
+
- result: The final answer, data, or confirmation message
|
|
457
|
+
- taskId: Your current task ID
|
|
458
|
+
3. The result should be concise but complete — include all information the user needs
|
|
459
|
+
|
|
460
|
+
If the objective CANNOT be achieved after exhausting all approaches:
|
|
461
|
+
1. Document what you tried and what failed in write_to_scratchpad
|
|
462
|
+
2. Call complete_task with:
|
|
463
|
+
- success: false
|
|
464
|
+
- result: Detailed explanation of what went wrong and what was attempted
|
|
465
|
+
|
|
466
|
+
═══════════════════════════════════════════════════════════
|
|
467
|
+
|
|
468
|
+
Remember: You are in control. Think step by step. Verify your assumptions.
|
|
469
|
+
Never give up — there is always another path.`;
|
|
470
|
+
// ============================================================================
|
|
471
|
+
// 3. AETHER NATIVE TOOLS
|
|
472
|
+
// ============================================================================
|
|
473
|
+
/**
 * Record a completed tool call against a tracked task.
 *
 * Increments the task's step count and appends a "success" entry to its
 * tool-call history, which is kept to the 50 most recent entries. Unknown
 * task IDs are ignored. No event is emitted here.
 *
 * @param {string} taskId - Task that made the call.
 * @param {string} toolName - Name of the tool that ran.
 */
function trackToolCall(taskId, toolName) {
    const task = exports.activeTasks.get(taskId);
    if (!task) {
        return;
    }
    ++task.stepCount;
    const entry = { tool: toolName, result: "success", timestamp: Date.now() };
    task.toolCallHistory.push(entry);
    const historyTooLong = task.toolCallHistory.length > 50;
    if (historyTooLong) {
        task.toolCallHistory = task.toolCallHistory.slice(-50);
    }
}
|
|
487
|
+
/**
 * Record a failed tool call.
 *
 * For a tracked task this bumps both the consecutive-error and step counters
 * and appends an "error" entry to its history (kept to the latest 50).
 * Whether or not the task is tracked, the failure is counted by the circuit
 * breaker and a "tool_error" agent event is recorded.
 *
 * @param {string} taskId - Task that made the call.
 * @param {string} toolName - Name of the tool that failed.
 * @param {string} error - Error text included in the event.
 */
function trackToolError(taskId, toolName, error) {
    const task = exports.activeTasks.get(taskId);
    if (task) {
        ++task.consecutiveErrors;
        ++task.stepCount;
        const entry = { tool: toolName, result: "error", timestamp: Date.now() };
        task.toolCallHistory.push(entry);
        const historyTooLong = task.toolCallHistory.length > 50;
        if (historyTooLong) {
            task.toolCallHistory = task.toolCallHistory.slice(-50);
        }
    }
    // Feed the failure into the circuit breaker.
    recordExtensionFailure();
    // Look the task up again for the event so the reported count reflects the
    // registry as it stands after the breaker update.
    const currentErrors = exports.activeTasks.get(taskId)?.consecutiveErrors ?? 0;
    emitAgentEvent({
        taskId,
        timestamp: new Date().toISOString(),
        eventType: "tool_error",
        data: { tool: toolName, error, consecutiveErrors: currentErrors },
    });
}
|
|
506
|
+
/**
 * Record a successful tool result.
 *
 * Clears the tracked task's consecutive-error counter (if the task is known),
 * reports the success to the circuit breaker, and records a "tool_result"
 * agent event.
 *
 * @param {string} taskId - Task that made the call.
 * @param {string} toolName - Name of the tool that succeeded.
 */
function trackToolSuccess(taskId, toolName) {
    const task = exports.activeTasks.get(taskId);
    if (task) {
        // A success ends any error streak.
        task.consecutiveErrors = 0;
    }
    recordExtensionSuccess();
    const resultEvent = {
        taskId,
        timestamp: new Date().toISOString(),
        eventType: "tool_result",
        data: { tool: toolName, success: true },
    };
    emitAgentEvent(resultEvent);
}
|
|
520
|
+
/**
 * Verify the browser extension is connected before a command is sent.
 *
 * A missing connection is recorded as a "connection_check" tool error for
 * the task.
 *
 * @param {string} taskId - Task on whose behalf the check is made.
 * @returns {{connected: boolean, error?: string}} `error` is present only
 *   when `connected` is false.
 */
function checkExtensionConnection(taskId) {
    const connected = (0, ws_server_1.isExtensionConnected)();
    if (connected) {
        return { connected: true };
    }
    trackToolError(taskId, "connection_check", "Extension not connected");
    return { connected: false, error: "Browser extension is not connected. Ensure the extension is loaded and active." };
}
|
|
530
|
+
/**
 * Parse an "x,y" coordinate string into two finite numbers.
 *
 * Rejects missing, empty or non-numeric parts so the caller reports a clear
 * error instead of forwarding NaN (which JSON serialises as null) or a
 * silently zeroed axis (`Number("")` is 0).
 *
 * @param {string} coordinate - Text such as "120,340".
 * @returns {{x: number, y: number}}
 * @throws {Error} When the text does not contain two numeric parts.
 */
function parseClickCoordinate(coordinate) {
    const parts = String(coordinate).split(',');
    const [x, y] = parts.map((part) => (part.trim() === '' ? NaN : Number(part)));
    if (parts.length < 2 || !Number.isFinite(x) || !Number.isFinite(y)) {
        throw new Error(`Invalid coordinate "${coordinate}"; expected "x,y" with numeric values`);
    }
    return { x, y };
}
/**
 * "act" tool: perform one browser action through the extension.
 *
 * Routing:
 * - click    → by element ID, else by selector, else by coordinate, else the
 *              raw action with the full input.
 * - type     → click the target (element ID or selector) when one is given,
 *              then send the text.
 * - navigate → send `value` as the URL.
 * - any other action is forwarded unchanged with the full input.
 *
 * Each call is limited to 30 seconds. Failures are never thrown to the
 * caller: the tool returns an "Error: ..." string so the model can react.
 * Outcomes are written to the scratchpad and the task tracker.
 */
const actTool = (0, tools_1.tool)(async (input) => {
    const taskId = input.taskId || "unknown";
    const action = input.action;
    const a = input;
    // Element IDs are presented as "@12"; strip the "@" before sending.
    const eid = a.elementId ? String(a.elementId).replace('@', '') : undefined;
    // Connection check
    const conn = checkExtensionConnection(taskId);
    if (!conn.connected)
        return `Error: ${conn.error}`;
    void updateScratchpad(taskId, `▶️ ACT: ${action} | Target: ${input.elementId || input.selector || input.value || 'N/A'}`);
    emitAgentEvent({ taskId, timestamp: new Date().toISOString(), eventType: "tool_call", data: { tool: "act", action, target: input.elementId || input.selector || 'coordinate' } });
    try {
        const execution = async () => {
            if (action === "click") {
                if (eid)
                    return await (0, ws_server_1.sendCommandToExtension)("click_element", { id: eid, text: a.value });
                if (a.selector)
                    return await (0, ws_server_1.sendCommandToExtension)("click_element_by_selector", { selector: a.selector });
                if (a.coordinate) {
                    // Throws on malformed input; the outer catch turns that
                    // into an error string for the model.
                    const { x, y } = parseClickCoordinate(a.coordinate);
                    return await (0, ws_server_1.sendCommandToExtension)("click", { x, y });
                }
                return await (0, ws_server_1.sendCommandToExtension)(action, a);
            }
            if (action === "type") {
                // Click the target first (presumably to focus it), then type.
                if (eid || a.selector)
                    await (0, ws_server_1.sendCommandToExtension)(eid ? "click_element" : "click_element_by_selector", { id: eid, selector: a.selector });
                return await (0, ws_server_1.sendCommandToExtension)("type", { text: a.value || a.text });
            }
            if (action === "navigate") {
                return await (0, ws_server_1.sendCommandToExtension)("navigate", { url: a.value });
            }
            return await (0, ws_server_1.sendCommandToExtension)(action, a);
        };
        const resultMsg = await withTimeout(execution(), 30000, `act(${action})`);
        const obs = `Success: ${typeof resultMsg === 'string' ? resultMsg : JSON.stringify(resultMsg)}`;
        void updateScratchpad(taskId, `✅ ${obs}`);
        trackToolCall(taskId, "act");
        trackToolSuccess(taskId, "act");
        return obs;
    }
    catch (err) {
        const errorMsg = `Error: ${err.message}. Try an alternative method.`;
        void updateScratchpad(taskId, `❌ ${errorMsg}`);
        trackToolError(taskId, "act", err.message);
        return errorMsg;
    }
}, {
    name: "act",
    description: "Perform precise, high-speed actions in the browser.",
    schema: zod_1.z.object({
        action: zod_1.z.enum([
            "navigate", "click", "type", "fill", "select", "check",
            "hover", "scroll", "wait", "screenshot",
            "new_tab", "switch_tab", "close_tab", "drag_and_drop", "upload_file", "get_logs",
            "get_tree", "get_dom_tree", "configure", "print_pdf", "emulate_network",
            "get_cookies", "set_cookie", "clear_cache", "set_geolocation", "set_timezone", "get_performance_metrics",
            "start_screencast", "stop_screencast", "record_session",
            "mock_network_request", "generate_artifact", "highlight_elements",
            "assert", "start_tracing", "stop_tracing", "target_auto_attach", "enable_domain", "pause", "resume",
            "screenshot_region", "verify_ui_state"
        ]),
        selector: zod_1.z.string().optional(),
        elementId: zod_1.z.string().optional(),
        value: zod_1.z.string().optional(),
        assertionType: zod_1.z.string().optional(),
        coordinate: zod_1.z.string().optional(),
        tabId: zod_1.z.number().optional(),
        taskId: zod_1.z.string().optional().describe("Always pass the current taskId here"),
    }),
});
|
|
601
|
+
/**
 * "get_state" tool: ask the extension for the current page state and return
 * it as text — title, URL, and one line per interactive element.
 *
 * The request is limited to 30 seconds and asks for no screenshot. Failures
 * are returned as "Error ..." strings rather than thrown.
 */
const getStateTool = (0, tools_1.tool)(async (input) => {
    const taskId = input.taskId || "unknown";
    // Bail out early when the extension is not reachable.
    const link = checkExtensionConnection(taskId);
    if (!link.connected) {
        return `Error: ${link.error}`;
    }
    emitAgentEvent({ taskId, timestamp: new Date().toISOString(), eventType: "tool_call", data: { tool: "get_state" } });
    try {
        const request = (0, ws_server_1.sendCommandToExtension)("get_state", { screenshot: false });
        const snapshot = await withTimeout(request, 30000, "get_state");
        if (!snapshot) {
            trackToolError(taskId, "get_state", "Received empty state");
            return "Error: Received empty state from extension";
        }
        // One line per element: "[@id] tag "text" type=..." (type only when set).
        const describeElement = (el) => {
            const typeSuffix = el.type ? `type=${el.type}` : '';
            return `[@${el.id}] ${el.tagName} "${el.text}" ${typeSuffix}`;
        };
        let elementsSummary = "No elements found";
        if (snapshot.interactiveElements) {
            elementsSummary = snapshot.interactiveElements.map((el) => describeElement(el)).join("\n");
        }
        const obs = `Title: ${snapshot.title}\nURL: ${snapshot.url}\nInteractive Elements:\n${elementsSummary}`;
        void updateScratchpad(taskId, `📍 Get State: ${snapshot.title} | ${snapshot.interactiveElements?.length || 0} elements`);
        trackToolCall(taskId, "get_state");
        trackToolSuccess(taskId, "get_state");
        return obs;
    }
    catch (err) {
        trackToolError(taskId, "get_state", err.message);
        return `Error getting state: ${err.message}`;
    }
}, {
    name: "get_state",
    description: "Capture the current browser state including interactive elements.",
    schema: zod_1.z.object({ taskId: zod_1.z.string().optional() }),
});
|
|
632
|
+
// Tool "execute_script": forwards a script string to the extension's "evaluate"
// command and returns the JSON-serialised result. Failures come back as a
// "Script Error: ..." string instead of being thrown.
const executeScriptTool = (0, tools_1.tool)(async (input) => {
    const taskId = input.taskId || "unknown";
    // Bail out early when the extension connection check fails.
    const link = checkExtensionConnection(taskId);
    if (!link.connected) {
        return `Error: ${link.error}`;
    }
    emitAgentEvent({
        taskId,
        timestamp: new Date().toISOString(),
        eventType: "tool_call",
        data: { tool: "execute_script" },
    });
    void updateScratchpad(taskId, `⚙️ Executing JS...`);
    try {
        const payload = { script: String(input.script) };
        const pendingEval = (0, ws_server_1.sendCommandToExtension)("evaluate", payload);
        const evaluated = await withTimeout(pendingEval, 30000, "execute_script");
        trackToolCall(taskId, "execute_script");
        trackToolSuccess(taskId, "execute_script");
        return `Result: ${JSON.stringify(evaluated)}`;
    }
    catch (failure) {
        trackToolError(taskId, "execute_script", failure.message);
        return `Script Error: ${failure.message}`;
    }
}, {
    name: "execute_script",
    description: "Execute JS in the browser context.",
    schema: zod_1.z.object({
        script: zod_1.z.string(),
        taskId: zod_1.z.string().optional()
    }),
});
|
|
658
|
+
// Tool "cdp_command": relays a command name plus optional args to the extension's
// "cdp_command" handler and returns the JSON-serialised result. Failures come
// back as a "CDP Error: ..." string instead of being thrown.
const cdpCommandTool = (0, tools_1.tool)(async (input) => {
    const taskId = input.taskId || "unknown";
    // Bail out early when the extension connection check fails.
    const link = checkExtensionConnection(taskId);
    if (!link.connected) {
        return `Error: ${link.error}`;
    }
    emitAgentEvent({
        taskId,
        timestamp: new Date().toISOString(),
        eventType: "tool_call",
        data: { tool: "cdp_command", command: input.command },
    });
    void updateScratchpad(taskId, `🔧 CDP: ${input.command}`);
    try {
        // Missing args are sent as an empty object.
        const payload = { command: input.command, args: input.args || {} };
        const pendingReply = (0, ws_server_1.sendCommandToExtension)("cdp_command", payload);
        const reply = await withTimeout(pendingReply, 30000, "cdp_command");
        trackToolCall(taskId, "cdp_command");
        trackToolSuccess(taskId, "cdp_command");
        return `CDP Result: ${JSON.stringify(reply)}`;
    }
    catch (failure) {
        trackToolError(taskId, "cdp_command", failure.message);
        return `CDP Error: ${failure.message}`;
    }
}, {
    name: "cdp_command",
    description: "Execute raw Chrome DevTools Protocol command.",
    schema: zod_1.z.object({
        command: zod_1.z.string(),
        args: zod_1.z.record(zod_1.z.any()).optional(),
        taskId: zod_1.z.string().optional()
    }),
});
|
|
685
|
+
// Tool "computer_20241022": coordinate/keyboard driven browser control.
//
// Each supported action is translated into one extension command (30s timeout).
// The handler never throws: failures are returned as "Computer Tool Error: ..."
// and unusable input is answered with a hint message.
//
// Input: { action, coordinate?, start_coordinate?, text?, taskId? } (see schema).
// Returns: a human-readable result string.
const computerTool = (0, tools_1.tool)(async (input) => {
    const taskId = input.taskId || "unknown";
    // Bail out early when the extension connection check fails.
    const conn = checkExtensionConnection(taskId);
    if (!conn.connected)
        return `Error: ${conn.error}`;
    emitAgentEvent({ taskId, timestamp: new Date().toISOString(), eventType: "tool_call", data: { tool: "computer_20241022", action: input.action } });
    void updateScratchpad(taskId, `🖱️ Computer: ${input.action}`);
    // A usable point needs a finite x and y. The schema only guarantees "array of
    // numbers", so [] or [x] would otherwise be forwarded with undefined x/y.
    const isPoint = (p) => Array.isArray(p) && p.length >= 2 && Number.isFinite(p[0]) && Number.isFinite(p[1]);
    const send = (command, params, label) => withTimeout((0, ws_server_1.sendCommandToExtension)(command, params), 30000, label);
    // Single-button clicks differ only in button name, timeout label and message.
    const clickVariants = {
        left_click: { button: "left", label: "computer_click", verb: "Left clicked" },
        right_click: { button: "right", label: "computer_right_click", verb: "Right clicked" },
        middle_click: { button: "middle", label: "computer_middle_click", verb: "Middle clicked" },
    };
    try {
        const { action, coordinate, start_coordinate: startCoordinate, text } = input;
        const hasTarget = isPoint(coordinate);
        const click = Object.hasOwn(clickVariants, action) ? clickVariants[action] : undefined;
        let resultMsg = "";
        if (click && hasTarget) {
            await send("click", { x: coordinate[0], y: coordinate[1], button: click.button, clickCount: 1 }, click.label);
            resultMsg = `${click.verb} at [${coordinate}]`;
        }
        else if (action === "double_click" && hasTarget) {
            await send("double_click", { x: coordinate[0], y: coordinate[1] }, "computer_double_click");
            resultMsg = `Double clicked at [${coordinate}]`;
        }
        else if (action === "mouse_move" && hasTarget) {
            await send("mouse_move", { x: coordinate[0], y: coordinate[1] }, "computer_mouse_move");
            resultMsg = `Moved mouse to [${coordinate}]`;
        }
        else if (action === "left_click_drag" && hasTarget && isPoint(startCoordinate)) {
            await send("drag", {
                startX: startCoordinate[0], startY: startCoordinate[1],
                endX: coordinate[0], endY: coordinate[1]
            }, "computer_drag");
            resultMsg = `Dragged from [${startCoordinate}] to [${coordinate}]`;
        }
        else if (action === "type" && text) {
            await send("type", { text }, "computer_type");
            resultMsg = `Typed: ${text}`;
        }
        else if (action === "key" && text) {
            // NOTE(review): key presses go through the same "type" command as literal
            // text — confirm the extension interprets key names such as 'Return'.
            await send("type", { text }, "computer_key");
            resultMsg = `Key pressed: ${text}`;
        }
        else if (action === "screenshot") {
            const data = await send("screenshot_region", { x: 0, y: 0, width: 1920, height: 1080 }, "computer_screenshot");
            resultMsg = data ? `Screenshot taken (${String(data).length} chars)` : "Screenshot taken";
        }
        else if (action === "cursor_position") {
            resultMsg = "Cursor position tracking not available via CDP; use get_state to read DOM positions.";
        }
        else {
            // Unusable input: nothing was sent to the browser, so record this as a
            // tool error instead of a success.
            const problem = `Action '${action}' requires a coordinate or text parameter. Check your input.`;
            trackToolError(taskId, "computer_20241022", problem);
            return problem;
        }
        trackToolCall(taskId, "computer_20241022");
        trackToolSuccess(taskId, "computer_20241022");
        return resultMsg;
    }
    catch (err) {
        trackToolError(taskId, "computer_20241022", err.message);
        return `Computer Tool Error: ${err.message}`;
    }
}, {
    name: "computer_20241022",
    description: "Native Anthropic Computer Use API implementation for zero-shot browser control via X,Y coordinates.",
    schema: zod_1.z.object({
        action: zod_1.z.enum([
            "key", "type", "mouse_move", "left_click",
            "left_click_drag", "right_click", "middle_click",
            "double_click", "screenshot", "cursor_position"
        ]),
        coordinate: zod_1.z.array(zod_1.z.number()).optional().describe("Target [x, y] coordinate. For left_click_drag, this is the END coordinate."),
        start_coordinate: zod_1.z.array(zod_1.z.number()).optional().describe("For left_click_drag only: the START [x, y] coordinate."),
        text: zod_1.z.string().optional().describe("Text to type or key name to press (e.g. 'Return', 'ctrl+c')."),
        taskId: zod_1.z.string().optional()
    }),
});
|
|
764
|
+
// Tool "write_to_scratchpad": appends the agent's notes to the task scratchpad
// under an "AGENT NOTES" heading and confirms with a fixed message.
const writeScratchpadTool = (0, tools_1.tool)(async (input) => {
    const TOOL_NAME = "write_to_scratchpad";
    const taskId = input.taskId || "unknown";
    const entry = `### AGENT NOTES\n${input.notes}`;
    // Awaited (unlike the fire-and-forget scratchpad writes in other tools) so the
    // confirmation is only returned after the write call has settled.
    await updateScratchpad(taskId, entry);
    trackToolCall(taskId, TOOL_NAME);
    trackToolSuccess(taskId, TOOL_NAME);
    return "Notes saved to scratchpad.";
}, {
    name: "write_to_scratchpad",
    description: "Use this to save partial data, notes, or findings while navigating complex multi-step tasks so you don't forget them.",
    schema: zod_1.z.object({
        notes: zod_1.z.string().describe("The information you want to save."),
        taskId: zod_1.z.string().optional(),
    }),
});
|
|
778
|
+
// Tool "complete_task": records the outcome in the scratchpad, emits a
// "completion" agent event carrying the success flag and result payload, and
// tells the model to stop.
const completeTaskTool = (0, tools_1.tool)(async (input) => {
    const TOOL_NAME = "complete_task";
    const taskId = input.taskId || "unknown";
    const { success, result } = input;
    await updateScratchpad(taskId, `### TASK COMPLETED\nSuccess: ${success}\nResult payload has been saved.`);
    const completionEvent = {
        taskId,
        timestamp: new Date().toISOString(),
        eventType: "completion",
        data: { success, result },
    };
    emitAgentEvent(completionEvent);
    trackToolCall(taskId, TOOL_NAME);
    trackToolSuccess(taskId, TOOL_NAME);
    return "Payload saved successfully. Stop execution.";
}, {
    name: "complete_task",
    description: "Call this tool when the objective has been successfully achieved to pass data back.",
    schema: zod_1.z.object({
        result: zod_1.z.any().describe("The final extracted data, confirmation message, or state summary to return."),
        success: zod_1.z.boolean(),
        taskId: zod_1.z.string().optional(),
    }),
});
|
|
799
|
+
exports.aetherTools = [
|
|
800
|
+
actTool,
|
|
801
|
+
getStateTool,
|
|
802
|
+
executeScriptTool,
|
|
803
|
+
cdpCommandTool,
|
|
804
|
+
computerTool,
|
|
805
|
+
writeScratchpadTool,
|
|
806
|
+
completeTaskTool
|
|
807
|
+
];
|
|
808
|
+
// ============================================================================
|
|
809
|
+
// 4. LLM & GRAPH ROUTING
|
|
810
|
+
// ============================================================================
|
|
811
|
+
// Chat model with the Aether tool list bound to it. Temperature is pinned to 0
// and the model name can be overridden through MODEL_NAME.
// NOTE(review): the key for this Google GenAI client is read from OPENAI_API_KEY —
// confirm that variable name is intentional.
const llm = new google_genai_1.ChatGoogleGenerativeAI({
    apiKey: process.env.OPENAI_API_KEY,
    model: process.env.MODEL_NAME || "gemini-3-flash-preview",
    temperature: 0,
}).bindTools(exports.aetherTools);
|
|
816
|
+
/**
 * Graph node "agent": decides the next step for a task by calling the LLM.
 *
 * Before invoking the model it:
 *   1. stops with status "interrupted" when the task's interrupt flag is set;
 *   2. stops with status "failed" when the error budget is exhausted, i.e. the
 *      accumulated errorCount or the number of failed results among the last
 *      MAX_CONSECUTIVE_ERRORS tool messages reaches MAX_CONSECUTIVE_ERRORS.
 *
 * On the first run (no messages yet) the system prompt and an objective briefing
 * are prepended and also returned, so they become part of the stored history.
 *
 * @param {object} state Graph state; reads taskId, objective, parentId, messages,
 *   errorCount, status and finalResult.
 * @returns {Promise<object>} Partial state update: `{ status }` on interrupt,
 *   `{ status, errorCount }` on error-budget exhaustion, otherwise
 *   `{ messages, status, finalResult, errorCount }`.
 */
async function callModel(state) {
    const taskState = exports.activeTasks.get(state.taskId);
    // --- Interrupt check ---
    if (taskState?.shouldInterrupt) {
        void updateScratchpad(state.taskId, "⚠️ Task interrupted by Orchestrator.");
        emitTaskUpdate(state.taskId, "interrupted");
        return { status: "interrupted" };
    }
    // --- Error budget: count consecutive tool errors from previous round ---
    // Failure prefixes returned by the tool handlers. "Error getting state" must be
    // listed explicitly: get_state reports failures as "Error getting state: ...",
    // which the bare "Error:" alternative does not match, so those failures
    // would otherwise never count against the budget.
    const toolFailurePattern = /^(Error|Error getting state|Script Error|CDP Error|Computer Tool Error):/;
    const prevToolMessages = state.messages.filter((m) => m instanceof messages_1.ToolMessage || m._getType?.() === "tool");
    const recentErrors = prevToolMessages
        .slice(-MAX_CONSECUTIVE_ERRORS)
        .filter((m) => toolFailurePattern.test(String(m.content)))
        .length;
    const newErrorCount = (recentErrors > 0) ? 1 : 0; // incremental delta for accumulator
    if (state.errorCount >= MAX_CONSECUTIVE_ERRORS || recentErrors >= MAX_CONSECUTIVE_ERRORS) {
        const msg = `## MAX ERRORS REACHED (${state.errorCount + newErrorCount}/${MAX_CONSECUTIVE_ERRORS})\nForcing task to failed state to prevent infinite retry loop.`;
        void updateScratchpad(state.taskId, msg);
        emitTaskUpdate(state.taskId, "failed", { reason: "max_errors" });
        return { status: "failed", errorCount: newErrorCount };
    }
    // --- Build messages ---
    const isFirstRun = state.messages.length === 0;
    const initialMessages = [];
    if (isFirstRun) {
        initialMessages.push(new messages_1.SystemMessage(AETHER_SYSTEM_PROMPT));
        initialMessages.push(new messages_1.HumanMessage(`# OBJECTIVE
${state.objective}

# CONTEXT
- Task ID: ${state.taskId}
- Parent Task: ${state.parentId || "None"}

# EXECUTION GUIDELINES
1. Start by calling get_state to understand the current browser state
2. Break the objective into small, verifiable steps
3. Verify each step succeeded before proceeding
4. Save important findings with write_to_scratchpad
5. When fully complete, call complete_task with the result

Always pass taskId: ${state.taskId} to every tool call.`));
        void updateScratchpad(state.taskId, `**Objective:** ${state.objective}`, false);
    }
    const messagesToInvoke = [...initialMessages, ...state.messages];
    emitTaskUpdate(state.taskId, "Thinking...");
    const response = await invokeLLMWithRetry(messagesToInvoke, state.taskId);
    let status = state.status;
    let finalResult = state.finalResult;
    // A complete_task call ends the task here: its args carry the verdict and the
    // result payload, which is also mirrored onto the in-memory task record.
    const completeCall = response.tool_calls?.find((tc) => tc.name === "complete_task");
    if (completeCall) {
        status = completeCall.args.success ? "success" : "failed";
        finalResult = completeCall.args.result;
        emitTaskUpdate(state.taskId, `Task ${status}`, { result: finalResult });
        if (taskState)
            taskState.result = finalResult;
    }
    return { messages: [...initialMessages, response], status, finalResult, errorCount: newErrorCount };
}
|
|
871
|
+
/**
 * Conditional edge after the "agent" node.
 *
 * Ends the graph when the task has reached "success", "failed" or "interrupted",
 * routes to "execute_tools" when the latest message carries tool calls, and ends
 * the graph otherwise.
 *
 * @param {object} state Graph state; reads status and messages.
 * @returns {string} "execute_tools" or langgraph's END marker.
 */
function shouldContinue(state) {
    const terminalStatuses = ["success", "failed", "interrupted"];
    if (terminalStatuses.includes(state.status))
        return langgraph_1.END;
    // Guard against an empty or missing history: with no message to inspect there
    // is nothing to execute, so end instead of throwing on `undefined.tool_calls`.
    const messages = state.messages ?? [];
    const lastMessage = messages[messages.length - 1];
    if (lastMessage?.tool_calls && lastMessage.tool_calls.length > 0)
        return "execute_tools";
    return langgraph_1.END;
}
|
|
879
|
+
// ============================================================================
|
|
880
|
+
// 5. COMPILE GRAPH
|
|
881
|
+
// ============================================================================
|
|
882
|
+
// Checkpoint store passed to compile(); delegateToBrowser addresses a run's
// state through `thread_id`.
const checkpointer = new langgraph_1.MemorySaver();
// Node that executes the tool calls requested by the model.
const toolNode = new prebuilt_1.ToolNode(exports.aetherTools);
// Two-node loop: START -> agent -> (execute_tools -> agent)* -> END, where
// shouldContinue picks between running tools and ending after each agent turn.
const workflow = new langgraph_1.StateGraph({ channels: graphState })
    .addNode("agent", callModel)
    .addNode("execute_tools", toolNode)
    .addEdge(langgraph_1.START, "agent")
    .addConditionalEdges("agent", shouldContinue)
    .addEdge("execute_tools", "agent");
exports.aetherNavigationAgent = workflow.compile({ checkpointer });
|
|
891
|
+
// ============================================================================
|
|
892
|
+
// 6. ORCHESTRATOR DELEGATION
|
|
893
|
+
// ============================================================================
|
|
894
|
+
/**
 * Idempotency guard for task dispatch.
 *
 * Holds the taskIds that delegateToBrowser is currently processing so that a
 * second dispatch of the same taskId can be detected and rejected.
 */
const dispatchingTasks = new Set();
|
|
899
|
+
/**
 * Runs one browser task through the navigation graph and reports its outcome.
 *
 * Flow: reject duplicate dispatches, an open circuit breaker or a full task table;
 * register the task; stream the graph while mirroring status, error and step
 * counts onto the in-memory task record; finally persist and emit the final state.
 *
 * The driver can stop the stream early (interrupt flag, task timeout, too many
 * consecutive errors). In that case the checkpointed graph status may still be
 * non-terminal, so the status decided by the driver is used as the final status.
 * A terminal status reported by the graph itself always takes precedence.
 *
 * @param {string} objective What the agent should achieve.
 * @param {string} taskId Task identifier; also used as the graph thread_id.
 * @param {string} [parentId] Identifier of the parent task, if any.
 * @returns {Promise<object>} The graph's final state snapshot for this task.
 * @throws {Error} When the task is already running, the circuit breaker is open,
 *   too many tasks are active, or the run itself fails (the original error is
 *   rethrown after the task has been marked "crashed").
 */
async function delegateToBrowser(objective, taskId, parentId) {
    const terminalStatuses = ["success", "failed", "crashed", "interrupted", "timeout"];
    const isTerminal = (status) => typeof status === "string"
        && (terminalStatuses.includes(status) || status.startsWith("Crashed"));
    // --- Idempotency check ---
    if (dispatchingTasks.has(taskId)) {
        const existing = exports.activeTasks.get(taskId);
        if (existing && !isTerminal(existing.status)) {
            console.error(`[Agent] Idempotency guard: task ${taskId} is already running`);
            throw new Error(`Task "${taskId}" is already in progress. Use aether_check_status to monitor it, or aether_interrupt_task to stop it before re-dispatching.`);
        }
        // Task was terminal — clean up and allow re-dispatch
        dispatchingTasks.delete(taskId);
    }
    // --- Circuit breaker check ---
    if (isCircuitBreakerOpen()) {
        console.error(`[Agent] Circuit breaker is open — rejecting task ${taskId}`);
        throw new Error(`Browser extension circuit breaker is OPEN (${getCircuitBreakerStatus()}). The extension has failed too many times. Wait for cooldown or check extension health at GET /health.`);
    }
    // --- Active tasks memory check ---
    if (exports.activeTasks.size >= MAX_ACTIVE_TASKS) {
        // Force purge to make room
        purgeCompletedTasks();
        if (exports.activeTasks.size >= MAX_ACTIVE_TASKS) {
            throw new Error(`Too many active tasks (${exports.activeTasks.size}). Maximum is ${MAX_ACTIVE_TASKS}. Wait for some to complete or interrupt them.`);
        }
    }
    const config = { configurable: { thread_id: taskId } };
    const initialState = { objective, taskId, parentId, status: "pending", errorCount: 0, stepCount: 0 };
    dispatchingTasks.add(taskId);
    exports.activeTasks.set(taskId, {
        objective,
        status: "Starting Engine...",
        lastObservation: "N/A",
        shouldInterrupt: false,
        startedAt: Date.now(),
        stepCount: 0,
        consecutiveErrors: 0,
        toolCallHistory: [],
    });
    (0, ws_server_1.setActiveTaskCount)(exports.activeTasks.size);
    // Terminal status chosen by this driver when it stops the stream early.
    let forcedStatus;
    // Task timeout wrapper
    const taskTimeout = setTimeout(() => {
        const task = exports.activeTasks.get(taskId);
        if (task && !isTerminal(task.status)) {
            void updateScratchpad(taskId, `⏰ TASK TIMEOUT: Exceeded ${TASK_TIMEOUT_MS / 1000}s limit`);
            emitAgentEvent({ taskId, timestamp: new Date().toISOString(), eventType: "timeout", data: { timeoutMs: TASK_TIMEOUT_MS } });
            emitTaskUpdate(taskId, "timeout", { reason: "task_timeout" });
            task.shouldInterrupt = true;
            forcedStatus = "timeout";
        }
    }, TASK_TIMEOUT_MS);
    try {
        // Everything after registration runs inside this try so the `finally` below
        // always releases the idempotency guard and the timer, even when the very
        // first persistence call fails.
        // Persist as "running" so a server restart can detect crashed tasks
        await saveTaskState(taskId, objective, "running");
        emitTaskUpdate(taskId, "Starting Engine...");
        const stream = await exports.aetherNavigationAgent.stream(initialState, config);
        try {
            for await (const chunk of stream) {
                const currentTask = exports.activeTasks.get(taskId);
                // Interrupt check
                if (currentTask?.shouldInterrupt) {
                    // Keep "timeout" if the timer raised the flag; otherwise it was an interrupt.
                    forcedStatus = forcedStatus ?? "interrupted";
                    break;
                }
                // LangGraph stream chunks contain node outputs - extract status changes
                if (chunk && typeof chunk === "object") {
                    for (const [nodeName, nodeOutput] of Object.entries(chunk)) {
                        if (!nodeOutput || typeof nodeOutput !== "object" || !("status" in nodeOutput))
                            continue;
                        const newStatus = nodeOutput.status;
                        if (newStatus && newStatus !== currentTask?.status) {
                            emitTaskUpdate(taskId, newStatus, { node: nodeName });
                        }
                        // Re-read the record after the update before mirroring the counters.
                        const tracked = exports.activeTasks.get(taskId);
                        if (tracked && "errorCount" in nodeOutput)
                            tracked.consecutiveErrors = nodeOutput.errorCount;
                        if (tracked && "stepCount" in nodeOutput)
                            tracked.stepCount = nodeOutput.stepCount;
                    }
                }
                // Check for consecutive errors from task tracker
                if (currentTask && currentTask.consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
                    void updateScratchpad(taskId, `## MAX ERRORS REACHED (${currentTask.consecutiveErrors}/${MAX_CONSECUTIVE_ERRORS})\nForcing task to failed state.`);
                    emitTaskUpdate(taskId, "failed", { reason: "max_consecutive_errors" });
                    forcedStatus = "failed";
                    break;
                }
                // Check task duration
                if (currentTask) {
                    const elapsed = Date.now() - currentTask.startedAt;
                    if (elapsed > TASK_TIMEOUT_MS) {
                        void updateScratchpad(taskId, `⏰ Task exceeded ${TASK_TIMEOUT_MS / 1000}s limit`);
                        emitTaskUpdate(taskId, "timeout", { reason: "task_timeout", elapsedMs: elapsed });
                        forcedStatus = "timeout";
                        break;
                    }
                }
            }
        }
        finally {
            // Explicitly finish the stream so an early `break` does not leave it open.
            if (typeof stream.return === "function") {
                await stream.return(undefined);
            }
        }
    }
    catch (err) {
        void updateScratchpad(taskId, `## CRITICAL FAILURE\n${err.message}`);
        emitAgentEvent({ taskId, timestamp: new Date().toISOString(), eventType: "critical_error", data: { error: err.message, stack: err.stack } });
        const crashedTask = exports.activeTasks.get(taskId);
        try {
            await saveTaskState(taskId, objective, "crashed", { errorCount: crashedTask?.consecutiveErrors ?? 0, stepCount: crashedTask?.stepCount ?? 0 });
        }
        catch (persistErr) {
            // A persistence failure must not hide the original error or skip the
            // "crashed" update below.
            console.error(`[Agent] Failed to persist crashed state for task ${taskId}: ${persistErr.message}`);
        }
        emitTaskUpdate(taskId, "crashed", { error: err.message });
        throw err;
    }
    finally {
        clearTimeout(taskTimeout);
        dispatchingTasks.delete(taskId);
        (0, ws_server_1.setActiveTaskCount)(exports.activeTasks.size);
    }
    const finalState = await exports.aetherNavigationAgent.getState(config);
    const finalValues = finalState.values;
    // A terminal status from the graph wins; otherwise fall back to the status the
    // driver decided on when it cut the stream short.
    const graphStatus = finalValues.status;
    const finalStatus = isTerminal(graphStatus)
        ? graphStatus
        : (forcedStatus ?? graphStatus ?? "unknown");
    // Update task tracker with final values
    const task = exports.activeTasks.get(taskId);
    if (task) {
        task.status = finalStatus;
        task.result = finalValues.finalResult;
        task.stepCount = finalValues.stepCount ?? task.stepCount;
        task.consecutiveErrors = finalValues.errorCount ?? task.consecutiveErrors;
    }
    await saveTaskState(taskId, objective, finalStatus, {
        errorCount: task?.consecutiveErrors ?? 0,
        stepCount: task?.stepCount ?? 0
    });
    // Emit final completion event
    emitAgentEvent({
        taskId,
        timestamp: new Date().toISOString(),
        eventType: finalStatus === "success" ? "completion" : "state_change",
        data: { status: finalStatus, result: finalValues.finalResult, stepCount: task?.stepCount, errorCount: task?.consecutiveErrors },
    });
    emitTaskUpdate(taskId, finalStatus, {
        result: finalValues.finalResult,
        stepCount: task?.stepCount,
        errorCount: task?.consecutiveErrors,
    });
    (0, ws_server_1.setActiveTaskCount)(exports.activeTasks.size);
    return finalState;
}
|