ai-browser 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/README.md +40 -1
  2. package/dist/agent/agent-loop.d.ts +26 -2
  3. package/dist/agent/agent-loop.d.ts.map +1 -1
  4. package/dist/agent/agent-loop.js +371 -89
  5. package/dist/agent/agent-loop.js.map +1 -1
  6. package/dist/agent/config.d.ts +5 -0
  7. package/dist/agent/config.d.ts.map +1 -1
  8. package/dist/agent/config.js +5 -0
  9. package/dist/agent/config.js.map +1 -1
  10. package/dist/agent/content-budget.d.ts +11 -0
  11. package/dist/agent/content-budget.d.ts.map +1 -0
  12. package/dist/agent/content-budget.js +133 -0
  13. package/dist/agent/content-budget.js.map +1 -0
  14. package/dist/agent/conversation-manager.d.ts +48 -0
  15. package/dist/agent/conversation-manager.d.ts.map +1 -0
  16. package/dist/agent/conversation-manager.js +171 -0
  17. package/dist/agent/conversation-manager.js.map +1 -0
  18. package/dist/agent/error-recovery.d.ts +29 -0
  19. package/dist/agent/error-recovery.d.ts.map +1 -0
  20. package/dist/agent/error-recovery.js +72 -0
  21. package/dist/agent/error-recovery.js.map +1 -0
  22. package/dist/agent/index.js +7 -2
  23. package/dist/agent/index.js.map +1 -1
  24. package/dist/agent/page-state-cache.d.ts +22 -0
  25. package/dist/agent/page-state-cache.d.ts.map +1 -0
  26. package/dist/agent/page-state-cache.js +71 -0
  27. package/dist/agent/page-state-cache.js.map +1 -0
  28. package/dist/agent/progress-estimator.d.ts +17 -0
  29. package/dist/agent/progress-estimator.d.ts.map +1 -0
  30. package/dist/agent/progress-estimator.js +67 -0
  31. package/dist/agent/progress-estimator.js.map +1 -0
  32. package/dist/agent/prompt.d.ts +1 -1
  33. package/dist/agent/prompt.d.ts.map +1 -1
  34. package/dist/agent/prompt.js +91 -48
  35. package/dist/agent/prompt.js.map +1 -1
  36. package/dist/agent/token-tracker.d.ts +22 -0
  37. package/dist/agent/token-tracker.d.ts.map +1 -0
  38. package/dist/agent/token-tracker.js +29 -0
  39. package/dist/agent/token-tracker.js.map +1 -0
  40. package/dist/agent/tool-usage-tracker.d.ts +45 -0
  41. package/dist/agent/tool-usage-tracker.d.ts.map +1 -0
  42. package/dist/agent/tool-usage-tracker.js +149 -0
  43. package/dist/agent/tool-usage-tracker.js.map +1 -0
  44. package/dist/agent/types.d.ts +30 -0
  45. package/dist/agent/types.d.ts.map +1 -1
  46. package/dist/api/mcp-sse.d.ts +2 -1
  47. package/dist/api/mcp-sse.d.ts.map +1 -1
  48. package/dist/api/mcp-sse.js +2 -1
  49. package/dist/api/mcp-sse.js.map +1 -1
  50. package/dist/api/routes.d.ts +2 -1
  51. package/dist/api/routes.d.ts.map +1 -1
  52. package/dist/api/routes.js +355 -7
  53. package/dist/api/routes.js.map +1 -1
  54. package/dist/browser/BrowserManager.d.ts.map +1 -1
  55. package/dist/browser/BrowserManager.js +5 -2
  56. package/dist/browser/BrowserManager.js.map +1 -1
  57. package/dist/cli/mcp-stdio.js +3 -0
  58. package/dist/cli/mcp-stdio.js.map +1 -1
  59. package/dist/cli/server.js +15 -3
  60. package/dist/cli/server.js.map +1 -1
  61. package/dist/mcp/ai-markdown.d.ts +2 -0
  62. package/dist/mcp/ai-markdown.d.ts.map +1 -0
  63. package/dist/mcp/ai-markdown.js +1807 -0
  64. package/dist/mcp/ai-markdown.js.map +1 -0
  65. package/dist/mcp/browser-mcp-server.d.ts +2 -0
  66. package/dist/mcp/browser-mcp-server.d.ts.map +1 -1
  67. package/dist/mcp/browser-mcp-server.js +340 -49
  68. package/dist/mcp/browser-mcp-server.js.map +1 -1
  69. package/dist/mcp/task-tools.d.ts.map +1 -1
  70. package/dist/mcp/task-tools.js +108 -13
  71. package/dist/mcp/task-tools.js.map +1 -1
  72. package/dist/memory/KnowledgeCardStore.d.ts +35 -0
  73. package/dist/memory/KnowledgeCardStore.d.ts.map +1 -0
  74. package/dist/memory/KnowledgeCardStore.js +304 -0
  75. package/dist/memory/KnowledgeCardStore.js.map +1 -0
  76. package/dist/memory/MemoryCapturer.d.ts +14 -0
  77. package/dist/memory/MemoryCapturer.d.ts.map +1 -0
  78. package/dist/memory/MemoryCapturer.js +183 -0
  79. package/dist/memory/MemoryCapturer.js.map +1 -0
  80. package/dist/memory/MemoryInjector.d.ts +23 -0
  81. package/dist/memory/MemoryInjector.d.ts.map +1 -0
  82. package/dist/memory/MemoryInjector.js +180 -0
  83. package/dist/memory/MemoryInjector.js.map +1 -0
  84. package/dist/memory/RecordingConverter.d.ts +16 -0
  85. package/dist/memory/RecordingConverter.d.ts.map +1 -0
  86. package/dist/memory/RecordingConverter.js +108 -0
  87. package/dist/memory/RecordingConverter.js.map +1 -0
  88. package/dist/memory/SessionRecorder.d.ts +39 -0
  89. package/dist/memory/SessionRecorder.d.ts.map +1 -0
  90. package/dist/memory/SessionRecorder.js +198 -0
  91. package/dist/memory/SessionRecorder.js.map +1 -0
  92. package/dist/memory/index.d.ts +8 -0
  93. package/dist/memory/index.d.ts.map +1 -0
  94. package/dist/memory/index.js +6 -0
  95. package/dist/memory/index.js.map +1 -0
  96. package/dist/memory/types.d.ts +39 -0
  97. package/dist/memory/types.d.ts.map +1 -0
  98. package/dist/memory/types.js +2 -0
  99. package/dist/memory/types.js.map +1 -0
  100. package/dist/task/tool-actions.d.ts +4 -0
  101. package/dist/task/tool-actions.d.ts.map +1 -1
  102. package/dist/task/tool-actions.js +72 -0
  103. package/dist/task/tool-actions.js.map +1 -1
  104. package/package.json +5 -2
  105. package/public/index.html +2296 -226
  106. package/public/task-result.html +107 -12
  107. package/public/tasks.html +83 -10
@@ -3,84 +3,60 @@ import { randomUUID } from 'node:crypto';
3
3
  import OpenAI from 'openai';
4
4
  import { config } from './config.js';
5
5
  import { SYSTEM_PROMPT } from './prompt.js';
6
- const MAX_CONTENT_LENGTH = 4000;
7
- function truncate(text, max = MAX_CONTENT_LENGTH) {
8
- if (text.length <= max)
9
- return text;
10
- return text.slice(0, max) + `\n...(已截断,共${text.length}字符)`;
11
- }
12
- function formatForLLM(rawText, toolName) {
13
- try {
14
- const data = JSON.parse(rawText);
15
- if (toolName === 'get_page_info' && data?.elements) {
16
- const summary = {
17
- page: data.page,
18
- elementCount: data.elements.length,
19
- elements: data.elements.slice(0, 30).map((e) => ({
20
- id: e.id,
21
- type: e.type,
22
- label: e.label,
23
- })),
24
- intents: data.intents,
25
- };
26
- if (data.stability)
27
- summary.stability = data.stability;
28
- if (data.pendingDialog)
29
- summary.pendingDialog = data.pendingDialog;
30
- if (data.elements.length > 30) {
31
- summary.note = `显示前30个元素,共${data.elements.length}个`;
32
- }
33
- return truncate(JSON.stringify(summary, null, 2));
34
- }
35
- if (toolName === 'get_page_content') {
36
- let md = `# ${data.title || ''}\n\n`;
37
- const sections = Array.isArray(data.sections) ? data.sections : [];
38
- for (const s of sections) {
39
- const stars = s.attention >= 0.7 ? '★★★'
40
- : s.attention >= 0.4 ? '★★'
41
- : '★';
42
- md += `[${stars}] ${s.text}\n\n`;
43
- }
44
- if (sections.length === 0)
45
- md += '(未提取到内容)\n';
46
- return truncate(md);
47
- }
48
- return truncate(JSON.stringify(data));
49
- }
50
- catch {
51
- return truncate(rawText);
52
- }
53
- }
6
+ import { formatToolResult } from './content-budget.js';
7
+ import { ToolUsageTracker } from './tool-usage-tracker.js';
8
+ import { determineRecovery, extractErrorCode } from './error-recovery.js';
9
+ import { ConversationManager } from './conversation-manager.js';
10
+ import { TokenTracker } from './token-tracker.js';
11
+ import { PageStateCache } from './page-state-cache.js';
12
+ import { ProgressEstimator } from './progress-estimator.js';
13
+ import { MemoryCapturer, mergePatterns } from '../memory/MemoryCapturer.js';
14
+ import { MemoryInjector } from '../memory/MemoryInjector.js';
54
15
  export class BrowsingAgent extends EventEmitter {
55
16
  openai;
56
17
  mcpClient;
57
18
  state;
58
- messages;
19
+ conversation = new ConversationManager();
59
20
  model;
60
21
  maxIterations;
61
22
  initialMessages;
62
23
  tools = [];
63
- recentToolCalls = []; // 循环检测:记录最近工具调用签名
24
+ toolTracker = new ToolUsageTracker();
25
+ tokenTracker = new TokenTracker();
26
+ pageStateCache = new PageStateCache();
27
+ progressEstimator;
28
+ subGoals = [];
29
+ knowledgeStore;
64
30
  stepWarningInjected = false;
65
31
  pendingInputResolve = null;
66
32
  pendingInputRequestId = null;
33
+ _askHumanTimer = null;
34
+ recalledDomains = new Set();
35
+ taskText = '';
67
36
  constructor(options) {
68
37
  super();
69
38
  this.model = options.model || config.llm.model;
39
+ const timeoutMs = options.timeout ? options.timeout * 1000 : 120_000;
70
40
  this.openai = new OpenAI({
71
41
  baseURL: options.baseURL || config.llm.baseURL,
72
42
  apiKey: options.apiKey || config.llm.apiKey,
43
+ timeout: timeoutMs,
44
+ maxRetries: 0, // disable SDK-level retries; agent loop handles retries
73
45
  });
74
46
  this.mcpClient = options.mcpClient;
75
47
  this.maxIterations = options.maxIterations ?? config.maxIterations;
76
48
  this.initialMessages = options.initialMessages || [];
49
+ this.progressEstimator = new ProgressEstimator(this.maxIterations);
50
+ this.knowledgeStore = options.knowledgeStore;
51
+ if (options.subGoals?.length) {
52
+ this.subGoals = options.subGoals.map(d => ({ description: d, completed: false }));
53
+ }
77
54
  this.state = {
78
55
  sessionId: '',
79
56
  iteration: 0,
80
57
  consecutiveErrors: 0,
81
58
  done: false,
82
59
  };
83
- this.messages = [];
84
60
  }
85
61
  get sessionId() {
86
62
  return this.state.sessionId;
@@ -160,7 +136,21 @@ export class BrowsingAgent extends EventEmitter {
160
136
  });
161
137
  console.log(`[Agent] 发现 ${this.tools.length} 个工具`);
162
138
  }
139
+ _running = false;
163
140
  async run(task) {
141
+ if (this._running) {
142
+ return { success: false, error: 'Agent is already running', iterations: 0 };
143
+ }
144
+ this._running = true;
145
+ try {
146
+ return await this._run(task);
147
+ }
148
+ finally {
149
+ this._running = false;
150
+ }
151
+ }
152
+ async _run(task) {
153
+ this.taskText = task;
164
154
  // Discover tools from MCP server
165
155
  await this.discoverTools();
166
156
  // Create session via MCP
@@ -168,7 +158,9 @@ export class BrowsingAgent extends EventEmitter {
168
158
  let sessionResult;
169
159
  try {
170
160
  sessionResult = await this.mcpClient.callTool({ name: 'create_session', arguments: {} });
171
- const text = sessionResult.content[0]?.text;
161
+ const text = sessionResult.content?.[0]?.text;
162
+ if (!text)
163
+ throw new Error('create_session returned no text content');
172
164
  const parsed = JSON.parse(text);
173
165
  this.state.sessionId = parsed.sessionId;
174
166
  }
@@ -180,11 +172,37 @@ export class BrowsingAgent extends EventEmitter {
180
172
  console.log(`[Agent] 会话已创建: ${this.state.sessionId}`);
181
173
  this.emitEvent({ type: 'session_created', sessionId: this.state.sessionId });
182
174
  // Build messages: system + initialMessages (conversation memory) + user task
183
- this.messages = [
184
- { role: 'system', content: SYSTEM_PROMPT },
185
- ...this.initialMessages,
186
- { role: 'user', content: task },
187
- ];
175
+ let systemPrompt = SYSTEM_PROMPT;
176
+ if (this.subGoals.length > 0) {
177
+ const goalList = this.subGoals.map((g, i) => `${i + 1}. ${g.description}`).join('\n');
178
+ systemPrompt += `\n\n## 子目标\n\n按顺序完成以下子目标:\n${goalList}\n\n完成每个子目标后,在思考中标注"[子目标完成: N]"(N为序号)。`;
179
+ }
180
+ this.conversation.init(systemPrompt, this.initialMessages, task);
181
+ // Pre-recall: ask LLM to select relevant site memories from index
182
+ if (this.knowledgeStore) {
183
+ try {
184
+ const selected = await this.selectMemories(task);
185
+ for (const { domain, card } of selected) {
186
+ this.recalledDomains.add(domain);
187
+ const normalized = MemoryCapturer.extractDomain(`https://${domain}`);
188
+ if (normalized)
189
+ this.recalledDomains.add(normalized);
190
+ const context = MemoryInjector.buildContext(card, 2000, task);
191
+ this.conversation.push({ role: 'user', content: `[系统提示] 以下是该站点的历史操作记忆,请优先按照记忆中的步骤和选择器操作,避免重复探索。如果记忆中提供了 CSS 选择器,请直接使用 execute_javascript + querySelector 操作元素。\n\n${context}` });
192
+ console.log(`[Agent] 预召回站点记忆: ${domain} (${card.patterns.length} 条模式)`);
193
+ this.emitEvent({
194
+ type: 'memory_recall',
195
+ domain,
196
+ patternCount: card.patterns.length,
197
+ context,
198
+ iteration: 0,
199
+ });
200
+ }
201
+ }
202
+ catch (err) {
203
+ console.log(`[Agent] 记忆选择失败,跳过: ${err.message}`);
204
+ }
205
+ }
188
206
  let finalResult;
189
207
  try {
190
208
  finalResult = await this.loop();
@@ -192,12 +210,42 @@ export class BrowsingAgent extends EventEmitter {
192
210
  catch (err) {
193
211
  finalResult = { success: false, error: err.message, iterations: this.state.iteration };
194
212
  }
213
+ // Capture patterns from successful runs
214
+ if (finalResult.success && this.knowledgeStore) {
215
+ try {
216
+ const history = this.toolTracker.getHistory();
217
+ // Find the last navigated URL from tool history
218
+ let lastUrl = '';
219
+ for (let i = history.length - 1; i >= 0; i--) {
220
+ if (history[i].toolName === 'navigate' && history[i].success && history[i].args.url) {
221
+ lastUrl = history[i].args.url;
222
+ break;
223
+ }
224
+ }
225
+ if (lastUrl) {
226
+ const domain = MemoryCapturer.extractDomain(lastUrl);
227
+ const patterns = MemoryCapturer.extractPatterns(history, lastUrl);
228
+ if (domain && patterns.length > 0) {
229
+ const existing = this.knowledgeStore.loadCard(domain);
230
+ const card = existing
231
+ ? { ...existing, patterns: mergePatterns(existing.patterns, patterns), version: existing.version + 1, updatedAt: Date.now() }
232
+ : { domain, version: 1, patterns, createdAt: Date.now(), updatedAt: Date.now() };
233
+ this.knowledgeStore.saveCard(card);
234
+ console.log(`[Agent] 保存站点记忆: ${domain} (${patterns.length} 条新模式)`);
235
+ }
236
+ }
237
+ }
238
+ catch { /* non-critical */ }
239
+ }
240
+ // Attach token usage
241
+ finalResult.tokenUsage = this.tokenTracker.getUsage();
195
242
  this.emitEvent({
196
243
  type: 'done',
197
244
  success: finalResult.success,
198
245
  result: finalResult.result,
199
246
  error: finalResult.error,
200
247
  iterations: finalResult.iterations,
248
+ tokenUsage: finalResult.tokenUsage,
201
249
  });
202
250
  await this.cleanup();
203
251
  return finalResult;
@@ -210,9 +258,9 @@ export class BrowsingAgent extends EventEmitter {
210
258
  const remainingSteps = this.maxIterations - this.state.iteration;
211
259
  if (!this.stepWarningInjected && remainingSteps <= 2 && remainingSteps > 0 && this.maxIterations > 3) {
212
260
  this.stepWarningInjected = true;
213
- this.messages.push({
214
- role: 'system',
215
- content: `⚠️ 你还剩 ${remainingSteps} 步就达到上限,请立即用 done 工具报告已获取的所有信息,不要再做额外操作。`,
261
+ this.conversation.push({
262
+ role: 'user',
263
+ content: `[系统提示] ⚠️ 你还剩 ${remainingSteps} 步就达到上限,请立即用 done 工具报告已获取的所有信息,不要再做额外操作。`,
216
264
  });
217
265
  console.log(`[Agent] 注入步数提醒,剩余 ${remainingSteps} 步`);
218
266
  }
@@ -220,19 +268,26 @@ export class BrowsingAgent extends EventEmitter {
220
268
  try {
221
269
  response = await this.openai.chat.completions.create({
222
270
  model: this.model,
223
- messages: this.messages,
271
+ messages: this.conversation.getMessages(),
224
272
  tools: this.tools,
225
273
  tool_choice: 'auto',
226
274
  });
275
+ this.tokenTracker.recordLLMCall(response.usage);
227
276
  }
228
277
  catch (err) {
229
278
  this.state.consecutiveErrors++;
230
279
  console.log(`[Agent] LLM API 错误 (${this.state.consecutiveErrors}/${config.maxConsecutiveErrors}): ${err.message}`);
231
280
  this.emitEvent({ type: 'error', message: err.message, iteration: this.state.iteration });
232
- if (this.state.consecutiveErrors >= config.maxConsecutiveErrors) {
281
+ const recovery = determineRecovery({
282
+ errorMessage: err.message,
283
+ toolName: '_llm_api',
284
+ consecutiveErrors: this.state.consecutiveErrors,
285
+ });
286
+ if (recovery.type === 'abort' || this.state.consecutiveErrors >= config.maxConsecutiveErrors) {
233
287
  return { success: false, error: `LLM API 连续失败: ${err.message}`, iterations: this.state.iteration };
234
288
  }
235
- await new Promise(r => setTimeout(r, 2000));
289
+ const delay = recovery.type === 'retry' ? recovery.delayMs : 2000;
290
+ await new Promise(r => setTimeout(r, delay));
236
291
  continue;
237
292
  }
238
293
  const message = response.choices[0]?.message;
@@ -242,8 +297,17 @@ export class BrowsingAgent extends EventEmitter {
242
297
  if (message.content) {
243
298
  console.log(`[Agent] 思考: ${message.content}`);
244
299
  this.emitEvent({ type: 'thinking', content: message.content, iteration: this.state.iteration });
300
+ // Detect subgoal completion markers in thinking
301
+ const goalMatch = message.content.match(/\[子目标完成:\s*(\d+)\]/);
302
+ if (goalMatch) {
303
+ const idx = parseInt(goalMatch[1], 10) - 1;
304
+ if (idx >= 0 && idx < this.subGoals.length && !this.subGoals[idx].completed) {
305
+ this.subGoals[idx].completed = true;
306
+ this.emitEvent({ type: 'subgoal_completed', subGoal: this.subGoals[idx].description, iteration: this.state.iteration });
307
+ }
308
+ }
245
309
  }
246
- this.messages.push(message);
310
+ this.conversation.push(message);
247
311
  if (!message.tool_calls || message.tool_calls.length === 0) {
248
312
  console.log('[Agent] LLM 未调用工具,任务结束');
249
313
  return {
@@ -263,6 +327,7 @@ export class BrowsingAgent extends EventEmitter {
263
327
  return { success: true, iterations: this.state.iteration };
264
328
  }
265
329
  async executeToolCalls(toolCalls) {
330
+ const deferredHints = [];
266
331
  for (const toolCall of toolCalls) {
267
332
  const name = toolCall.function.name;
268
333
  let args;
@@ -271,7 +336,7 @@ export class BrowsingAgent extends EventEmitter {
271
336
  }
272
337
  catch {
273
338
  console.log(`[Agent] 工具参数解析失败: ${toolCall.function.arguments}`);
274
- this.messages.push({
339
+ this.conversation.push({
275
340
  role: 'tool',
276
341
  tool_call_id: toolCall.id,
277
342
  content: JSON.stringify({ error: '工具参数 JSON 解析失败' }),
@@ -281,22 +346,6 @@ export class BrowsingAgent extends EventEmitter {
281
346
  }
282
347
  console.log(`[Agent] 调用工具: ${name}(${JSON.stringify(args)})`);
283
348
  this.emitEvent({ type: 'tool_call', name, args, iteration: this.state.iteration });
284
- // 循环检测:记录工具调用签名
285
- const callSig = `${name}:${JSON.stringify(args)}`;
286
- this.recentToolCalls.push(callSig);
287
- if (this.recentToolCalls.length > 3) {
288
- this.recentToolCalls.shift();
289
- }
290
- if (this.recentToolCalls.length === 3 &&
291
- this.recentToolCalls[0] === this.recentToolCalls[1] &&
292
- this.recentToolCalls[1] === this.recentToolCalls[2]) {
293
- console.log('[Agent] 检测到循环调用,注入提醒');
294
- this.messages.push({
295
- role: 'system',
296
- content: '⚠️ 你已连续3次调用相同工具且参数相同,这不会产生新结果。请换一种方式操作,或用 done 工具报告当前已获取的信息。',
297
- });
298
- this.recentToolCalls = [];
299
- }
300
349
  if (name === 'done') {
301
350
  const result = args.result || '任务完成';
302
351
  console.log(`[Agent] 任务完成: ${result}`);
@@ -315,15 +364,23 @@ export class BrowsingAgent extends EventEmitter {
315
364
  let userResponse;
316
365
  try {
317
366
  userResponse = await new Promise((resolve, reject) => {
318
- this.pendingInputResolve = resolve;
367
+ let settled = false;
368
+ this.pendingInputResolve = (response) => {
369
+ if (!settled) {
370
+ settled = true;
371
+ clearTimeout(timer);
372
+ resolve(response);
373
+ }
374
+ };
319
375
  const timer = setTimeout(() => {
320
- if (this.pendingInputResolve) {
376
+ if (!settled) {
377
+ settled = true;
321
378
  this.pendingInputResolve = null;
322
379
  this.pendingInputRequestId = null;
323
380
  reject(new Error('用户未响应'));
324
381
  }
325
382
  }, 5 * 60 * 1000);
326
- // Store timer ref so resolveInput can clear it
383
+ // Store timer ref so cleanup can clear it
327
384
  this._askHumanTimer = timer;
328
385
  });
329
386
  }
@@ -339,7 +396,7 @@ export class BrowsingAgent extends EventEmitter {
339
396
  const responseText = JSON.stringify(userResponse);
340
397
  console.log(`[Agent] 用户输入已收到`);
341
398
  this.emitEvent({ type: 'tool_result', name: 'ask_human', success: true, summary: redactedText, iteration: this.state.iteration });
342
- this.messages.push({ role: 'tool', tool_call_id: toolCall.id, content: responseText });
399
+ this.conversation.push({ role: 'tool', tool_call_id: toolCall.id, content: responseText });
343
400
  continue;
344
401
  }
345
402
  // 强制覆盖 sessionId,防止 LLM 猜测错误的值
@@ -347,6 +404,32 @@ export class BrowsingAgent extends EventEmitter {
347
404
  if (this.state.sessionId) {
348
405
  mcpArgs.sessionId = this.state.sessionId;
349
406
  }
407
+ // Auto-recall site memory before navigate calls
408
+ if ((name === 'navigate' || name === 'navigate_and_extract') && mcpArgs.url && this.knowledgeStore) {
409
+ try {
410
+ const best = this.findBestCard(mcpArgs.url);
411
+ if (best && !this.recalledDomains.has(best.domain)) {
412
+ this.recalledDomains.add(best.domain);
413
+ // Also mark normalized domain to avoid duplicate recalls
414
+ const normalized = MemoryCapturer.extractDomain(mcpArgs.url);
415
+ if (normalized)
416
+ this.recalledDomains.add(normalized);
417
+ const context = MemoryInjector.buildContext(best.card, 2000, this.taskText);
418
+ console.log(`[Agent] 自动召回站点记忆: ${best.domain} (${best.card.patterns.length} 条模式)`);
419
+ deferredHints.push(`[系统提示] 以下是该站点的历史操作记忆,请优先按照记忆中的步骤和选择器操作,避免重复探索。如果记忆中提供了 CSS 选择器,请直接使用 execute_javascript + querySelector 操作元素。\n\n${context}`);
420
+ this.emitEvent({
421
+ type: 'memory_recall',
422
+ domain: best.domain,
423
+ patternCount: best.card.patterns.length,
424
+ context,
425
+ iteration: this.state.iteration,
426
+ });
427
+ }
428
+ }
429
+ catch (err) {
430
+ console.log(`[Agent] 站点记忆召回失败: ${err.message}`);
431
+ }
432
+ }
350
433
  let rawText;
351
434
  let success = true;
352
435
  try {
@@ -360,23 +443,94 @@ export class BrowsingAgent extends EventEmitter {
360
443
  rawText = JSON.stringify({ error: err.message });
361
444
  success = false;
362
445
  }
446
+ // Record in tracker
447
+ this.toolTracker.record({
448
+ toolName: name,
449
+ args,
450
+ success,
451
+ timestamp: Date.now(),
452
+ errorCode: success ? undefined : extractErrorCode(rawText),
453
+ });
363
454
  if (!success) {
364
455
  this.state.consecutiveErrors++;
365
456
  console.log(`[Agent] 错误 (${this.state.consecutiveErrors}/${config.maxConsecutiveErrors}): ${rawText}`);
457
+ const errorCode = extractErrorCode(rawText);
458
+ const recovery = determineRecovery({
459
+ errorCode,
460
+ errorMessage: rawText,
461
+ toolName: name,
462
+ consecutiveErrors: this.state.consecutiveErrors,
463
+ });
464
+ if (recovery.type === 'abort') {
465
+ this.conversation.push({ role: 'tool', tool_call_id: toolCall.id, content: rawText });
466
+ return { success: false, error: recovery.reason, iterations: this.state.iteration };
467
+ }
366
468
  if (this.state.consecutiveErrors >= config.maxConsecutiveErrors) {
367
- this.messages.push({ role: 'tool', tool_call_id: toolCall.id, content: rawText });
469
+ this.conversation.push({ role: 'tool', tool_call_id: toolCall.id, content: rawText });
368
470
  return {
369
471
  success: false,
370
472
  error: `连续 ${config.maxConsecutiveErrors} 次错误,任务中止`,
371
473
  iterations: this.state.iteration,
372
474
  };
373
475
  }
476
+ if (recovery.type === 'inject_hint') {
477
+ this.conversation.push({ role: 'tool', tool_call_id: toolCall.id, content: rawText });
478
+ deferredHints.push(`[系统提示] ⚠️ ${recovery.message}`);
479
+ continue;
480
+ }
481
+ // recovery.type === 'retry': apply delay before next iteration
482
+ if (recovery.type === 'retry' && recovery.delayMs > 0) {
483
+ await new Promise(r => setTimeout(r, recovery.delayMs));
484
+ }
374
485
  }
375
486
  else {
376
487
  this.state.consecutiveErrors = 0;
377
488
  }
378
- // SSE event sends full content; LLM message gets truncated version
379
- const formatted = formatForLLM(rawText, name);
489
+ // Loop/pattern detection defer hint to avoid interleaving with tool results
490
+ const loopDetection = this.toolTracker.detectAny();
491
+ if (loopDetection) {
492
+ console.log(`[Agent] 检测到${loopDetection.type},注入提醒`);
493
+ deferredHints.push(`[系统提示] ⚠️ ${loopDetection.message}`);
494
+ }
495
+ // SSE event sends full content; LLM message gets budget-aware version
496
+ let formatted = formatToolResult(rawText, name);
497
+ // Apply page state diff for get_page_info on same-page refreshes
498
+ if (name === 'get_page_info' && success) {
499
+ try {
500
+ const pageData = JSON.parse(rawText);
501
+ const elements = Array.isArray(pageData.elements) ? pageData.elements : [];
502
+ const url = pageData.page?.url || '';
503
+ const diff = this.pageStateCache.update(this.state.sessionId, elements, url);
504
+ if (!diff.isNewPage && (diff.added.length + diff.removed.length + diff.changed.length) > 0) {
505
+ const diffLines = [
506
+ `## Page State Diff (unchanged: ${diff.unchangedCount})`,
507
+ '',
508
+ ];
509
+ if (diff.added.length > 0) {
510
+ diffLines.push(`### Added (${diff.added.length})`);
511
+ for (const el of diff.added.slice(0, 20)) {
512
+ diffLines.push(`- \`${el.id}\` ${el.type || ''} ${el.label || ''}`);
513
+ }
514
+ }
515
+ if (diff.removed.length > 0) {
516
+ diffLines.push(`### Removed (${diff.removed.length})`);
517
+ for (const id of diff.removed.slice(0, 20)) {
518
+ diffLines.push(`- \`${id}\``);
519
+ }
520
+ }
521
+ if (diff.changed.length > 0) {
522
+ diffLines.push(`### Changed (${diff.changed.length})`);
523
+ for (const el of diff.changed.slice(0, 20)) {
524
+ diffLines.push(`- \`${el.id}\` ${el.type || ''} ${el.label || ''}`);
525
+ }
526
+ }
527
+ formatted = diffLines.join('\n');
528
+ }
529
+ }
530
+ catch {
531
+ // Parse failed — use the standard formatted output
532
+ }
533
+ }
380
534
  console.log(`[Agent] 结果: ${formatted.slice(0, 200)}${formatted.length > 200 ? '...' : ''}`);
381
535
  this.emitEvent({
382
536
  type: 'tool_result',
@@ -385,15 +539,143 @@ export class BrowsingAgent extends EventEmitter {
385
539
  summary: rawText,
386
540
  iteration: this.state.iteration,
387
541
  });
388
- this.messages.push({
542
+ this.conversation.push({
389
543
  role: 'tool',
390
544
  tool_call_id: toolCall.id,
391
545
  content: formatted,
392
546
  });
547
+ // Emit progress after each tool call
548
+ const progress = this.progressEstimator.record(name);
549
+ this.emitEvent({ type: 'progress', progress, iteration: this.state.iteration });
550
+ }
551
+ // Push all deferred hints after tool results to avoid breaking tool message contiguity
552
+ for (const hint of deferredHints) {
553
+ this.conversation.push({ role: 'user', content: hint });
393
554
  }
394
555
  return null;
395
556
  }
557
+ /**
558
+ * Ask LLM to select relevant site memories from the index.
559
+ * Returns cards for domains the LLM considers useful for the task (max 3).
560
+ */
561
+ async selectMemories(task) {
562
+ if (!this.knowledgeStore)
563
+ return [];
564
+ const entries = this.knowledgeStore.listDomains();
565
+ if (entries.length === 0)
566
+ return [];
567
+ // Sort by recency, cap at 50 to limit prompt size
568
+ const sorted = [...entries].sort((a, b) => b.lastUsedAt - a.lastUsedAt);
569
+ const capped = sorted.slice(0, 50);
570
+ // Format index as compact list (sanitize descriptions to prevent injection)
571
+ const lines = capped.map(e => {
572
+ const tags = [];
573
+ if (e.siteType)
574
+ tags.push(e.siteType);
575
+ if (e.requiresLogin)
576
+ tags.push('需登录');
577
+ const safeDesc = e.topPatterns
578
+ .map(d => d.replace(/[\n\r]/g, ' ').slice(0, 60))
579
+ .join('; ');
580
+ const desc = safeDesc ? ` — ${safeDesc}` : '';
581
+ const tagStr = tags.length > 0 ? ` [${tags.join(', ')}]` : '';
582
+ return `- ${e.domain} (${e.patternCount}条模式)${tagStr}${desc}`;
583
+ });
584
+ const selectionPrompt = `你是一个记忆选择器。根据用户任务,从以下站点记忆列表中选出相关的站点(可以是0个或多个)。
585
+
586
+ ## 可用站点记忆
587
+ \`\`\`
588
+ ${lines.join('\n')}
589
+ \`\`\`
590
+
591
+ ## 用户任务
592
+ ${task}
593
+
594
+ 请只返回相关站点的域名,每行一个。如果没有相关的,返回"无"。不要解释。`;
595
+ try {
596
+ const response = await this.openai.chat.completions.create({
597
+ model: this.model,
598
+ messages: [{ role: 'user', content: selectionPrompt }],
599
+ max_tokens: 200,
600
+ });
601
+ this.tokenTracker.recordLLMCall(response.usage);
602
+ const text = response.choices[0]?.message?.content?.trim() || '';
603
+ if (!text || text === '无')
604
+ return [];
605
+ // Scan response for known domain names (robust against markdown/extra text)
606
+ const domainSet = new Set(capped.map(e => e.domain));
607
+ const responseText = text.toLowerCase();
608
+ const selected = [];
609
+ for (const entry of capped) {
610
+ if (responseText.includes(entry.domain.toLowerCase())) {
611
+ const card = this.knowledgeStore.loadCard(entry.domain);
612
+ if (card && card.patterns.length > 0) {
613
+ selected.push({ domain: entry.domain, card });
614
+ }
615
+ }
616
+ }
617
+ // Cap at 3 to avoid flooding conversation context
618
+ const result = selected.slice(0, 3);
619
+ console.log(`[Agent] LLM 记忆选择: ${result.length > 0 ? result.map(s => s.domain).join(', ') : '无匹配'}`);
620
+ return result;
621
+ }
622
+ catch (err) {
623
+ console.log(`[Agent] 记忆选择 LLM 调用失败: ${err.message}`);
624
+ return [];
625
+ }
626
+ }
627
+ /**
628
+ * Find the best knowledge card for a domain, checking normalized domain,
629
+ * full hostname, and subdomain variants in the index.
630
+ */
631
+ findBestCard(url) {
632
+ if (!this.knowledgeStore)
633
+ return null;
634
+ const candidates = [];
635
+ // 1. Normalized domain (e.g. cn.bing.com → bing.com)
636
+ const normalized = MemoryCapturer.extractDomain(url);
637
+ if (normalized) {
638
+ const card = this.knowledgeStore.loadCard(normalized);
639
+ if (card && card.patterns.length > 0)
640
+ candidates.push({ domain: normalized, card });
641
+ }
642
+ // 2. Full hostname (e.g. cn.bing.com)
643
+ try {
644
+ const hostname = new URL(url).hostname.replace(/^www\./, '');
645
+ if (hostname && hostname !== normalized) {
646
+ const card = this.knowledgeStore.loadCard(hostname);
647
+ if (card && card.patterns.length > 0)
648
+ candidates.push({ domain: hostname, card });
649
+ }
650
+ }
651
+ catch { /* ignore */ }
652
+ // 3. Scan index for subdomain variants (e.g. bing.com matches cn.bing.com)
653
+ if (normalized) {
654
+ for (const entry of this.knowledgeStore.listDomains()) {
655
+ if (entry.domain !== normalized && entry.domain.endsWith('.' + normalized)) {
656
+ const card = this.knowledgeStore.loadCard(entry.domain);
657
+ if (card && card.patterns.length > 0)
658
+ candidates.push({ domain: entry.domain, card });
659
+ }
660
+ }
661
+ }
662
+ if (candidates.length === 0)
663
+ return null;
664
+ // Pick the card with the most task_intent patterns, then most total patterns
665
+ candidates.sort((a, b) => {
666
+ const intentA = a.card.patterns.filter(p => p.type === 'task_intent').length;
667
+ const intentB = b.card.patterns.filter(p => p.type === 'task_intent').length;
668
+ if (intentA !== intentB)
669
+ return intentB - intentA;
670
+ return b.card.patterns.length - a.card.patterns.length;
671
+ });
672
+ return candidates[0];
673
+ }
396
674
  async cleanup() {
675
+ if (this._askHumanTimer) {
676
+ clearTimeout(this._askHumanTimer);
677
+ this._askHumanTimer = null;
678
+ }
397
679
  if (this.state.sessionId) {
398
680
  console.log('[Agent] 清理浏览器会话...');
399
681
  try {