entari-plugin-hyw 3.2.113__py3-none-any.whl → 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of entari-plugin-hyw might be problematic. Click here for more details.

@@ -9,16 +9,18 @@ from loguru import logger
9
9
  from openai import AsyncOpenAI
10
10
 
11
11
  from .config import HYWConfig
12
- from ..utils.mcp_playwright import MCPPlaywrightManager
13
12
  from ..utils.search import SearchService
14
13
  from ..utils.prompts import (
15
- AGENT_SYSTEM_PROMPT,
16
- AGENT_SYSTEM_PROMPT_INTRUCT_VISION_ADD,
17
- AGENT_SYSTEM_PROMPT_MCP_ADD,
18
- AGENT_SYSTEM_PROMPT_SEARCH_ADD,
19
- INTRUCT_SYSTEM_PROMPT,
20
- INTRUCT_SYSTEM_PROMPT_VISION_ADD,
21
- VISION_SYSTEM_PROMPT,
14
+ AGENT_SP,
15
+ AGENT_SP_INTRUCT_VISION_ADD,
16
+ AGENT_SP_TOOLS_STANDARD_ADD,
17
+ AGENT_SP_TOOLS_AGENT_ADD,
18
+ AGENT_SP_SEARCH_ADD,
19
+ AGENT_SP_PAGE_ADD,
20
+ AGENT_SP_IMAGE_SEARCH_ADD,
21
+ INTRUCT_SP,
22
+ INTRUCT_SP_VISION_ADD,
23
+ VISION_SP,
22
24
  )
23
25
 
24
26
  @asynccontextmanager
@@ -35,24 +37,26 @@ class ProcessingPipeline:
35
37
  self.config = config
36
38
  self.search_service = SearchService(config)
37
39
  self.client = AsyncOpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
38
- # Build Playwright MCP args with headless flag if configured
39
- playwright_args = getattr(self.config, "playwright_mcp_args", None)
40
- if playwright_args is None:
41
- playwright_args = ["-y", "@playwright/mcp@latest"]
42
- # Add --headless flag if headless mode is enabled
43
- if getattr(self.config, "headless", True):
44
- playwright_args.append("--headless")
45
-
46
- self.mcp_playwright = MCPPlaywrightManager(
47
- command=getattr(self.config, "playwright_mcp_command", "npx"),
48
- args=playwright_args,
49
- )
40
+ self.all_web_results = [] # Cache for search results
41
+ self.current_mode = "standard" # standard | agent
50
42
 
51
43
  self.web_search_tool = {
52
44
  "type": "function",
53
45
  "function": {
54
- "name": "web_search",
55
- "description": "Search the web for text and images.",
46
+ "name": "internal_web_search",
47
+ "description": "Search the web for text.",
48
+ "parameters": {
49
+ "type": "object",
50
+ "properties": {"query": {"type": "string"}},
51
+ "required": ["query"],
52
+ },
53
+ },
54
+ }
55
+ self.image_search_tool = {
56
+ "type": "function",
57
+ "function": {
58
+ "name": "internal_image_search",
59
+ "description": "Search for images related to a query.",
56
60
  "parameters": {
57
61
  "type": "object",
58
62
  "properties": {"query": {"type": "string"}},
@@ -60,18 +64,32 @@ class ProcessingPipeline:
60
64
  },
61
65
  },
62
66
  }
63
- self.grant_mcp_playwright_tool = {
67
+ self.set_mode_tool = {
64
68
  "type": "function",
65
69
  "function": {
66
- "name": "grant_mcp_playwright",
67
- "description": "Decide whether to grant Playwright MCP browser tools to the agent for this request.",
70
+ "name": "set_mode",
71
+ "description": "设定后续 Agent 的运行模式: standard | agent",
68
72
  "parameters": {
69
73
  "type": "object",
70
74
  "properties": {
71
- "grant": {"type": "boolean"},
75
+ "mode": {"type": "string", "enum": ["standard", "agent"]},
72
76
  "reason": {"type": "string"},
73
77
  },
74
- "required": ["grant"],
78
+ "required": ["mode"],
79
+ },
80
+ },
81
+ }
82
+ self.crawl_page_tool = {
83
+ "type": "function",
84
+ "function": {
85
+ "name": "crawl_page",
86
+ "description": "使用 Crawl4AI 抓取网页并返回 Markdown 文本。",
87
+ "parameters": {
88
+ "type": "object",
89
+ "properties": {
90
+ "url": {"type": "string"},
91
+ },
92
+ "required": ["url"],
75
93
  },
76
94
  },
77
95
  }
@@ -99,6 +117,9 @@ class ProcessingPipeline:
99
117
  current_history = conversation_history
100
118
  final_response_content = ""
101
119
  structured: Dict[str, Any] = {}
120
+
121
+ # Reset search cache for this execution
122
+ self.all_web_results = []
102
123
 
103
124
  try:
104
125
  logger.info(f"Pipeline: Starting workflow for '{user_input}' using {active_model}")
@@ -122,7 +143,7 @@ class ProcessingPipeline:
122
143
  or getattr(self.config, "vision_model_name", None)
123
144
  or active_model
124
145
  )
125
- vision_prompt_tpl = getattr(self.config, "vision_system_prompt", None) or VISION_SYSTEM_PROMPT
146
+ vision_prompt_tpl = getattr(self.config, "vision_system_prompt", None) or VISION_SP
126
147
  vision_prompt = vision_prompt_tpl.format(user_msgs=user_input or "[图片]")
127
148
  vision_text, vision_usage = await self._run_vision_stage(
128
149
  user_input=user_input,
@@ -179,38 +200,31 @@ class ProcessingPipeline:
179
200
  intruct_trace["cost"] = instruct_cost
180
201
  trace["intruct"] = intruct_trace
181
202
 
182
- explicit_mcp_intent = "mcp" in (user_input or "").lower()
183
- grant_requested = bool(intruct_trace.get("grant_mcp_playwright", False))
184
- grant_mcp = bool(grant_requested and explicit_mcp_intent)
185
- intruct_trace["explicit_mcp_intent"] = explicit_mcp_intent
186
- intruct_trace["grant_effective"] = grant_mcp
187
- if grant_requested and not explicit_mcp_intent:
188
- logger.info("Intruct requested MCP grant, but user did not express MCP intent. Grant ignored.")
189
- if grant_mcp:
190
- logger.warning(f"MCP Playwright granted for this request: reason={intruct_trace.get('grant_reason')!r}")
191
-
192
203
  # Start agent loop
193
204
  agent_start_time = time.time()
194
205
  current_history.append({"role": "user", "content": user_input or "..."})
195
206
 
196
- max_steps = 6
207
+ mode = intruct_trace.get("mode", self.current_mode).lower()
208
+ logger.success(f"Instruct Mode: {mode}")
209
+ self.current_mode = mode
210
+
211
+ # Determine max iterations
212
+ max_steps = 10 if mode == "agent" else 1
213
+
197
214
  step = 0
198
215
  agent_trace_steps: List[Dict[str, Any]] = []
199
216
  last_system_prompt = ""
200
217
 
201
- mcp_tools_openai: Optional[List[Dict[str, Any]]] = None
202
- if grant_mcp:
203
- mcp_tools_openai = await self.mcp_playwright.tools_openai()
204
- if not mcp_tools_openai:
205
- logger.warning("MCP Playwright was granted but tools are unavailable (connect failed).")
206
- grant_mcp = False
218
+ agent_tools: Optional[List[Dict[str, Any]]] = None
219
+ if mode == "agent":
220
+ agent_tools = [self.web_search_tool, self.image_search_tool, self.crawl_page_tool]
207
221
 
208
- # Agent loop - always runs regardless of MCP grant status
222
+ # Agent loop
209
223
  while step < max_steps:
210
224
  step += 1
211
225
  logger.info(f"Pipeline: Agent step {step}/{max_steps}")
212
226
 
213
- if step == 5:
227
+ if step == 5 and mode == "agent":
214
228
  current_history.append(
215
229
  {
216
230
  "role": "system",
@@ -218,42 +232,73 @@ class ProcessingPipeline:
218
232
  }
219
233
  )
220
234
 
221
- agent_tools = mcp_tools_openai if grant_mcp else None
222
- tools_desc = "\n".join([t["function"]["name"] for t in (agent_tools or [])]) if agent_tools else ""
235
+ tools_desc = ""
236
+ if agent_tools:
237
+ tools_desc = "\n".join([
238
+ "- internal_web_search(query): 触发搜索并缓存结果",
239
+ "- crawl_page(url): 使用 Crawl4AI 抓取网页返回 Markdown"
240
+ ])
223
241
 
224
242
  user_msgs_text = user_input or ""
225
243
 
226
- search_msgs_text = self._format_search_msgs(search_payloads)
227
- has_search_results = bool(search_payloads) # Only append if search was actually performed
244
+ search_msgs_text = self._format_search_msgs()
245
+ image_msgs_text = self._format_image_search_msgs()
246
+
247
+ has_search_results = any(not r.get("is_image") for r in self.all_web_results)
248
+ has_image_results = any(r.get("is_image") for r in self.all_web_results)
228
249
 
229
- # Build agent system prompt with modular ADD sections
230
- agent_prompt_tpl = getattr(self.config, "agent_system_prompt", None) or AGENT_SYSTEM_PROMPT
231
- system_prompt = agent_prompt_tpl.format(user_msgs=user_msgs_text)
250
+ # Build agent system prompt
251
+ agent_prompt_tpl = getattr(self.config, "agent_system_prompt", None) or AGENT_SP
252
+
253
+ mode_desc_text = AGENT_SP_TOOLS_AGENT_ADD.format(tools_desc=tools_desc) if mode == "agent" else AGENT_SP_TOOLS_STANDARD_ADD
254
+ system_prompt = agent_prompt_tpl.format(
255
+ user_msgs=user_msgs_text,
256
+ mode=mode,
257
+ mode_desc=mode_desc_text
258
+ )
232
259
 
233
260
  # Append vision text if available
234
261
  if vision_text:
235
- system_prompt += AGENT_SYSTEM_PROMPT_INTRUCT_VISION_ADD.format(vision_msgs=vision_text)
236
-
237
- # Append search results if search was performed and has results
238
- if has_search_results:
239
- system_prompt += AGENT_SYSTEM_PROMPT_SEARCH_ADD.format(search_msgs=search_msgs_text)
262
+ system_prompt += AGENT_SP_INTRUCT_VISION_ADD.format(vision_msgs=vision_text)
240
263
 
241
- # Append MCP addon prompt when MCP is granted
242
- if grant_mcp and tools_desc:
243
- system_prompt += AGENT_SYSTEM_PROMPT_MCP_ADD.format(tools_desc=tools_desc)
264
+ # Append search results
265
+ if has_search_results and search_msgs_text:
266
+ system_prompt += AGENT_SP_SEARCH_ADD.format(search_msgs=search_msgs_text)
244
267
 
268
+ # Append crawled page content
269
+ page_msgs_text = self._format_page_msgs()
270
+ if page_msgs_text:
271
+ system_prompt += AGENT_SP_PAGE_ADD.format(page_msgs=page_msgs_text)
272
+
273
+ if has_image_results and image_msgs_text:
274
+ system_prompt += AGENT_SP_IMAGE_SEARCH_ADD.format(image_search_msgs=image_msgs_text)
275
+
245
276
  last_system_prompt = system_prompt
246
277
 
247
278
  messages = [{"role": "system", "content": system_prompt}]
248
279
  messages.extend(current_history)
249
280
 
250
281
  tools_for_step = agent_tools if (agent_tools and step < max_steps) else None
282
+
283
+ # Debug logging
284
+ if tools_for_step:
285
+ logger.info(f"[Agent] Tools provided: {[t['function']['name'] for t in tools_for_step]}")
286
+ else:
287
+ logger.warning(f"[Agent] NO TOOLS provided for step {step} (agent_tools={agent_tools is not None}, step<max={step < max_steps})")
288
+
289
+ step_llm_start = time.time()
251
290
  response, step_usage = await self._safe_llm_call(
252
291
  messages=messages,
253
292
  model=active_model,
254
293
  tools=tools_for_step,
255
294
  tool_choice="auto" if tools_for_step else None,
256
295
  )
296
+ step_llm_time = time.time() - step_llm_start
297
+
298
+ # Debug: Check response
299
+ has_tool_calls = response.tool_calls is not None and len(response.tool_calls) > 0
300
+ logger.info(f"[Agent] Response has_tool_calls={has_tool_calls}, has_content={bool(response.content)}")
301
+
257
302
  # Accumulate agent usage
258
303
  usage_totals["input_tokens"] += step_usage.get("input_tokens", 0)
259
304
  usage_totals["output_tokens"] += step_usage.get("output_tokens", 0)
@@ -262,16 +307,25 @@ class ProcessingPipeline:
262
307
  tool_calls = response.tool_calls
263
308
  stats["tool_calls_count"] += len(tool_calls)
264
309
 
265
- plan_dict = response.model_dump() if hasattr(response, "model_dump") else response
266
- current_history.append(plan_dict)
310
+ # Use model_dump to preserve provider-specific fields (e.g., Gemini's thought_signature)
311
+ assistant_msg = response.model_dump(exclude_unset=True) if hasattr(response, "model_dump") else {
312
+ "role": "assistant",
313
+ "content": response.content,
314
+ "tool_calls": [{"id": tc.id, "type": "function", "function": {"name": tc.function.name, "arguments": tc.function.arguments}} for tc in tool_calls]
315
+ }
316
+ current_history.append(assistant_msg)
267
317
 
268
- tasks = [self._safe_route_tool(tc, mcp_session=self.mcp_playwright if grant_mcp else None) for tc in tool_calls]
318
+ tasks = [self._safe_route_tool(tc) for tc in tool_calls]
319
+ tool_start_time = time.time()
269
320
  results = await asyncio.gather(*tasks)
321
+ tool_exec_time = time.time() - tool_start_time
270
322
 
271
323
  step_trace = {
272
324
  "step": step,
273
325
  "tool_calls": [self._tool_call_to_trace(tc) for tc in tool_calls],
274
326
  "tool_results": [],
327
+ "tool_time": tool_exec_time,
328
+ "llm_time": step_llm_time,
275
329
  }
276
330
  for i, result in enumerate(results):
277
331
  tc = tool_calls[i]
@@ -300,19 +354,16 @@ class ProcessingPipeline:
300
354
 
301
355
  agent_time = time.time() - agent_start_time
302
356
 
303
- # Calculate Agent Cost (accumulated steps)
357
+ # Calculate Agent Cost
304
358
  agent_cost = 0.0
305
359
  a_in_price = float(getattr(self.config, "input_price", 0.0) or 0.0)
306
360
  a_out_price = float(getattr(self.config, "output_price", 0.0) or 0.0)
307
361
 
308
- # Agent usage is already in usage_totals, but that includes ALL stages.
309
- # We need just Agent tokens for Agent cost.
310
- # Agent inputs = Total inputs - Vision inputs - Instruct inputs
311
362
  agent_input_tokens = usage_totals["input_tokens"] - vision_usage.get("input_tokens", 0) - intruct_usage.get("input_tokens", 0)
312
363
  agent_output_tokens = usage_totals["output_tokens"] - vision_usage.get("output_tokens", 0) - intruct_usage.get("output_tokens", 0)
313
364
 
314
365
  if a_in_price > 0 or a_out_price > 0:
315
- agent_cost = (agent_input_tokens / 1_000_000 * a_in_price) + (agent_output_tokens / 1_000_000 * a_out_price)
366
+ agent_cost = (max(0, agent_input_tokens) / 1_000_000 * a_in_price) + (max(0, agent_output_tokens) / 1_000_000 * a_out_price)
316
367
 
317
368
  trace["agent"] = {
318
369
  "model": active_model,
@@ -320,7 +371,6 @@ class ProcessingPipeline:
320
371
  "system_prompt": last_system_prompt,
321
372
  "steps": agent_trace_steps,
322
373
  "final_output": final_response_content,
323
- "mcp_granted": grant_mcp,
324
374
  "time": agent_time,
325
375
  "cost": agent_cost
326
376
  }
@@ -335,54 +385,36 @@ class ProcessingPipeline:
335
385
  "output_tokens": usage_totals["output_tokens"],
336
386
  "total_cost": 0.0,
337
387
  }
338
- # Calculate cost if any pricing is configured
339
388
  input_price = getattr(self.config, "input_price", None) or 0.0
340
389
  output_price = getattr(self.config, "output_price", None) or 0.0
341
390
 
342
391
  if input_price > 0 or output_price > 0:
343
- # Price is per million tokens
344
392
  input_cost = (usage_totals["input_tokens"] / 1_000_000) * input_price
345
393
  output_cost = (usage_totals["output_tokens"] / 1_000_000) * output_price
346
394
  billing_info["total_cost"] = input_cost + output_cost
347
- # logger.info(f"Billing: {usage_totals['input_tokens']} in @ ${input_price}/M + {usage_totals['output_tokens']} out @ ${output_price}/M = ${billing_info['total_cost']:.6f}")
348
395
 
349
396
  # Build stages_used list for UI display
350
- # Order: Vision (if used) -> Search (if performed) -> Agent
351
397
  stages_used = []
352
398
 
353
- # Helper to infer icon from model name or base_url
354
399
  def infer_icon(model_name: str, base_url: str) -> str:
355
400
  model_lower = (model_name or "").lower()
356
401
  url_lower = (base_url or "").lower()
357
-
358
- if "deepseek" in model_lower or "deepseek" in url_lower:
359
- return "deepseek"
360
- elif "claude" in model_lower or "anthropic" in url_lower:
361
- return "anthropic"
362
- elif "gemini" in model_lower or "google" in url_lower:
363
- return "google"
364
- elif "gpt" in model_lower or "openai" in url_lower:
365
- return "openai"
366
- elif "qwen" in model_lower:
367
- return "qwen"
368
- elif "openrouter" in url_lower:
369
- return "openrouter"
370
- return "openai" # Default fallback
402
+ if "deepseek" in model_lower or "deepseek" in url_lower: return "deepseek"
403
+ elif "claude" in model_lower or "anthropic" in url_lower: return "anthropic"
404
+ elif "gemini" in model_lower or "google" in url_lower: return "google"
405
+ elif "gpt" in model_lower or "openai" in url_lower: return "openai"
406
+ elif "qwen" in model_lower: return "qwen"
407
+ elif "openrouter" in url_lower: return "openrouter"
408
+ return "openai"
371
409
 
372
- # Helper to infer provider from base_url
373
410
  def infer_provider(base_url: str) -> str:
374
411
  url_lower = (base_url or "").lower()
375
- if "openrouter" in url_lower:
376
- return "OpenRouter"
377
- elif "openai" in url_lower:
378
- return "OpenAI"
379
- elif "anthropic" in url_lower:
380
- return "Anthropic"
381
- elif "google" in url_lower:
382
- return "Google"
383
- elif "deepseek" in url_lower:
384
- return "DeepSeek"
385
- return "" # Empty string = don't show provider
412
+ if "openrouter" in url_lower: return "OpenRouter"
413
+ elif "openai" in url_lower: return "OpenAI"
414
+ elif "anthropic" in url_lower: return "Anthropic"
415
+ elif "google" in url_lower: return "Google"
416
+ elif "deepseek" in url_lower: return "DeepSeek"
417
+ return ""
386
418
 
387
419
  if trace.get("vision"):
388
420
  v = trace["vision"]
@@ -404,36 +436,157 @@ class ProcessingPipeline:
404
436
  stages_used.append({
405
437
  "name": "Instruct",
406
438
  "model": i_model,
407
- "icon_config": getattr(self.config, "intruct_icon", None) or infer_icon(i_model, i_base_url),
439
+ "icon_config": getattr(self.config, "instruct_icon", None) or getattr(self.config, "intruct_icon", None) or infer_icon(i_model, i_base_url),
408
440
  "provider": infer_provider(i_base_url),
409
441
  "time": i.get("time", 0),
410
442
  "cost": i.get("cost", 0.0)
411
443
  })
412
444
 
413
- # Show Search stage only when search was actually performed
414
- if search_payloads:
415
- # Use dedicated SearXNG metadata as requested
445
+ if has_search_results and search_payloads:
416
446
  stages_used.append({
417
447
  "name": "Search",
418
- "model": "SearXNG",
419
- "icon_config": "search", # Ensure mapping exists or handle specially in render
420
- "provider": "SearXNG",
448
+ "model": getattr(self.config, "search_name", "DuckDuckGo"),
449
+ "icon_config": "search",
450
+ "provider": getattr(self.config, 'search_provider', 'Crawl4AI'),
421
451
  "time": search_time,
422
- "cost": 0.0 # Search is free in this plugin
452
+ "cost": 0.0
423
453
  })
424
454
 
455
+ # Add Crawler stage if Instruct used crawl_page
456
+ if trace.get("intruct"):
457
+ intruct_tool_calls = trace["intruct"].get("tool_calls", [])
458
+ crawl_calls = [tc for tc in intruct_tool_calls if tc.get("name") == "crawl_page"]
459
+ if crawl_calls:
460
+ # Build crawled_pages list for UI
461
+ crawled_pages = []
462
+ for tc in crawl_calls:
463
+ url = tc.get("arguments", {}).get("url", "")
464
+ # Try to find cached result
465
+ found = next((r for r in self.all_web_results if r.get("url") == url and r.get("is_crawled")), None)
466
+ if found:
467
+ try:
468
+ from urllib.parse import urlparse
469
+ domain = urlparse(url).netloc
470
+ except:
471
+ domain = ""
472
+ crawled_pages.append({
473
+ "title": found.get("title", "Page"),
474
+ "url": url,
475
+ "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
476
+ })
477
+
478
+ stages_used.append({
479
+ "name": "Crawler",
480
+ "model": "Crawl4AI",
481
+ "icon_config": "search",
482
+ "provider": "网页抓取",
483
+ "time": search_time, # Use existing search_time which includes fetch time
484
+ "cost": 0.0,
485
+ "crawled_pages": crawled_pages
486
+ })
487
+
488
+ # --- Granular Agent Stages (Grouped) ---
425
489
  if trace.get("agent"):
426
490
  a = trace["agent"]
427
491
  a_model = a.get("model", "") or active_model
428
492
  a_base_url = a.get("base_url", "") or self.config.base_url
429
- stages_used.append({
430
- "name": "Agent",
431
- "model": a_model,
432
- "icon_config": getattr(self.config, "icon", None) or infer_icon(a_model, a_base_url),
433
- "provider": infer_provider(a_base_url),
434
- "time": a.get("time", 0),
435
- "cost": a.get("cost", 0.0)
436
- })
493
+ steps = a.get("steps", [])
494
+ agent_icon = getattr(self.config, "icon", None) or infer_icon(a_model, a_base_url)
495
+ agent_provider = infer_provider(a_base_url)
496
+
497
+ for s in steps:
498
+ if "tool_calls" in s:
499
+ # 1. Agent Thought Stage (with LLM time)
500
+ stages_used.append({
501
+ "name": "Agent",
502
+ "model": a_model,
503
+ "icon_config": agent_icon,
504
+ "provider": agent_provider,
505
+ "time": s.get("llm_time", 0), "cost": 0
506
+ })
507
+
508
+ # 2. Grouped Tool Stages
509
+ # Collect results for grouping
510
+ search_group_items = []
511
+ crawler_group_items = []
512
+
513
+ tcs = s.get("tool_calls", [])
514
+ trs = s.get("tool_results", [])
515
+
516
+ for idx, tc in enumerate(tcs):
517
+ t_name = tc.get("name")
518
+ # Try to get result content if available
519
+ t_res_content = trs[idx].get("content", "") if idx < len(trs) else ""
520
+
521
+ if t_name in ["internal_web_search", "web_search", "internal_image_search"]:
522
+ # We don't have per-call metadata easily unless we parse the 'result' string (which is JSON dump now for route_tool)
523
+ # But search results are cached in self.all_web_results.
524
+ # The 'content' of search tool result is basically "cached_for_prompt".
525
+ # So we don't need to put items here, just show "Search" container.
526
+ # But wait, if we want to show "what was searched", we can parse args.
527
+ args = tc.get("arguments", {})
528
+ query = args.get("query", "")
529
+ if query:
530
+ search_group_items.append({"query": query})
531
+
532
+ elif t_name == "crawl_page":
533
+ # Get URL from arguments, title from result
534
+ args = tc.get("arguments", {})
535
+ url = args.get("url", "")
536
+ title = "Page"
537
+ try:
538
+ page_data = json.loads(t_res_content)
539
+ if isinstance(page_data, dict):
540
+ title = page_data.get("title", "Page")
541
+ except:
542
+ pass
543
+
544
+ if url:
545
+ try:
546
+ domain = urlparse(url).netloc
547
+ except:
548
+ domain = ""
549
+ crawler_group_items.append({
550
+ "title": title,
551
+ "url": url,
552
+ "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
553
+ })
554
+
555
+ # Append Grouped Stages
556
+ if search_group_items:
557
+ stages_used.append({
558
+ "name": "Search",
559
+ "model": getattr(self.config, "search_name", "DuckDuckGo"),
560
+ "icon_config": "search",
561
+ "provider": "Agent Search",
562
+ "time": s.get("tool_time", 0), "cost": 0,
563
+ "queries": search_group_items # Render can use this if needed, or just show generic
564
+ })
565
+
566
+ if crawler_group_items:
567
+ stages_used.append({
568
+ "name": "Crawler",
569
+ "model": "Crawl4AI",
570
+ "icon_config": "browser",
571
+ "provider": "Page Fetcher",
572
+ "time": s.get("tool_time", 0), "cost": 0,
573
+ "crawled_pages": crawler_group_items
574
+ })
575
+
576
+ elif s.get("final"):
577
+ stages_used.append({
578
+ "name": "Agent",
579
+ "model": a_model,
580
+ "icon_config": agent_icon,
581
+ "provider": agent_provider,
582
+ "time": 0, "cost": 0
583
+ })
584
+
585
+ # Assign total time/cost to last Agent stage
586
+ last_agent = next((s for s in reversed(stages_used) if s["name"] == "Agent"), None)
587
+ if last_agent:
588
+ last_agent["time"] = a.get("time", 0)
589
+ last_agent["cost"] = a.get("cost", 0.0)
437
590
 
438
591
  return {
439
592
  "llm_response": final_content,
@@ -455,105 +608,190 @@ class ProcessingPipeline:
455
608
  "error": str(e),
456
609
  }
457
610
 
458
- async def _safe_route_tool(self, tool_call, mcp_session=None):
459
- """Wrapper for safe concurrent execution."""
460
- try:
461
- return await asyncio.wait_for(self._route_tool(tool_call, mcp_session=mcp_session), timeout=15.0)
462
- except asyncio.TimeoutError:
463
- return "Error: Tool execution timed out (15s limit)."
464
- except Exception as e:
465
- return f"Error: Tool execution failed: {e}"
466
-
467
611
  def _parse_tagged_response(self, text: str) -> Dict[str, Any]:
468
- """Parse response for references and mcp blocks."""
469
- parsed = {"response": "", "references": [], "mcp_steps": []}
612
+ """Parse response for references and page references."""
613
+ parsed = {"response": "", "references": [], "page_references": [], "flow_steps": []}
470
614
  if not text:
471
615
  return parsed
472
616
 
473
617
  import re
474
618
 
475
619
  remaining_text = text
620
+
621
+ # 1. Try to unwrap JSON if the model acted like a ReAct agent
622
+ try:
623
+ # Check if it looks like JSON first to avoid performance hit
624
+ if remaining_text.strip().startswith("{") and "action" in remaining_text:
625
+ data = json.loads(remaining_text)
626
+ if isinstance(data, dict) and "action_input" in data:
627
+ remaining_text = data["action_input"]
628
+ except Exception:
629
+ pass
630
+
631
+ id_map = {} # Map original search ID (str) -> new index (int)
632
+ page_id_map = {} # Map original page ID (str) -> new index (int)
476
633
 
477
- # Parse references block
634
+ # Parse References Block (unified: contains both [search] and [page] entries)
478
635
  ref_block_match = re.search(r'```references\s*(.*?)\s*```', remaining_text, re.DOTALL | re.IGNORECASE)
479
636
  if ref_block_match:
480
637
  ref_content = ref_block_match.group(1).strip()
481
638
  for line in ref_content.split("\n"):
482
639
  line = line.strip()
483
- link_match = re.search(r"\[(.*?)\]\((.*?)\)", line)
484
- if link_match:
485
- parsed["references"].append({"title": link_match.group(1), "url": link_match.group(2)})
486
- remaining_text = remaining_text.replace(ref_block_match.group(0), "").strip()
487
-
488
- # Parse mcp block - supports format:
489
- # [icon] tool_name
490
- # description
491
- mcp_block_match = re.search(r'```mcp\s*(.*?)\s*```', remaining_text, re.DOTALL | re.IGNORECASE)
492
- if mcp_block_match:
493
- mcp_content = mcp_block_match.group(1).strip()
494
- lines = mcp_content.split("\n")
495
- current_step = None
496
-
497
- for line in lines:
498
- line_stripped = line.strip()
499
- if not line_stripped: continue
500
-
501
- # New Format: "1. [icon] name: description" OR "[icon] name: description"
502
- # Regex details:
503
- # ^(?:(?:\d+\.|[-*])\s+)? -> Optional numbering (1. or - or *)
504
- # \[(\w+)\] -> Icon in brackets [icon] -> group 1
505
- # \s+ -> separating space
506
- # ([^:]+) -> Tool Name (chars before colon) -> group 2
507
- # : -> Colon separator
508
- # \s*(.+) -> Description -> group 3
509
- new_format_match = re.match(r'^(?:(?:\d+\.|[-*])\s+)?\[(\w+)\]\s+([^:]+):\s*(.+)$', line_stripped)
640
+ if not line: continue
510
641
 
511
- # Old/Flexible Format: "[icon] name" (description might be on next line)
512
- flexible_match = re.match(r'^(?:(?:\d+\.|[-*])\s+)?\[(\w+)\]\s+(.+)$', line_stripped)
513
-
514
- if new_format_match:
515
- if current_step: parsed["mcp_steps"].append(current_step)
516
- current_step = {
517
- "icon": new_format_match.group(1).lower(),
518
- "name": new_format_match.group(2).strip(),
519
- "description": new_format_match.group(3).strip()
520
- }
521
- elif flexible_match:
522
- # Could be just "[icon] name" without description, or mixed
523
- if current_step: parsed["mcp_steps"].append(current_step)
524
- current_step = {
525
- "icon": flexible_match.group(1).lower(),
526
- "name": flexible_match.group(2).strip(),
527
- "description": ""
528
- }
529
- elif line.startswith(" ") and current_step:
530
- # Indented description line (continuation)
531
- if current_step["description"]:
532
- current_step["description"] += " " + line.strip()
642
+ # Match [id] [type] [title](url)
643
+ # e.g. [1] [search] [文本描述](url) or [5] [page] [页面标题](url)
644
+ id_match = re.match(r"^\[(\d+)\]", line)
645
+ type_match = re.search(r"\[(search|page)\]", line, re.IGNORECASE)
646
+ link_match = re.search(r"\[([^\[\]]+)\]\(([^)]+)\)", line)
647
+
648
+ idx = None
649
+ if id_match:
650
+ try:
651
+ idx = int(id_match.group(1))
652
+ except ValueError:
653
+ pass
654
+
655
+ ref_type = "search" # default
656
+ if type_match:
657
+ ref_type = type_match.group(1).lower()
658
+
659
+ entry = None
660
+ if idx is not None and self.all_web_results:
661
+ # For page type, only match crawled items
662
+ if ref_type == "page":
663
+ found = next((r for r in self.all_web_results if r.get("_id") == idx and r.get("is_crawled")), None)
533
664
  else:
534
- current_step["description"] = line.strip()
535
- elif line_stripped and not line_stripped.startswith("[") and current_step is None:
536
- # Plain text line without icon, treat as name if no current step
537
- # (This handles cases where LLM forgets brackets but lists steps)
538
- if current_step: parsed["mcp_steps"].append(current_step)
539
- current_step = {
540
- "icon": "default",
541
- "name": line_stripped,
542
- "description": ""
543
- }
544
-
545
- if current_step:
546
- parsed["mcp_steps"].append(current_step)
547
- remaining_text = remaining_text.replace(mcp_block_match.group(0), "").strip()
665
+ found = next((r for r in self.all_web_results if r.get("_id") == idx and not r.get("is_crawled")), None)
666
+
667
+ if found:
668
+ entry = {
669
+ "title": found.get("title"),
670
+ "url": found.get("url"),
671
+ "domain": found.get("domain", "")
672
+ }
673
+
674
+ if not entry and link_match:
675
+ entry = {"title": link_match.group(1), "url": link_match.group(2)}
676
+
677
+ if entry:
678
+ if ref_type == "page":
679
+ parsed["page_references"].append(entry)
680
+ if idx is not None:
681
+ page_id_map[str(idx)] = len(parsed["page_references"])
682
+ else:
683
+ parsed["references"].append(entry)
684
+ if idx is not None:
685
+ id_map[str(idx)] = len(parsed["references"])
686
+
687
+ remaining_text = remaining_text.replace(ref_block_match.group(0), "").strip()
688
+
689
+ # Replace search:id citations
690
+ if id_map:
691
+ def replace_search_citation(match):
692
+ old_id = match.group(1) or match.group(2)
693
+ if old_id in id_map:
694
+ return f"`search:{id_map[old_id]}`"
695
+ return match.group(0)
696
+
697
+ remaining_text = re.sub(r'\[(\d+)\]', replace_search_citation, remaining_text)
698
+ remaining_text = re.sub(r'(?<!`)search:(\d+)(?!`)', replace_search_citation, remaining_text)
699
+ remaining_text = re.sub(r'`search:(\d+)`', replace_search_citation, remaining_text)
700
+
701
+ # Replace page:id citations
702
+ if page_id_map:
703
+ def replace_page_citation(match):
704
+ old_id = match.group(1)
705
+ if old_id in page_id_map:
706
+ return f"`page:{page_id_map[old_id]}`"
707
+ return match.group(0)
708
+
709
+ remaining_text = re.sub(r'(?<!`)page:(\d+)(?!`)', replace_page_citation, remaining_text)
710
+ remaining_text = re.sub(r'`page:(\d+)`', replace_page_citation, remaining_text)
548
711
 
549
712
  parsed["response"] = remaining_text.strip()
550
713
  return parsed
551
714
 
715
+ async def _safe_route_tool(self, tool_call):
716
+ """Wrapper for safe concurrent execution of tool calls."""
717
+ try:
718
+ return await asyncio.wait_for(self._route_tool(tool_call), timeout=30.0)
719
+ except asyncio.TimeoutError:
720
+ return "Error: Tool execution timed out (30s limit)."
721
+ except Exception as e:
722
+ return f"Error: Tool execution failed: {e}"
723
+
724
+ async def _route_tool(self, tool_call):
725
+ """Execute tool call and return result."""
726
+ name = tool_call.function.name
727
+ args = json.loads(html.unescape(tool_call.function.arguments))
728
+
729
+ if name == "internal_web_search" or name == "web_search":
730
+ query = args.get("query")
731
+ web = await self.search_service.search(query)
732
+
733
+ # Cache results and assign IDs
734
+ current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)
735
+
736
+ for item in web:
737
+ current_max_id += 1
738
+ item["_id"] = current_max_id
739
+ item["query"] = query
740
+ self.all_web_results.append(item)
741
+
742
+ return json.dumps({"web_results_count": len(web), "status": "cached_for_prompt"}, ensure_ascii=False)
743
+
744
+ if name == "internal_image_search":
745
+ query = args.get("query")
746
+ images = await self.search_service.image_search(query)
747
+
748
+ current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)
749
+ for item in images:
750
+ current_max_id += 1
751
+ item["_id"] = current_max_id
752
+ item["query"] = query
753
+ item["is_image"] = True
754
+ self.all_web_results.append(item)
755
+
756
+ return json.dumps({"image_results_count": len(images), "status": "cached_for_prompt"}, ensure_ascii=False)
757
+
758
+ if name == "crawl_page":
759
+ url = args.get("url")
760
+ logger.info(f"[Tool] Crawling page: {url}")
761
+ # Returns Dict: {content, title, url}
762
+ result_dict = await self.search_service.fetch_page(url)
763
+
764
+ # Cache the crawled content so Agent can access it
765
+ current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)
766
+ current_max_id += 1
767
+
768
+ cached_item = {
769
+ "_id": current_max_id,
770
+ "title": result_dict.get("title", "Page"),
771
+ "url": result_dict.get("url", url),
772
+ "content": result_dict.get("content", "")[:2000], # Clip content for prompt
773
+ "domain": "",
774
+ "is_crawled": True,
775
+ }
776
+ try:
777
+ from urllib.parse import urlparse
778
+ cached_item["domain"] = urlparse(url).netloc
779
+ except:
780
+ pass
781
+
782
+ self.all_web_results.append(cached_item)
783
+
784
+ return json.dumps({"crawl_status": "success", "title": cached_item["title"], "content_length": len(result_dict.get("content", ""))}, ensure_ascii=False)
785
+
786
+ if name == "set_mode":
787
+ mode = args.get("mode", "standard")
788
+ self.current_mode = mode
789
+ return f"Mode set to {mode}"
790
+
791
+ return f"Unknown tool {name}"
792
+
793
+
552
794
  async def _safe_llm_call(self, messages, model, tools=None, tool_choice=None, client: Optional[AsyncOpenAI] = None):
553
- """
554
- Wrap LLM calls with timeout and error handling.
555
- Returns a tuple of (message, usage_dict) where usage_dict contains input_tokens and output_tokens.
556
- """
557
795
  try:
558
796
  return await asyncio.wait_for(
559
797
  self._do_llm_request(messages, model, tools, tool_choice, client=client or self.client),
@@ -584,7 +822,6 @@ class ProcessingPipeline:
584
822
  )
585
823
  logger.info(f"LLM Request RECEIVED after {time.time() - t0:.2f}s")
586
824
 
587
- # Extract usage information
588
825
  usage = {"input_tokens": 0, "output_tokens": 0}
589
826
  if hasattr(response, "usage") and response.usage:
590
827
  usage["input_tokens"] = getattr(response.usage, "prompt_tokens", 0) or 0
@@ -592,27 +829,7 @@ class ProcessingPipeline:
592
829
 
593
830
  return response.choices[0].message, usage
594
831
 
595
- async def _route_tool(self, tool_call, mcp_session=None):
596
- name = tool_call.function.name
597
- args = json.loads(html.unescape(tool_call.function.arguments))
598
-
599
- if name == "web_search":
600
- query = args.get("query")
601
- text_task = self.search_service.search(query)
602
- image_task = self.search_service.image_search(query)
603
- results = await asyncio.gather(text_task, image_task)
604
- return json.dumps({"web_results": results[0], "image_results": results[1][:5]}, ensure_ascii=False)
605
-
606
- if name == "grant_mcp_playwright":
607
- return "OK" # Minimal response, LLM already knows what it passed
608
-
609
- if mcp_session is not None and name.startswith("browser_"):
610
- return await mcp_session.call_tool_text(name, args or {})
611
-
612
- return f"Unknown tool {name}"
613
-
614
832
  async def _run_vision_stage(self, user_input: str, images: List[str], model: str, prompt: str) -> Tuple[str, Dict[str, int]]:
615
- """Returns (vision_text, usage_dict)."""
616
833
  content_payload: List[Dict[str, Any]] = [{"type": "text", "text": user_input or ""}]
617
834
  for img_b64 in images:
618
835
  url = f"data:image/png;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
@@ -633,13 +850,15 @@ class ProcessingPipeline:
633
850
  self, user_input: str, vision_text: str, model: str
634
851
  ) -> Tuple[str, List[str], Dict[str, Any], Dict[str, int], float]:
635
852
  """Returns (instruct_text, search_payloads, trace_dict, usage_dict, search_time)."""
636
- tools = [self.web_search_tool, self.grant_mcp_playwright_tool]
637
- tools_desc = "\n".join([t["function"]["name"] for t in tools])
853
+ # Instruct has access to: web_search, image_search, set_mode, crawl_page
854
+ tools = [self.web_search_tool, self.image_search_tool, self.set_mode_tool, self.crawl_page_tool]
855
+ tools_desc = "- internal_web_search: 搜索文本\n- internal_image_search: 搜索图片\n- crawl_page: 获取网页内容\n- set_mode: 设定standard/agent模式"
638
856
 
639
- prompt_tpl = getattr(self.config, "intruct_system_prompt", None) or INTRUCT_SYSTEM_PROMPT
857
+ prompt_tpl = getattr(self.config, "intruct_system_prompt", None) or INTRUCT_SP
640
858
  prompt = prompt_tpl.format(user_msgs=user_input or "", tools_desc=tools_desc)
859
+
641
860
  if vision_text:
642
- prompt = f"{prompt}\\n\\n{INTRUCT_SYSTEM_PROMPT_VISION_ADD.format(vision_msgs=vision_text)}"
861
+ prompt = f"{prompt}\\n\\n{INTRUCT_SP_VISION_ADD.format(vision_msgs=vision_text)}"
643
862
 
644
863
  client = self._client_for(
645
864
  api_key=getattr(self.config, "intruct_api_key", None),
@@ -666,22 +885,21 @@ class ProcessingPipeline:
666
885
  "prompt": prompt,
667
886
  "user_input": user_input or "",
668
887
  "vision_add": vision_text or "",
669
- "grant_mcp_playwright": False,
670
- "grant_reason": "",
671
888
  "tool_calls": [],
672
889
  "tool_results": [],
673
890
  "output": "",
674
891
  }
675
892
 
676
893
  search_time = 0.0
677
-
894
+ mode = "standard"
895
+ mode_reason = ""
896
+
678
897
  if response.tool_calls:
679
898
  plan_dict = response.model_dump() if hasattr(response, "model_dump") else response
680
899
  history.append(plan_dict)
681
900
 
682
901
  tasks = [self._safe_route_tool(tc) for tc in response.tool_calls]
683
902
 
684
- # Measure search/tool execution time
685
903
  st = time.time()
686
904
  results = await asyncio.gather(*tasks)
687
905
  search_time = time.time() - st
@@ -693,66 +911,84 @@ class ProcessingPipeline:
693
911
  )
694
912
  intruct_trace["tool_calls"].append(self._tool_call_to_trace(tc))
695
913
  intruct_trace["tool_results"].append({"name": tc.function.name, "content": str(result)})
696
- if tc.function.name == "web_search":
914
+
915
+ if tc.function.name in ["web_search", "internal_web_search"]:
697
916
  search_payloads.append(str(result))
698
- elif tc.function.name == "grant_mcp_playwright":
917
+ elif tc.function.name == "set_mode":
699
918
  try:
700
919
  args = json.loads(html.unescape(tc.function.arguments))
701
920
  except Exception:
702
921
  args = {}
703
- intruct_trace["grant_mcp_playwright"] = bool(args.get("grant"))
704
- intruct_trace["grant_reason"] = str(args.get("reason") or "")
705
- # No second LLM call: tool-call arguments already include the extracted keywords/query
706
- # and the grant decision; avoid wasting tokens/time.
922
+ mode = args.get("mode", mode)
923
+ mode_reason = args.get("reason", "")
924
+
925
+ intruct_trace["mode"] = mode
926
+ if mode_reason:
927
+ intruct_trace["mode_reason"] = mode_reason
928
+
707
929
  intruct_trace["output"] = ""
708
930
  intruct_trace["usage"] = usage
709
931
  return "", search_payloads, intruct_trace, usage, search_time
710
932
 
933
+ intruct_trace["mode"] = mode
711
934
  intruct_trace["output"] = (response.content or "").strip()
712
935
  intruct_trace["usage"] = usage
713
936
  return "", search_payloads, intruct_trace, usage, 0.0
714
937
 
715
- def _format_search_msgs(self, search_payloads: List[str]) -> str:
716
- """
717
- Keep only tool results for the agent (no extra Intruct free-text output).
718
- Also compress payloads to reduce prompt tokens.
719
- """
720
- merged_web: List[Dict[str, str]] = []
721
- merged_img: List[Dict[str, str]] = []
938
+ def _format_search_msgs(self) -> str:
939
+ """Format search snippets only (not crawled pages)."""
940
+ if not self.all_web_results:
941
+ return ""
722
942
 
723
- for payload in search_payloads or []:
724
- try:
725
- obj = json.loads(payload)
726
- except Exception:
727
- continue
728
- merged_web.extend(obj.get("web_results") or [])
729
- merged_img.extend(obj.get("image_results") or [])
730
-
731
- def dedupe(items: List[Dict[str, str]]) -> List[Dict[str, str]]:
732
- seen = set()
733
- out = []
734
- for it in items:
735
- url = it.get("url") or ""
736
- if not url or url in seen:
737
- continue
738
- seen.add(url)
739
- out.append(it)
740
- return out
943
+ def clip(s: str, n: int) -> str:
944
+ s = (s or "").strip()
945
+ return s if len(s) <= n else s[: n - 1] + "…"
946
+
947
+ lines = []
948
+ for res in self.all_web_results:
949
+ if res.get("is_image"): continue # Skip images
950
+ if res.get("is_crawled"): continue # Skip crawled pages (handled separately)
951
+ idx = res.get("_id")
952
+ title = clip(res.get("title", ""), 80)
953
+ url = res.get("url", "")
954
+ content = clip(res.get("content", ""), 200)
955
+ lines.append(f"[{idx}] Title: {title}\nURL: {url}\nSnippet: {content}\n")
956
+
957
+ return "\n".join(lines)
741
958
 
742
- merged_web = dedupe(merged_web)[:6]
743
- merged_img = dedupe(merged_img)[:3]
959
+ def _format_page_msgs(self) -> str:
960
+ """Format crawled page content (detailed)."""
961
+ if not self.all_web_results:
962
+ return ""
744
963
 
745
964
  def clip(s: str, n: int) -> str:
746
965
  s = (s or "").strip()
747
966
  return s if len(s) <= n else s[: n - 1] + "…"
748
967
 
749
- compact_web = [
750
- {"title": clip(r.get("title", ""), 80), "url": r.get("url", ""), "content": clip(r.get("content", ""), 180)}
751
- for r in merged_web
752
- ]
753
- compact_img = [{"title": clip(r.get("title", ""), 80), "url": r.get("url", "")} for r in merged_img]
968
+ lines = []
969
+ for res in self.all_web_results:
970
+ if not res.get("is_crawled"): continue # Only crawled pages
971
+ idx = res.get("_id")
972
+ title = clip(res.get("title", ""), 80)
973
+ url = res.get("url", "")
974
+ content = clip(res.get("content", ""), 1500) # More content for pages
975
+ lines.append(f"[{idx}] Title: {title}\nURL: {url}\nContent: {content}\n")
976
+
977
+ return "\n".join(lines)
754
978
 
755
- return json.dumps({"web_results": compact_web, "image_results": compact_img}, ensure_ascii=False)
979
+ def _format_image_search_msgs(self) -> str:
980
+ if not self.all_web_results:
981
+ return ""
982
+
983
+ lines = []
984
+ for res in self.all_web_results:
985
+ if not res.get("is_image"): continue
986
+ idx = res.get("_id")
987
+ title = res.get("title", "")
988
+ url = res.get("image", "") or res.get("url", "")
989
+ thumb = res.get("thumbnail", "")
990
+ lines.append(f"[{idx}] Title: {title}\nURL: {url}\nThumbnail: {thumb}\n")
991
+ return "\n".join(lines)
756
992
 
757
993
  def _client_for(self, api_key: Optional[str], base_url: Optional[str]) -> AsyncOpenAI:
758
994
  if api_key or base_url:
@@ -791,13 +1027,6 @@ class ProcessingPipeline:
791
1027
  parts.append("## Intruct\n")
792
1028
  parts.append(f"- model: `{t.get('model')}`")
793
1029
  parts.append(f"- base_url: `{t.get('base_url')}`\n")
794
- parts.append(f"- grant_mcp_playwright: `{bool(t.get('grant_mcp_playwright'))}`")
795
- if t.get("grant_reason"):
796
- parts.append(f"- grant_reason: `{t.get('grant_reason')}`")
797
- if "explicit_mcp_intent" in t:
798
- parts.append(f"- explicit_mcp_intent: `{bool(t.get('explicit_mcp_intent'))}`")
799
- if "grant_effective" in t:
800
- parts.append(f"- grant_effective: `{bool(t.get('grant_effective'))}`\n")
801
1030
  parts.append("### Prompt\n")
802
1031
  parts.append(fence("text", t.get("prompt", "")))
803
1032
  if t.get("tool_calls"):
@@ -815,7 +1044,6 @@ class ProcessingPipeline:
815
1044
  parts.append("## Agent\n")
816
1045
  parts.append(f"- model: `{a.get('model')}`")
817
1046
  parts.append(f"- base_url: `{a.get('base_url')}`\n")
818
- parts.append(f"- mcp_granted: `{bool(a.get('mcp_granted'))}`\n")
819
1047
  parts.append("### System Prompt\n")
820
1048
  parts.append(fence("text", a.get("system_prompt", "")))
821
1049
  parts.append("\n### Steps\n")
@@ -827,14 +1055,11 @@ class ProcessingPipeline:
827
1055
 
828
1056
async def close(self):
    """Best-effort shutdown: close the search service, then the shared crawler.

    Every failure is deliberately swallowed so teardown never raises,
    even when the lazy relative import itself fails.
    """
    try:
        await self.search_service.close()
    except Exception:
        pass
    try:
        # Imported lazily; import errors are swallowed like any other failure.
        from ..utils.search import close_shared_crawler
        await close_shared_crawler()
    except Exception:
        pass
833
-
834
- async def warmup_mcp(self) -> bool:
835
- ok = await self.mcp_playwright.ensure_connected()
836
- if ok:
837
- logger.info("MCP Playwright connected (warmup).")
838
- else:
839
- logger.warning("MCP Playwright warmup failed.")
840
- return ok