entari-plugin-hyw 3.2.113__py3-none-any.whl → 3.3.1__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of entari-plugin-hyw might be problematic.

Files changed (49)
  1. entari_plugin_hyw/__init__.py +309 -758
  2. entari_plugin_hyw/hyw_core.py +700 -0
  3. {entari_plugin_hyw-3.2.113.dist-info → entari_plugin_hyw-3.3.1.dist-info}/METADATA +25 -17
  4. entari_plugin_hyw-3.3.1.dist-info/RECORD +6 -0
  5. entari_plugin_hyw/assets/icon/anthropic.svg +0 -1
  6. entari_plugin_hyw/assets/icon/deepseek.png +0 -0
  7. entari_plugin_hyw/assets/icon/gemini.svg +0 -1
  8. entari_plugin_hyw/assets/icon/google.svg +0 -1
  9. entari_plugin_hyw/assets/icon/grok.png +0 -0
  10. entari_plugin_hyw/assets/icon/microsoft.svg +0 -15
  11. entari_plugin_hyw/assets/icon/minimax.png +0 -0
  12. entari_plugin_hyw/assets/icon/mistral.png +0 -0
  13. entari_plugin_hyw/assets/icon/nvida.png +0 -0
  14. entari_plugin_hyw/assets/icon/openai.svg +0 -1
  15. entari_plugin_hyw/assets/icon/openrouter.png +0 -0
  16. entari_plugin_hyw/assets/icon/perplexity.svg +0 -24
  17. entari_plugin_hyw/assets/icon/qwen.png +0 -0
  18. entari_plugin_hyw/assets/icon/xai.png +0 -0
  19. entari_plugin_hyw/assets/icon/zai.png +0 -0
  20. entari_plugin_hyw/assets/libs/highlight.css +0 -10
  21. entari_plugin_hyw/assets/libs/highlight.js +0 -1213
  22. entari_plugin_hyw/assets/libs/katex-auto-render.js +0 -1
  23. entari_plugin_hyw/assets/libs/katex.css +0 -1
  24. entari_plugin_hyw/assets/libs/katex.js +0 -1
  25. entari_plugin_hyw/assets/libs/tailwind.css +0 -1
  26. entari_plugin_hyw/assets/package-lock.json +0 -953
  27. entari_plugin_hyw/assets/package.json +0 -16
  28. entari_plugin_hyw/assets/tailwind.config.js +0 -12
  29. entari_plugin_hyw/assets/tailwind.input.css +0 -235
  30. entari_plugin_hyw/assets/template.html +0 -157
  31. entari_plugin_hyw/assets/template.html.bak +0 -157
  32. entari_plugin_hyw/assets/template.j2 +0 -259
  33. entari_plugin_hyw/core/__init__.py +0 -0
  34. entari_plugin_hyw/core/config.py +0 -36
  35. entari_plugin_hyw/core/history.py +0 -146
  36. entari_plugin_hyw/core/hyw.py +0 -41
  37. entari_plugin_hyw/core/pipeline.py +0 -840
  38. entari_plugin_hyw/core/render.py +0 -531
  39. entari_plugin_hyw/core/render.py.bak +0 -926
  40. entari_plugin_hyw/utils/__init__.py +0 -3
  41. entari_plugin_hyw/utils/browser.py +0 -61
  42. entari_plugin_hyw/utils/mcp_playwright.py +0 -128
  43. entari_plugin_hyw/utils/misc.py +0 -93
  44. entari_plugin_hyw/utils/playwright_tool.py +0 -46
  45. entari_plugin_hyw/utils/prompts.py +0 -94
  46. entari_plugin_hyw/utils/search.py +0 -193
  47. entari_plugin_hyw-3.2.113.dist-info/RECORD +0 -47
  48. {entari_plugin_hyw-3.2.113.dist-info → entari_plugin_hyw-3.3.1.dist-info}/WHEEL +0 -0
  49. {entari_plugin_hyw-3.2.113.dist-info → entari_plugin_hyw-3.3.1.dist-info}/top_level.txt +0 -0
entari_plugin_hyw/core/pipeline.py (removed)
@@ -1,840 +0,0 @@
- import asyncio
- import html
- import json
- import time
- from contextlib import asynccontextmanager
- from typing import Any, Dict, List, Optional, Tuple
-
- from loguru import logger
- from openai import AsyncOpenAI
-
- from .config import HYWConfig
- from ..utils.mcp_playwright import MCPPlaywrightManager
- from ..utils.search import SearchService
- from ..utils.prompts import (
-     AGENT_SYSTEM_PROMPT,
-     AGENT_SYSTEM_PROMPT_INTRUCT_VISION_ADD,
-     AGENT_SYSTEM_PROMPT_MCP_ADD,
-     AGENT_SYSTEM_PROMPT_SEARCH_ADD,
-     INTRUCT_SYSTEM_PROMPT,
-     INTRUCT_SYSTEM_PROMPT_VISION_ADD,
-     VISION_SYSTEM_PROMPT,
- )
-
- @asynccontextmanager
- async def _null_async_context():
-     yield None
-
-
- class ProcessingPipeline:
-     """
-     Core pipeline (vision -> instruct/search -> agent).
-     """
-
-     def __init__(self, config: HYWConfig):
-         self.config = config
-         self.search_service = SearchService(config)
-         self.client = AsyncOpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
-         # Build Playwright MCP args with headless flag if configured
-         playwright_args = getattr(self.config, "playwright_mcp_args", None)
-         if playwright_args is None:
-             playwright_args = ["-y", "@playwright/mcp@latest"]
-         # Add --headless flag if headless mode is enabled
-         if getattr(self.config, "headless", True):
-             playwright_args.append("--headless")
-
-         self.mcp_playwright = MCPPlaywrightManager(
-             command=getattr(self.config, "playwright_mcp_command", "npx"),
-             args=playwright_args,
-         )
-
-         self.web_search_tool = {
-             "type": "function",
-             "function": {
-                 "name": "web_search",
-                 "description": "Search the web for text and images.",
-                 "parameters": {
-                     "type": "object",
-                     "properties": {"query": {"type": "string"}},
-                     "required": ["query"],
-                 },
-             },
-         }
-         self.grant_mcp_playwright_tool = {
-             "type": "function",
-             "function": {
-                 "name": "grant_mcp_playwright",
-                 "description": "Decide whether to grant Playwright MCP browser tools to the agent for this request.",
-                 "parameters": {
-                     "type": "object",
-                     "properties": {
-                         "grant": {"type": "boolean"},
-                         "reason": {"type": "string"},
-                     },
-                     "required": ["grant"],
-                 },
-             },
-         }
-
-     async def execute(
-         self,
-         user_input: str,
-         conversation_history: List[Dict],
-         model_name: str = None,
-         images: List[str] = None,
-         vision_model_name: str = None,
-         selected_vision_model: str = None,
-     ) -> Dict[str, Any]:
-         """
-         1) Vision: summarize images once (no image persistence).
-         2) Intruct: run web_search and decide whether to grant Playwright MCP tools.
-         3) Agent: normally no tools; if granted, allow Playwright MCP tools (max 6 rounds; step 5 nudge, step 6 forced).
-         """
-         start_time = time.time()
-         stats = {"start_time": start_time, "tool_calls_count": 0}
-         # Token usage tracking for billing
-         usage_totals = {"input_tokens": 0, "output_tokens": 0}
-         active_model = model_name or self.config.model_name
-
-         current_history = conversation_history
-         final_response_content = ""
-         structured: Dict[str, Any] = {}
-
-         try:
-             logger.info(f"Pipeline: Starting workflow for '{user_input}' using {active_model}")
-
-             trace: Dict[str, Any] = {
-                 "vision": None,
-                 "intruct": None,
-                 "agent": None,
-             }
-
-             # Vision stage
-             vision_text = ""
-             vision_start = time.time()
-             vision_time = 0
-             vision_cost = 0.0
-             vision_usage = {}
-             if images:
-                 vision_model = (
-                     selected_vision_model
-                     or vision_model_name
-                     or getattr(self.config, "vision_model_name", None)
-                     or active_model
-                 )
-                 vision_prompt_tpl = getattr(self.config, "vision_system_prompt", None) or VISION_SYSTEM_PROMPT
-                 vision_prompt = vision_prompt_tpl.format(user_msgs=user_input or "[图片]")
-                 vision_text, vision_usage = await self._run_vision_stage(
-                     user_input=user_input,
-                     images=images,
-                     model=vision_model,
-                     prompt=vision_prompt,
-                 )
-                 # Add vision usage with vision-specific pricing
-                 usage_totals["input_tokens"] += vision_usage.get("input_tokens", 0)
-                 usage_totals["output_tokens"] += vision_usage.get("output_tokens", 0)
-
-                 # Calculate Vision Cost
-                 v_in_price = float(getattr(self.config, "vision_input_price", None) or getattr(self.config, "input_price", 0.0) or 0.0)
-                 v_out_price = float(getattr(self.config, "vision_output_price", None) or getattr(self.config, "output_price", 0.0) or 0.0)
-                 if v_in_price > 0 or v_out_price > 0:
-                     vision_cost = (vision_usage.get("input_tokens", 0) / 1_000_000 * v_in_price) + (vision_usage.get("output_tokens", 0) / 1_000_000 * v_out_price)
-
-                 vision_time = time.time() - vision_start
-
-                 trace["vision"] = {
-                     "model": vision_model,
-                     "base_url": getattr(self.config, "vision_base_url", None) or self.config.base_url,
-                     "prompt": vision_prompt,
-                     "user_input": user_input or "",
-                     "images_count": len(images or []),
-                     "output": vision_text,
-                     "usage": vision_usage,
-                     "time": vision_time,
-                     "cost": vision_cost
-                 }
-
-             # Intruct + pre-search
-             instruct_start = time.time()
-             instruct_model = getattr(self.config, "intruct_model_name", None) or active_model
-             instruct_text, search_payloads, intruct_trace, intruct_usage, search_time = await self._run_instruct_stage(
-                 user_input=user_input,
-                 vision_text=vision_text,
-                 model=instruct_model,
-             )
-             instruct_time = time.time() - instruct_start
-
-             # Calculate Instruct Cost
-             instruct_cost = 0.0
-             i_in_price = float(getattr(self.config, "intruct_input_price", None) or getattr(self.config, "input_price", 0.0) or 0.0)
-             i_out_price = float(getattr(self.config, "intruct_output_price", None) or getattr(self.config, "output_price", 0.0) or 0.0)
-             if i_in_price > 0 or i_out_price > 0:
-                 instruct_cost = (intruct_usage.get("input_tokens", 0) / 1_000_000 * i_in_price) + (intruct_usage.get("output_tokens", 0) / 1_000_000 * i_out_price)
-
-             # Add instruct usage
-             usage_totals["input_tokens"] += intruct_usage.get("input_tokens", 0)
-             usage_totals["output_tokens"] += intruct_usage.get("output_tokens", 0)
-
-             intruct_trace["time"] = instruct_time
-             intruct_trace["cost"] = instruct_cost
-             trace["intruct"] = intruct_trace
-
-             explicit_mcp_intent = "mcp" in (user_input or "").lower()
-             grant_requested = bool(intruct_trace.get("grant_mcp_playwright", False))
-             grant_mcp = bool(grant_requested and explicit_mcp_intent)
-             intruct_trace["explicit_mcp_intent"] = explicit_mcp_intent
-             intruct_trace["grant_effective"] = grant_mcp
-             if grant_requested and not explicit_mcp_intent:
-                 logger.info("Intruct requested MCP grant, but user did not express MCP intent. Grant ignored.")
-             if grant_mcp:
-                 logger.warning(f"MCP Playwright granted for this request: reason={intruct_trace.get('grant_reason')!r}")
-
-             # Start agent loop
-             agent_start_time = time.time()
-             current_history.append({"role": "user", "content": user_input or "..."})
-
-             max_steps = 6
-             step = 0
-             agent_trace_steps: List[Dict[str, Any]] = []
-             last_system_prompt = ""
-
-             mcp_tools_openai: Optional[List[Dict[str, Any]]] = None
-             if grant_mcp:
-                 mcp_tools_openai = await self.mcp_playwright.tools_openai()
-                 if not mcp_tools_openai:
-                     logger.warning("MCP Playwright was granted but tools are unavailable (connect failed).")
-                     grant_mcp = False
-
-             # Agent loop - always runs regardless of MCP grant status
-             while step < max_steps:
-                 step += 1
-                 logger.info(f"Pipeline: Agent step {step}/{max_steps}")
-
-                 if step == 5:
-                     current_history.append(
-                         {
-                             "role": "system",
-                             "content": "System: [Next Step Final] Please start consolidating the answer; the next step must be the final response.",
-                         }
-                     )
-
-                 agent_tools = mcp_tools_openai if grant_mcp else None
-                 tools_desc = "\n".join([t["function"]["name"] for t in (agent_tools or [])]) if agent_tools else ""
-
-                 user_msgs_text = user_input or ""
-
-                 search_msgs_text = self._format_search_msgs(search_payloads)
-                 has_search_results = bool(search_payloads) # Only append if search was actually performed
-
-                 # Build agent system prompt with modular ADD sections
-                 agent_prompt_tpl = getattr(self.config, "agent_system_prompt", None) or AGENT_SYSTEM_PROMPT
-                 system_prompt = agent_prompt_tpl.format(user_msgs=user_msgs_text)
-
-                 # Append vision text if available
-                 if vision_text:
-                     system_prompt += AGENT_SYSTEM_PROMPT_INTRUCT_VISION_ADD.format(vision_msgs=vision_text)
-
-                 # Append search results if search was performed and has results
-                 if has_search_results:
-                     system_prompt += AGENT_SYSTEM_PROMPT_SEARCH_ADD.format(search_msgs=search_msgs_text)
-
-                 # Append MCP addon prompt when MCP is granted
-                 if grant_mcp and tools_desc:
-                     system_prompt += AGENT_SYSTEM_PROMPT_MCP_ADD.format(tools_desc=tools_desc)
-
-                 last_system_prompt = system_prompt
-
-                 messages = [{"role": "system", "content": system_prompt}]
-                 messages.extend(current_history)
-
-                 tools_for_step = agent_tools if (agent_tools and step < max_steps) else None
-                 response, step_usage = await self._safe_llm_call(
-                     messages=messages,
-                     model=active_model,
-                     tools=tools_for_step,
-                     tool_choice="auto" if tools_for_step else None,
-                 )
-                 # Accumulate agent usage
-                 usage_totals["input_tokens"] += step_usage.get("input_tokens", 0)
-                 usage_totals["output_tokens"] += step_usage.get("output_tokens", 0)
-
-                 if response.tool_calls and tools_for_step:
-                     tool_calls = response.tool_calls
-                     stats["tool_calls_count"] += len(tool_calls)
-
-                     plan_dict = response.model_dump() if hasattr(response, "model_dump") else response
-                     current_history.append(plan_dict)
-
-                     tasks = [self._safe_route_tool(tc, mcp_session=self.mcp_playwright if grant_mcp else None) for tc in tool_calls]
-                     results = await asyncio.gather(*tasks)
-
-                     step_trace = {
-                         "step": step,
-                         "tool_calls": [self._tool_call_to_trace(tc) for tc in tool_calls],
-                         "tool_results": [],
-                     }
-                     for i, result in enumerate(results):
-                         tc = tool_calls[i]
-                         step_trace["tool_results"].append({"name": tc.function.name, "content": str(result)})
-                         current_history.append(
-                             {
-                                 "tool_call_id": tc.id,
-                                 "role": "tool",
-                                 "name": tc.function.name,
-                                 "content": str(result),
-                             }
-                         )
-                     agent_trace_steps.append(step_trace)
-                     continue
-
-                 final_response_content = response.content or ""
-                 current_history.append({"role": "assistant", "content": final_response_content})
-                 agent_trace_steps.append({"step": step, "final": True, "output": final_response_content})
-                 break
-
-             if not final_response_content:
-                 final_response_content = "执行结束,但未生成内容。"
-
-             structured = self._parse_tagged_response(final_response_content)
-             final_content = structured.get("response") or final_response_content
-
-             agent_time = time.time() - agent_start_time
-
-             # Calculate Agent Cost (accumulated steps)
-             agent_cost = 0.0
-             a_in_price = float(getattr(self.config, "input_price", 0.0) or 0.0)
-             a_out_price = float(getattr(self.config, "output_price", 0.0) or 0.0)
-
-             # Agent usage is already in usage_totals, but that includes ALL stages.
-             # We need just Agent tokens for Agent cost.
-             # Agent inputs = Total inputs - Vision inputs - Instruct inputs
-             agent_input_tokens = usage_totals["input_tokens"] - vision_usage.get("input_tokens", 0) - intruct_usage.get("input_tokens", 0)
-             agent_output_tokens = usage_totals["output_tokens"] - vision_usage.get("output_tokens", 0) - intruct_usage.get("output_tokens", 0)
-
-             if a_in_price > 0 or a_out_price > 0:
-                 agent_cost = (agent_input_tokens / 1_000_000 * a_in_price) + (agent_output_tokens / 1_000_000 * a_out_price)
-
-             trace["agent"] = {
-                 "model": active_model,
-                 "base_url": self.config.base_url,
-                 "system_prompt": last_system_prompt,
-                 "steps": agent_trace_steps,
-                 "final_output": final_response_content,
-                 "mcp_granted": grant_mcp,
-                 "time": agent_time,
-                 "cost": agent_cost
-             }
-             trace_markdown = self._render_trace_markdown(trace)
-
-             stats["total_time"] = time.time() - start_time
-             stats["steps"] = step
-
-             # Calculate billing info
-             billing_info = {
-                 "input_tokens": usage_totals["input_tokens"],
-                 "output_tokens": usage_totals["output_tokens"],
-                 "total_cost": 0.0,
-             }
-             # Calculate cost if any pricing is configured
-             input_price = getattr(self.config, "input_price", None) or 0.0
-             output_price = getattr(self.config, "output_price", None) or 0.0
-
-             if input_price > 0 or output_price > 0:
-                 # Price is per million tokens
-                 input_cost = (usage_totals["input_tokens"] / 1_000_000) * input_price
-                 output_cost = (usage_totals["output_tokens"] / 1_000_000) * output_price
-                 billing_info["total_cost"] = input_cost + output_cost
-                 # logger.info(f"Billing: {usage_totals['input_tokens']} in @ ${input_price}/M + {usage_totals['output_tokens']} out @ ${output_price}/M = ${billing_info['total_cost']:.6f}")
-
-             # Build stages_used list for UI display
-             # Order: Vision (if used) -> Search (if performed) -> Agent
-             stages_used = []
-
-             # Helper to infer icon from model name or base_url
-             def infer_icon(model_name: str, base_url: str) -> str:
-                 model_lower = (model_name or "").lower()
-                 url_lower = (base_url or "").lower()
-
-                 if "deepseek" in model_lower or "deepseek" in url_lower:
-                     return "deepseek"
-                 elif "claude" in model_lower or "anthropic" in url_lower:
-                     return "anthropic"
-                 elif "gemini" in model_lower or "google" in url_lower:
-                     return "google"
-                 elif "gpt" in model_lower or "openai" in url_lower:
-                     return "openai"
-                 elif "qwen" in model_lower:
-                     return "qwen"
-                 elif "openrouter" in url_lower:
-                     return "openrouter"
-                 return "openai" # Default fallback
-
-             # Helper to infer provider from base_url
-             def infer_provider(base_url: str) -> str:
-                 url_lower = (base_url or "").lower()
-                 if "openrouter" in url_lower:
-                     return "OpenRouter"
-                 elif "openai" in url_lower:
-                     return "OpenAI"
-                 elif "anthropic" in url_lower:
-                     return "Anthropic"
-                 elif "google" in url_lower:
-                     return "Google"
-                 elif "deepseek" in url_lower:
-                     return "DeepSeek"
-                 return "" # Empty string = don't show provider
-
-             if trace.get("vision"):
-                 v = trace["vision"]
-                 v_model = v.get("model", "")
-                 v_base_url = v.get("base_url", "") or self.config.base_url
-                 stages_used.append({
-                     "name": "Vision",
-                     "model": v_model,
-                     "icon_config": getattr(self.config, "vision_icon", None) or infer_icon(v_model, v_base_url),
-                     "provider": infer_provider(v_base_url),
-                     "time": v.get("time", 0),
-                     "cost": v.get("cost", 0.0)
-                 })
-
-             if trace.get("intruct"):
-                 i = trace["intruct"]
-                 i_model = i.get("model", "")
-                 i_base_url = i.get("base_url", "") or self.config.base_url
-                 stages_used.append({
-                     "name": "Instruct",
-                     "model": i_model,
-                     "icon_config": getattr(self.config, "intruct_icon", None) or infer_icon(i_model, i_base_url),
-                     "provider": infer_provider(i_base_url),
-                     "time": i.get("time", 0),
-                     "cost": i.get("cost", 0.0)
-                 })
-
-             # Show Search stage only when search was actually performed
-             if search_payloads:
-                 # Use dedicated SearXNG metadata as requested
-                 stages_used.append({
-                     "name": "Search",
-                     "model": "SearXNG",
-                     "icon_config": "search", # Ensure mapping exists or handle specially in render
-                     "provider": "SearXNG",
-                     "time": search_time,
-                     "cost": 0.0 # Search is free in this plugin
-                 })
-
-             if trace.get("agent"):
-                 a = trace["agent"]
-                 a_model = a.get("model", "") or active_model
-                 a_base_url = a.get("base_url", "") or self.config.base_url
-                 stages_used.append({
-                     "name": "Agent",
-                     "model": a_model,
-                     "icon_config": getattr(self.config, "icon", None) or infer_icon(a_model, a_base_url),
-                     "provider": infer_provider(a_base_url),
-                     "time": a.get("time", 0),
-                     "cost": a.get("cost", 0.0)
-                 })
-
-             return {
-                 "llm_response": final_content,
-                 "structured_response": structured,
-                 "stats": stats,
-                 "model_used": active_model,
-                 "vision_model_used": (selected_vision_model or getattr(self.config, "vision_model_name", None)) if images else None,
-                 "conversation_history": current_history,
-                 "trace_markdown": trace_markdown,
-                 "billing_info": billing_info,
-                 "stages_used": stages_used,
-             }
-
-         except Exception as e:
-             logger.exception("Pipeline Critical Failure")
-             return {
-                 "llm_response": f"I encountered a critical error: {e}",
-                 "stats": stats,
-                 "error": str(e),
-             }
-
-     async def _safe_route_tool(self, tool_call, mcp_session=None):
-         """Wrapper for safe concurrent execution."""
-         try:
-             return await asyncio.wait_for(self._route_tool(tool_call, mcp_session=mcp_session), timeout=15.0)
-         except asyncio.TimeoutError:
-             return "Error: Tool execution timed out (15s limit)."
-         except Exception as e:
-             return f"Error: Tool execution failed: {e}"
-
-     def _parse_tagged_response(self, text: str) -> Dict[str, Any]:
-         """Parse response for references and mcp blocks."""
-         parsed = {"response": "", "references": [], "mcp_steps": []}
-         if not text:
-             return parsed
-
-         import re
-
-         remaining_text = text
-
-         # Parse references block
-         ref_block_match = re.search(r'```references\s*(.*?)\s*```', remaining_text, re.DOTALL | re.IGNORECASE)
-         if ref_block_match:
-             ref_content = ref_block_match.group(1).strip()
-             for line in ref_content.split("\n"):
-                 line = line.strip()
-                 link_match = re.search(r"\[(.*?)\]\((.*?)\)", line)
-                 if link_match:
-                     parsed["references"].append({"title": link_match.group(1), "url": link_match.group(2)})
-             remaining_text = remaining_text.replace(ref_block_match.group(0), "").strip()
-
-         # Parse mcp block - supports format:
-         # [icon] tool_name
-         # description
-         mcp_block_match = re.search(r'```mcp\s*(.*?)\s*```', remaining_text, re.DOTALL | re.IGNORECASE)
-         if mcp_block_match:
-             mcp_content = mcp_block_match.group(1).strip()
-             lines = mcp_content.split("\n")
-             current_step = None
-
-             for line in lines:
-                 line_stripped = line.strip()
-                 if not line_stripped: continue
-
-                 # New Format: "1. [icon] name: description" OR "[icon] name: description"
-                 # Regex details:
-                 # ^(?:(?:\d+\.|[-*])\s+)? -> Optional numbering (1. or - or *)
-                 # \[(\w+)\] -> Icon in brackets [icon] -> group 1
-                 # \s+ -> separating space
-                 # ([^:]+) -> Tool Name (chars before colon) -> group 2
-                 # : -> Colon separator
-                 # \s*(.+) -> Description -> group 3
-                 new_format_match = re.match(r'^(?:(?:\d+\.|[-*])\s+)?\[(\w+)\]\s+([^:]+):\s*(.+)$', line_stripped)
-
-                 # Old/Flexible Format: "[icon] name" (description might be on next line)
-                 flexible_match = re.match(r'^(?:(?:\d+\.|[-*])\s+)?\[(\w+)\]\s+(.+)$', line_stripped)
-
-                 if new_format_match:
-                     if current_step: parsed["mcp_steps"].append(current_step)
-                     current_step = {
-                         "icon": new_format_match.group(1).lower(),
-                         "name": new_format_match.group(2).strip(),
-                         "description": new_format_match.group(3).strip()
-                     }
-                 elif flexible_match:
-                     # Could be just "[icon] name" without description, or mixed
-                     if current_step: parsed["mcp_steps"].append(current_step)
-                     current_step = {
-                         "icon": flexible_match.group(1).lower(),
-                         "name": flexible_match.group(2).strip(),
-                         "description": ""
-                     }
-                 elif line.startswith(" ") and current_step:
-                     # Indented description line (continuation)
-                     if current_step["description"]:
-                         current_step["description"] += " " + line.strip()
-                     else:
-                         current_step["description"] = line.strip()
-                 elif line_stripped and not line_stripped.startswith("[") and current_step is None:
-                     # Plain text line without icon, treat as name if no current step
-                     # (This handles cases where LLM forgets brackets but lists steps)
-                     if current_step: parsed["mcp_steps"].append(current_step)
-                     current_step = {
-                         "icon": "default",
-                         "name": line_stripped,
-                         "description": ""
-                     }
-
-             if current_step:
-                 parsed["mcp_steps"].append(current_step)
-             remaining_text = remaining_text.replace(mcp_block_match.group(0), "").strip()
-
-         parsed["response"] = remaining_text.strip()
-         return parsed
-
-     async def _safe_llm_call(self, messages, model, tools=None, tool_choice=None, client: Optional[AsyncOpenAI] = None):
-         """
-         Wrap LLM calls with timeout and error handling.
-         Returns a tuple of (message, usage_dict) where usage_dict contains input_tokens and output_tokens.
-         """
-         try:
-             return await asyncio.wait_for(
-                 self._do_llm_request(messages, model, tools, tool_choice, client=client or self.client),
-                 timeout=120.0,
-             )
-         except asyncio.TimeoutError:
-             logger.error("LLM Call Timed Out")
-             return type("obj", (object,), {"content": "Error: The model took too long to respond.", "tool_calls": None})(), {"input_tokens": 0, "output_tokens": 0}
-         except Exception as e:
-             logger.error(f"LLM Call Failed: {e}")
-             return type("obj", (object,), {"content": f"Error: Model failure ({e})", "tool_calls": None})(), {"input_tokens": 0, "output_tokens": 0}
-
-     async def _do_llm_request(self, messages, model, tools, tool_choice, client: AsyncOpenAI):
-         try:
-             payload_debug = json.dumps(messages)
-             logger.info(f"LLM Request Payload Size: {len(payload_debug)} chars")
-         except Exception:
-             pass
-
-         t0 = time.time()
-         logger.info("LLM Request SENT to API...")
-         response = await client.chat.completions.create(
-             model=model,
-             messages=messages,
-             tools=tools,
-             tool_choice=tool_choice,
-             temperature=self.config.temperature,
-         )
-         logger.info(f"LLM Request RECEIVED after {time.time() - t0:.2f}s")
-
-         # Extract usage information
-         usage = {"input_tokens": 0, "output_tokens": 0}
-         if hasattr(response, "usage") and response.usage:
-             usage["input_tokens"] = getattr(response.usage, "prompt_tokens", 0) or 0
-             usage["output_tokens"] = getattr(response.usage, "completion_tokens", 0) or 0
-
-         return response.choices[0].message, usage
-
-     async def _route_tool(self, tool_call, mcp_session=None):
-         name = tool_call.function.name
-         args = json.loads(html.unescape(tool_call.function.arguments))
-
-         if name == "web_search":
-             query = args.get("query")
-             text_task = self.search_service.search(query)
-             image_task = self.search_service.image_search(query)
-             results = await asyncio.gather(text_task, image_task)
-             return json.dumps({"web_results": results[0], "image_results": results[1][:5]}, ensure_ascii=False)
-
-         if name == "grant_mcp_playwright":
-             return "OK" # Minimal response, LLM already knows what it passed
-
-         if mcp_session is not None and name.startswith("browser_"):
-             return await mcp_session.call_tool_text(name, args or {})
-
-         return f"Unknown tool {name}"
-
-     async def _run_vision_stage(self, user_input: str, images: List[str], model: str, prompt: str) -> Tuple[str, Dict[str, int]]:
-         """Returns (vision_text, usage_dict)."""
-         content_payload: List[Dict[str, Any]] = [{"type": "text", "text": user_input or ""}]
-         for img_b64 in images:
-             url = f"data:image/png;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
-             content_payload.append({"type": "image_url", "image_url": {"url": url}})
-
-         client = self._client_for(
-             api_key=getattr(self.config, "vision_api_key", None),
-             base_url=getattr(self.config, "vision_base_url", None),
-         )
-         response, usage = await self._safe_llm_call(
-             messages=[{"role": "system", "content": prompt}, {"role": "user", "content": content_payload}],
-             model=model,
-             client=client,
-         )
-         return (response.content or "").strip(), usage
-
-     async def _run_instruct_stage(
-         self, user_input: str, vision_text: str, model: str
-     ) -> Tuple[str, List[str], Dict[str, Any], Dict[str, int], float]:
-         """Returns (instruct_text, search_payloads, trace_dict, usage_dict, search_time)."""
-         tools = [self.web_search_tool, self.grant_mcp_playwright_tool]
-         tools_desc = "\n".join([t["function"]["name"] for t in tools])
-
-         prompt_tpl = getattr(self.config, "intruct_system_prompt", None) or INTRUCT_SYSTEM_PROMPT
-         prompt = prompt_tpl.format(user_msgs=user_input or "", tools_desc=tools_desc)
-         if vision_text:
-             prompt = f"{prompt}\\n\\n{INTRUCT_SYSTEM_PROMPT_VISION_ADD.format(vision_msgs=vision_text)}"
-
-         client = self._client_for(
-             api_key=getattr(self.config, "intruct_api_key", None),
-             base_url=getattr(self.config, "intruct_base_url", None),
-         )
-
-         history: List[Dict[str, Any]] = [
-             {"role": "system", "content": prompt},
-             {"role": "user", "content": user_input or "..."},
-         ]
-
-         response, usage = await self._safe_llm_call(
-             messages=history,
-             model=model,
-             tools=tools,
-             tool_choice="auto",
-             client=client,
-         )
-
-         search_payloads: List[str] = []
-         intruct_trace: Dict[str, Any] = {
-             "model": model,
-             "base_url": getattr(self.config, "intruct_base_url", None) or self.config.base_url,
-             "prompt": prompt,
-             "user_input": user_input or "",
-             "vision_add": vision_text or "",
-             "grant_mcp_playwright": False,
-             "grant_reason": "",
-             "tool_calls": [],
-             "tool_results": [],
-             "output": "",
-         }
-
-         search_time = 0.0
-
-         if response.tool_calls:
-             plan_dict = response.model_dump() if hasattr(response, "model_dump") else response
-             history.append(plan_dict)
-
-             tasks = [self._safe_route_tool(tc) for tc in response.tool_calls]
-
-             # Measure search/tool execution time
-             st = time.time()
-             results = await asyncio.gather(*tasks)
-             search_time = time.time() - st
-
-             for i, result in enumerate(results):
-                 tc = response.tool_calls[i]
-                 history.append(
-                     {"tool_call_id": tc.id, "role": "tool", "name": tc.function.name, "content": str(result)}
-                 )
-                 intruct_trace["tool_calls"].append(self._tool_call_to_trace(tc))
-                 intruct_trace["tool_results"].append({"name": tc.function.name, "content": str(result)})
-                 if tc.function.name == "web_search":
-                     search_payloads.append(str(result))
-                 elif tc.function.name == "grant_mcp_playwright":
-                     try:
-                         args = json.loads(html.unescape(tc.function.arguments))
-                     except Exception:
-                         args = {}
-                     intruct_trace["grant_mcp_playwright"] = bool(args.get("grant"))
-                     intruct_trace["grant_reason"] = str(args.get("reason") or "")
-             # No second LLM call: tool-call arguments already include the extracted keywords/query
-             # and the grant decision; avoid wasting tokens/time.
-             intruct_trace["output"] = ""
-             intruct_trace["usage"] = usage
-             return "", search_payloads, intruct_trace, usage, search_time
-
-         intruct_trace["output"] = (response.content or "").strip()
-         intruct_trace["usage"] = usage
-         return "", search_payloads, intruct_trace, usage, 0.0
-
-     def _format_search_msgs(self, search_payloads: List[str]) -> str:
-         """
-         Keep only tool results for the agent (no extra Intruct free-text output).
-         Also compress payloads to reduce prompt tokens.
-         """
-         merged_web: List[Dict[str, str]] = []
-         merged_img: List[Dict[str, str]] = []
-
-         for payload in search_payloads or []:
-             try:
-                 obj = json.loads(payload)
-             except Exception:
-                 continue
-             merged_web.extend(obj.get("web_results") or [])
-             merged_img.extend(obj.get("image_results") or [])
-
-         def dedupe(items: List[Dict[str, str]]) -> List[Dict[str, str]]:
-             seen = set()
-             out = []
-             for it in items:
-                 url = it.get("url") or ""
-                 if not url or url in seen:
-                     continue
-                 seen.add(url)
-                 out.append(it)
-             return out
-
-         merged_web = dedupe(merged_web)[:6]
-         merged_img = dedupe(merged_img)[:3]
-
-         def clip(s: str, n: int) -> str:
-             s = (s or "").strip()
-             return s if len(s) <= n else s[: n - 1] + "…"
-
-         compact_web = [
-             {"title": clip(r.get("title", ""), 80), "url": r.get("url", ""), "content": clip(r.get("content", ""), 180)}
-             for r in merged_web
-         ]
-         compact_img = [{"title": clip(r.get("title", ""), 80), "url": r.get("url", "")} for r in merged_img]
-
-         return json.dumps({"web_results": compact_web, "image_results": compact_img}, ensure_ascii=False)
-
-     def _client_for(self, api_key: Optional[str], base_url: Optional[str]) -> AsyncOpenAI:
-         if api_key or base_url:
-             return AsyncOpenAI(base_url=base_url or self.config.base_url, api_key=api_key or self.config.api_key)
-         return self.client
-
-     def _tool_call_to_trace(self, tool_call) -> Dict[str, Any]:
-         try:
-             args = json.loads(html.unescape(tool_call.function.arguments))
-         except Exception:
-             args = tool_call.function.arguments
-         return {"id": getattr(tool_call, "id", None), "name": tool_call.function.name, "arguments": args}
-
-     def _render_trace_markdown(self, trace: Dict[str, Any]) -> str:
-         def fence(label: str, content: str) -> str:
-             safe = (content or "").replace("```", "``\\`")
-             return f"```{label}\n{safe}\n```"
-
-         parts: List[str] = []
-         parts.append("# Pipeline Trace\n")
-
-         if trace.get("vision"):
-             v = trace["vision"]
-             parts.append("## Vision\n")
-             parts.append(f"- model: `{v.get('model')}`")
-             parts.append(f"- base_url: `{v.get('base_url')}`")
-             parts.append(f"- images_count: `{v.get('images_count')}`\n")
-             parts.append("### Prompt\n")
-             parts.append(fence("text", v.get("prompt", "")))
-             parts.append("\n### Output\n")
-             parts.append(fence("text", v.get("output", "")))
-             parts.append("")
-
-         if trace.get("intruct"):
-             t = trace["intruct"]
-             parts.append("## Intruct\n")
-             parts.append(f"- model: `{t.get('model')}`")
-             parts.append(f"- base_url: `{t.get('base_url')}`\n")
-             parts.append(f"- grant_mcp_playwright: `{bool(t.get('grant_mcp_playwright'))}`")
-             if t.get("grant_reason"):
-                 parts.append(f"- grant_reason: `{t.get('grant_reason')}`")
-             if "explicit_mcp_intent" in t:
-                 parts.append(f"- explicit_mcp_intent: `{bool(t.get('explicit_mcp_intent'))}`")
-             if "grant_effective" in t:
-                 parts.append(f"- grant_effective: `{bool(t.get('grant_effective'))}`\n")
-             parts.append("### Prompt\n")
-             parts.append(fence("text", t.get("prompt", "")))
-             if t.get("tool_calls"):
-                 parts.append("\n### Tool Calls\n")
-                 parts.append(fence("json", json.dumps(t.get("tool_calls"), ensure_ascii=False, indent=2)))
-             if t.get("tool_results"):
-                 parts.append("\n### Tool Results\n")
-                 parts.append(fence("json", json.dumps(t.get("tool_results"), ensure_ascii=False, indent=2)))
-             parts.append("\n### Output\n")
-             parts.append(fence("text", t.get("output", "")))
-             parts.append("")
-
-         if trace.get("agent"):
-             a = trace["agent"]
-             parts.append("## Agent\n")
-             parts.append(f"- model: `{a.get('model')}`")
-             parts.append(f"- base_url: `{a.get('base_url')}`\n")
-             parts.append(f"- mcp_granted: `{bool(a.get('mcp_granted'))}`\n")
-             parts.append("### System Prompt\n")
-             parts.append(fence("text", a.get("system_prompt", "")))
-             parts.append("\n### Steps\n")
-             parts.append(fence("json", json.dumps(a.get("steps", []), ensure_ascii=False, indent=2)))
-             parts.append("\n### Final Output\n")
-             parts.append(fence("text", a.get("final_output", "")))
-
-         return "\n".join(parts).strip() + "\n"
-
-     async def close(self):
-         try:
-             await self.mcp_playwright.close()
-         except Exception:
-             pass
-
-     async def warmup_mcp(self) -> bool:
-         ok = await self.mcp_playwright.ensure_connected()
-         if ok:
-             logger.info("MCP Playwright connected (warmup).")
-         else:
-             logger.warning("MCP Playwright warmup failed.")
-         return ok
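
Note on the billing arithmetic in the removed `pipeline.py`: each stage (Vision, Instruct, Agent) prices tokens at a per-million rate, and the Agent's token counts are derived by subtracting Vision and Instruct usage from the running totals. A minimal, self-contained sketch of the same formula (the prices and token counts below are hypothetical placeholders, not values from the package):

```python
# Per-million-token cost formula used for the Vision, Instruct, and Agent
# stages in the removed pipeline.py. Prices here are made-up examples;
# the plugin reads real values from its HYWConfig.
def stage_cost(input_tokens: int, output_tokens: int,
               input_price: float, output_price: float) -> float:
    """Return the USD cost of one stage given per-million-token prices."""
    return (input_tokens / 1_000_000) * input_price + (output_tokens / 1_000_000) * output_price

# Example: 12,000 prompt tokens and 800 completion tokens
# at $0.50/M input and $1.50/M output:
print(f"${stage_cost(12_000, 800, 0.50, 1.50):.6f}")  # prints $0.007200
```

If no pricing is configured, `execute()` leaves `billing_info["total_cost"]` at 0.0, and the SearXNG search stage is always reported with a cost of 0.0.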