entari-plugin-hyw 2.2.5__py3-none-any.whl → 3.5.0rc6__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in the public registry. It is provided for informational purposes only.
- entari_plugin_hyw/__init__.py +371 -315
- entari_plugin_hyw/assets/card-dist/index.html +396 -0
- entari_plugin_hyw/assets/card-dist/logos/anthropic.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/cerebras.svg +9 -0
- entari_plugin_hyw/assets/card-dist/logos/deepseek.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/gemini.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/google.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/grok.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/huggingface.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/microsoft.svg +15 -0
- entari_plugin_hyw/assets/card-dist/logos/minimax.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/mistral.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/nvida.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/openai.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/openrouter.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/perplexity.svg +24 -0
- entari_plugin_hyw/assets/card-dist/logos/qwen.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/xai.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/xiaomi.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/zai.png +0 -0
- entari_plugin_hyw/assets/card-dist/vite.svg +1 -0
- entari_plugin_hyw/assets/icon/anthropic.svg +1 -0
- entari_plugin_hyw/assets/icon/cerebras.svg +9 -0
- entari_plugin_hyw/assets/icon/deepseek.png +0 -0
- entari_plugin_hyw/assets/icon/gemini.svg +1 -0
- entari_plugin_hyw/assets/icon/google.svg +1 -0
- entari_plugin_hyw/assets/icon/grok.png +0 -0
- entari_plugin_hyw/assets/icon/huggingface.png +0 -0
- entari_plugin_hyw/assets/icon/microsoft.svg +15 -0
- entari_plugin_hyw/assets/icon/minimax.png +0 -0
- entari_plugin_hyw/assets/icon/mistral.png +0 -0
- entari_plugin_hyw/assets/icon/nvida.png +0 -0
- entari_plugin_hyw/assets/icon/openai.svg +1 -0
- entari_plugin_hyw/assets/icon/openrouter.png +0 -0
- entari_plugin_hyw/assets/icon/perplexity.svg +24 -0
- entari_plugin_hyw/assets/icon/qwen.png +0 -0
- entari_plugin_hyw/assets/icon/xai.png +0 -0
- entari_plugin_hyw/assets/icon/xiaomi.png +0 -0
- entari_plugin_hyw/assets/icon/zai.png +0 -0
- entari_plugin_hyw/card-ui/.gitignore +24 -0
- entari_plugin_hyw/card-ui/README.md +5 -0
- entari_plugin_hyw/card-ui/index.html +16 -0
- entari_plugin_hyw/card-ui/package-lock.json +2342 -0
- entari_plugin_hyw/card-ui/package.json +31 -0
- entari_plugin_hyw/card-ui/public/logos/anthropic.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/cerebras.svg +9 -0
- entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/gemini.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/google.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/microsoft.svg +15 -0
- entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/openai.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/perplexity.svg +24 -0
- entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
- entari_plugin_hyw/card-ui/public/vite.svg +1 -0
- entari_plugin_hyw/card-ui/src/App.vue +412 -0
- entari_plugin_hyw/card-ui/src/assets/vue.svg +1 -0
- entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +41 -0
- entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +386 -0
- entari_plugin_hyw/card-ui/src/components/SectionCard.vue +41 -0
- entari_plugin_hyw/card-ui/src/components/StageCard.vue +237 -0
- entari_plugin_hyw/card-ui/src/main.ts +5 -0
- entari_plugin_hyw/card-ui/src/style.css +29 -0
- entari_plugin_hyw/card-ui/src/test_regex.js +103 -0
- entari_plugin_hyw/card-ui/src/types.ts +52 -0
- entari_plugin_hyw/card-ui/tsconfig.app.json +16 -0
- entari_plugin_hyw/card-ui/tsconfig.json +7 -0
- entari_plugin_hyw/card-ui/tsconfig.node.json +26 -0
- entari_plugin_hyw/card-ui/vite.config.ts +16 -0
- entari_plugin_hyw/history.py +170 -0
- entari_plugin_hyw/image_cache.py +274 -0
- entari_plugin_hyw/misc.py +128 -0
- entari_plugin_hyw/pipeline.py +1338 -0
- entari_plugin_hyw/prompts.py +108 -0
- entari_plugin_hyw/render_vue.py +314 -0
- entari_plugin_hyw/search.py +696 -0
- entari_plugin_hyw-3.5.0rc6.dist-info/METADATA +116 -0
- entari_plugin_hyw-3.5.0rc6.dist-info/RECORD +88 -0
- entari_plugin_hyw/hyw_core.py +0 -555
- entari_plugin_hyw-2.2.5.dist-info/METADATA +0 -135
- entari_plugin_hyw-2.2.5.dist-info/RECORD +0 -6
- {entari_plugin_hyw-2.2.5.dist-info → entari_plugin_hyw-3.5.0rc6.dist-info}/WHEEL +0 -0
- {entari_plugin_hyw-2.2.5.dist-info → entari_plugin_hyw-3.5.0rc6.dist-info}/top_level.txt +0 -0
entari_plugin_hyw/pipeline.py (new file)
@@ -0,0 +1,1338 @@
import asyncio
import html
import json
import re
import time
from contextlib import asynccontextmanager
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import urlparse

from loguru import logger
from openai import AsyncOpenAI

from .search import SearchService
from .image_cache import get_cached_images
from .prompts import (
    AGENT_SP,
    AGENT_SP_INSTRUCT_VISION_ADD,
    AGENT_SP_TOOLS_STANDARD_ADD,
    AGENT_SP_TOOLS_AGENT_ADD,
    AGENT_SP_SEARCH_ADD,
    INSTRUCT_SP,
    INSTRUCT_SP_VISION_ADD,
    VISION_SP,
)


@asynccontextmanager
async def _null_async_context():
    yield None


class ProcessingPipeline:
    """
    Core pipeline (vision -> instruct/search -> agent).
    """

    def __init__(self, config: Any):
        self.config = config
        self.search_service = SearchService(config)
        self.client = AsyncOpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
        self.all_web_results = []  # Cache for search results
        self.current_mode = "standard"  # standard | agent
        # Global ID counter for all result types (unified numbering)
        self.global_id_counter = 0
        # Background tasks for async image search (do not block the agent)
        self._image_search_tasks: List[asyncio.Task] = []

        self.web_search_tool = {
            "type": "function",
            "function": {
                "name": "internal_web_search",
                "description": "Search the web for text.",
                "parameters": {
                    "type": "object",
                    "properties": {"query": {"type": "string"}},
                    "required": ["query"],
                },
            },
        }
        self.image_search_tool = {
            "type": "function",
            "function": {
                "name": "internal_image_search",
                "description": "Search for images related to a query.",
                "parameters": {
                    "type": "object",
                    "properties": {"query": {"type": "string"}},
                    "required": ["query"],
                },
            },
        }
        self.set_mode_tool = {
            "type": "function",
            "function": {
                "name": "set_mode",
                "description": "设定后续 Agent 的运行模式: standard | agent",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "mode": {"type": "string", "enum": ["standard", "agent"]},
                        "reason": {"type": "string"},
                    },
                    "required": ["mode"],
                },
            },
        }
        self.crawl_page_tool = {
            "type": "function",
            "function": {
                "name": "crawl_page",
                "description": "使用 Crawl4AI 抓取网页并返回 Markdown 文本。",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "url": {"type": "string"},
                    },
                    "required": ["url"],
                },
            },
        }
        self.refuse_answer_tool = {
            "type": "function",
            "function": {
                "name": "refuse_answer",
                "description": "拒绝回答问题。当用户问题涉及敏感、违规、不适宜内容时调用此工具,立即终止流程并返回拒绝回答的图片。",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "reason": {"type": "string", "description": "拒绝回答的原因(内部记录,不展示给用户)"},
                    },
                    "required": [],
                },
            },
        }
        # Flag to indicate refuse_answer was called
        self._should_refuse = False
        self._refuse_reason = ""

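    # Shape of an assistant tool call that _route_tool later receives (standard
    # OpenAI Chat Completions format; id and query values are illustrative):
    #   {"id": "call_abc123", "type": "function",
    #    "function": {"name": "internal_web_search",
    #                 "arguments": "{\"query\": \"...\"}"}}
    # Note that function.arguments arrives as a JSON string, which is why
    # _route_tool json.loads() it before dispatching.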
    async def execute(
        self,
        user_input: str,
        conversation_history: List[Dict],
        model_name: Optional[str] = None,
        images: Optional[List[str]] = None,
        vision_model_name: Optional[str] = None,
        selected_vision_model: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        1) Vision: summarize images once (no image persistence).
        2) Instruct: run the pre-search and pick the run mode (standard | agent).
        3) Agent: standard mode answers in a single step; agent mode gets the
           search/crawl tools for up to 10 steps, with a consolidation nudge at
           step 5 and tools withheld on the final step.
        """
        start_time = time.time()
        stats = {"start_time": start_time, "tool_calls_count": 0}
        # Token usage tracking for billing
        usage_totals = {"input_tokens": 0, "output_tokens": 0}
        active_model = model_name or self.config.model_name

        current_history = conversation_history
        final_response_content = ""
        structured: Dict[str, Any] = {}

        # Reset search cache and ID counter for this execution
        self.all_web_results = []
        self.global_id_counter = 0
        # Reset refuse_answer flag
        self._should_refuse = False
        self._refuse_reason = ""

        try:
            logger.info(f"Pipeline: Starting workflow for '{user_input}' using {active_model}")

            trace: Dict[str, Any] = {
                "vision": None,
                "instruct": None,
                "agent": None,
            }

            # Vision stage
            vision_text = ""
            vision_start = time.time()
            vision_time = 0
            vision_cost = 0.0
            vision_usage = {}
            if images:
                vision_model = (
                    selected_vision_model
                    or vision_model_name
                    or getattr(self.config, "vision_model_name", None)
                    or active_model
                )
                vision_prompt = VISION_SP.format(user_msgs=user_input or "[图片]")
                vision_text, vision_usage = await self._run_vision_stage(
                    user_input=user_input,
                    images=images,
                    model=vision_model,
                    prompt=vision_prompt,
                )
                # Add vision usage with vision-specific pricing
                usage_totals["input_tokens"] += vision_usage.get("input_tokens", 0)
                usage_totals["output_tokens"] += vision_usage.get("output_tokens", 0)

                # Calculate vision cost
                v_in_price = float(getattr(self.config, "vision_input_price", None) or getattr(self.config, "input_price", 0.0) or 0.0)
                v_out_price = float(getattr(self.config, "vision_output_price", None) or getattr(self.config, "output_price", 0.0) or 0.0)
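                # Prices are configured per million tokens, so e.g. 12,000 input
                # tokens at an input price of 0.5 cost
                # 12_000 / 1_000_000 * 0.5 = 0.006 (illustrative numbers).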
                if v_in_price > 0 or v_out_price > 0:
                    vision_cost = (vision_usage.get("input_tokens", 0) / 1_000_000 * v_in_price) + (vision_usage.get("output_tokens", 0) / 1_000_000 * v_out_price)

                vision_time = time.time() - vision_start

                trace["vision"] = {
                    "model": vision_model,
                    "base_url": getattr(self.config, "vision_base_url", None) or self.config.base_url,
                    "prompt": vision_prompt,
                    "user_input": user_input or "",
                    "images_count": len(images or []),
                    "output": vision_text,
                    "usage": vision_usage,
                    "time": vision_time,
                    "cost": vision_cost
                }

            # Instruct + pre-search
            instruct_start = time.time()
            instruct_model = getattr(self.config, "instruct_model_name", None) or active_model
            logger.info(f"Instruct Stage Config: instruct_model_name={getattr(self.config, 'instruct_model_name', None)}, active_model={active_model}, using: {instruct_model}")
            instruct_text, search_payloads, instruct_trace, instruct_usage, search_time = await self._run_instruct_stage(
                user_input=user_input,
                vision_text=vision_text,
                model=instruct_model,
            )
            # Instruct time excludes search time (search_time is returned separately)
            instruct_time = time.time() - instruct_start - search_time

            # Calculate instruct cost
            instruct_cost = 0.0
            i_in_price = float(getattr(self.config, "instruct_input_price", None) or getattr(self.config, "input_price", 0.0) or 0.0)
            i_out_price = float(getattr(self.config, "instruct_output_price", None) or getattr(self.config, "output_price", 0.0) or 0.0)
            if i_in_price > 0 or i_out_price > 0:
                instruct_cost = (instruct_usage.get("input_tokens", 0) / 1_000_000 * i_in_price) + (instruct_usage.get("output_tokens", 0) / 1_000_000 * i_out_price)

            # Add instruct usage
            usage_totals["input_tokens"] += instruct_usage.get("input_tokens", 0)
            usage_totals["output_tokens"] += instruct_usage.get("output_tokens", 0)

            instruct_trace["time"] = instruct_time
            instruct_trace["cost"] = instruct_cost
            trace["instruct"] = instruct_trace

            # Check if refuse_answer was called - terminate early
            if self._should_refuse:
                logger.info(f"Pipeline: refuse_answer triggered. Reason: {self._refuse_reason}")
                stats["total_time"] = time.time() - start_time
                return {
                    "llm_response": "",
                    "structured_response": {},
                    "stats": stats,
                    "model_used": active_model,
                    "conversation_history": current_history,
                    "refuse_answer": True,
                    "refuse_reason": self._refuse_reason,
                    "stages_used": [],
                }

            # Start agent loop
            agent_start_time = time.time()
            current_history.append({"role": "user", "content": user_input or "..."})

            mode = instruct_trace.get("mode", self.current_mode).lower()
            logger.success(f"Instruct Mode: {mode}")
            self.current_mode = mode

            # Determine max iterations
            max_steps = 10 if mode == "agent" else 1

            step = 0
            agent_trace_steps: List[Dict[str, Any]] = []
            last_system_prompt = ""

            agent_tools: Optional[List[Dict[str, Any]]] = None
            if mode == "agent":
                agent_tools = [self.web_search_tool, self.image_search_tool, self.crawl_page_tool]

            # Agent loop
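            # Loop contract (as implemented below): every iteration rebuilds the
            # system prompt from the cached search/page results, then calls the
            # LLM. Tool calls are executed concurrently and their results pushed
            # into the history; the first content-only reply ends the loop. On
            # the last step the tools are withheld so the model must answer.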
            while step < max_steps:
                step += 1
                logger.info(f"Pipeline: Agent step {step}/{max_steps}")

                if step == 5 and mode == "agent":
                    current_history.append(
                        {
                            "role": "system",
                            "content": "System: [Next Step Final] Please start consolidating the answer; the next step must be the final response.",
                        }
                    )

                tools_desc = ""
                if agent_tools:
                    tools_desc = "\n".join([
                        "- internal_web_search(query): 触发搜索并缓存结果",
                        "- crawl_page(url): 使用 Crawl4AI 抓取网页返回 Markdown"
                    ])

                user_msgs_text = user_input or ""

                search_msgs_text = self._format_search_msgs()
                # Image search results are NOT passed to the LLM - they are for UI rendering only

                has_search_results = any(r.get("_type") == "search" for r in self.all_web_results)
                has_image_results = any(r.get("_type") == "image" for r in self.all_web_results)  # For UI rendering only

                # Build agent system prompt
                mode_desc_text = AGENT_SP_TOOLS_AGENT_ADD.format(tools_desc=tools_desc) if mode == "agent" else AGENT_SP_TOOLS_STANDARD_ADD
                system_prompt = AGENT_SP.format(
                    user_msgs=user_msgs_text,
                    mode=mode,
                    mode_desc=mode_desc_text,
                    language=getattr(self.config, "language", "Simplified Chinese")[:128]
                )

                # Append vision text if available
                if vision_text:
                    system_prompt += AGENT_SP_INSTRUCT_VISION_ADD.format(vision_msgs=vision_text)

                # Append search results (text and page only, NOT images)
                page_msgs_text = self._format_page_msgs()
                all_search_parts = []
                if has_search_results and search_msgs_text:
                    all_search_parts.append(search_msgs_text)
                if page_msgs_text:
                    all_search_parts.append(page_msgs_text)
                # Images are excluded from the LLM prompt - they are for UI rendering only

                if all_search_parts:
                    system_prompt += AGENT_SP_SEARCH_ADD.format(search_msgs="\n".join(all_search_parts))

                last_system_prompt = system_prompt

                messages = [{"role": "system", "content": system_prompt}]
                messages.extend(current_history)

                tools_for_step = agent_tools if (agent_tools and step < max_steps) else None

                # Debug logging
                if tools_for_step:
                    logger.info(f"[Agent] Tools provided: {[t['function']['name'] for t in tools_for_step]}")
                else:
                    logger.warning(f"[Agent] NO TOOLS provided for step {step} (agent_tools={agent_tools is not None}, step<max={step < max_steps})")

                step_llm_start = time.time()
                response, step_usage = await self._safe_llm_call(
                    messages=messages,
                    model=active_model,
                    tools=tools_for_step,
                    tool_choice="auto" if tools_for_step else None,
                    extra_body=self.config.extra_body,
                )
                step_llm_time = time.time() - step_llm_start

                # Debug: check the response
                has_tool_calls = response.tool_calls is not None and len(response.tool_calls) > 0
                logger.info(f"[Agent] Response has_tool_calls={has_tool_calls}, has_content={bool(response.content)}")

                # Accumulate agent usage
                usage_totals["input_tokens"] += step_usage.get("input_tokens", 0)
                usage_totals["output_tokens"] += step_usage.get("output_tokens", 0)

                if response.tool_calls and tools_for_step:
                    tool_calls = response.tool_calls
                    stats["tool_calls_count"] += len(tool_calls)

                    # Use model_dump to preserve provider-specific fields (e.g., Gemini's thought_signature)
                    assistant_msg = response.model_dump(exclude_unset=True) if hasattr(response, "model_dump") else {
                        "role": "assistant",
                        "content": response.content,
                        "tool_calls": [{"id": tc.id, "type": "function", "function": {"name": tc.function.name, "arguments": tc.function.arguments}} for tc in tool_calls]
                    }
                    current_history.append(assistant_msg)

                    tasks = [self._safe_route_tool(tc) for tc in tool_calls]
                    tool_start_time = time.time()
                    results = await asyncio.gather(*tasks)
                    tool_exec_time = time.time() - tool_start_time

                    step_trace = {
                        "step": step,
                        "tool_calls": [self._tool_call_to_trace(tc) for tc in tool_calls],
                        "tool_results": [],
                        "tool_time": tool_exec_time,
                        "llm_time": step_llm_time,
                        "usage": step_usage,
                    }
                    for i, result in enumerate(results):
                        tc = tool_calls[i]
                        step_trace["tool_results"].append({"name": tc.function.name, "content": str(result)})
                        current_history.append(
                            {
                                "tool_call_id": tc.id,
                                "role": "tool",
                                "name": tc.function.name,
                                "content": str(result),
                            }
                        )
                    agent_trace_steps.append(step_trace)
                    continue

                final_response_content = response.content or ""
                current_history.append({"role": "assistant", "content": final_response_content})
                agent_trace_steps.append({
                    "step": step,
                    "final": True,
                    "output": final_response_content,
                    "llm_time": step_llm_time,
                    "usage": step_usage
                })
                break

            if not final_response_content:
                final_response_content = "执行结束,但未生成内容。"

            structured = self._parse_tagged_response(final_response_content)
            final_content = structured.get("response") or final_response_content

            agent_time = time.time() - agent_start_time

            # Calculate agent cost
            agent_cost = 0.0
            a_in_price = float(getattr(self.config, "input_price", 0.0) or 0.0)
            a_out_price = float(getattr(self.config, "output_price", 0.0) or 0.0)

            agent_input_tokens = usage_totals["input_tokens"] - vision_usage.get("input_tokens", 0) - instruct_usage.get("input_tokens", 0)
            agent_output_tokens = usage_totals["output_tokens"] - vision_usage.get("output_tokens", 0) - instruct_usage.get("output_tokens", 0)

            if a_in_price > 0 or a_out_price > 0:
                agent_cost = (max(0, agent_input_tokens) / 1_000_000 * a_in_price) + (max(0, agent_output_tokens) / 1_000_000 * a_out_price)

            trace["agent"] = {
                "model": active_model,
                "base_url": self.config.base_url,
                "system_prompt": last_system_prompt,
                "steps": agent_trace_steps,
                "final_output": final_response_content,
                "time": agent_time,
                "cost": agent_cost
            }
            trace_markdown = self._render_trace_markdown(trace)

            stats["total_time"] = time.time() - start_time
            stats["steps"] = step

            # Calculate billing info correctly by summing up all actual costs
            total_cost_sum = vision_cost + instruct_cost
            for s in agent_trace_steps:
                s_usage = s.get("usage", {})
                if s_usage:
                    s_in_price = float(getattr(self.config, "input_price", 0.0) or 0.0)
                    s_out_price = float(getattr(self.config, "output_price", 0.0) or 0.0)
                    total_cost_sum += (s_usage.get("input_tokens", 0) / 1_000_000 * s_in_price) + (s_usage.get("output_tokens", 0) / 1_000_000 * s_out_price)

            billing_info = {
                "input_tokens": usage_totals["input_tokens"],
                "output_tokens": usage_totals["output_tokens"],
                "total_cost": total_cost_sum,
            }

            # Build stages_used list for UI display
            stages_used = []

            def infer_icon(model_name: str, base_url: str) -> str:
                model_lower = (model_name or "").lower()
                url_lower = (base_url or "").lower()
                if "deepseek" in model_lower or "deepseek" in url_lower: return "deepseek"
                elif "claude" in model_lower or "anthropic" in url_lower: return "anthropic"
                elif "gemini" in model_lower or "google" in url_lower: return "google"
                elif "gpt" in model_lower or "openai" in url_lower: return "openai"
                elif "qwen" in model_lower: return "qwen"
                elif "openrouter" in url_lower: return "openrouter"
                return "openai"

            def infer_provider(base_url: str) -> str:
                url_lower = (base_url or "").lower()
                if "openrouter" in url_lower: return "OpenRouter"
                elif "openai" in url_lower: return "OpenAI"
                elif "anthropic" in url_lower: return "Anthropic"
                elif "google" in url_lower: return "Google"
                elif "deepseek" in url_lower: return "DeepSeek"
                return ""

            if trace.get("vision"):
                v = trace["vision"]
                v_model = v.get("model", "")
                v_base_url = v.get("base_url", "") or self.config.base_url
                stages_used.append({
                    "name": "Vision",
                    "model": v_model,
                    "icon_config": infer_icon(v_model, v_base_url),
                    "provider": infer_provider(v_base_url),
                    "time": v.get("time", 0),
                    "cost": v.get("cost", 0.0)
                })

            if trace.get("instruct"):
                i = trace["instruct"]
                i_model = i.get("model", "")
                i_base_url = i.get("base_url", "") or self.config.base_url
                stages_used.append({
                    "name": "Instruct",
                    "model": i_model,
                    "icon_config": infer_icon(i_model, i_base_url),
                    "provider": infer_provider(i_base_url),
                    "time": i.get("time", 0),
                    "cost": i.get("cost", 0.0)
                })

            # Show the Search stage if we have ANY search results (text OR image)
            if (has_search_results or has_image_results) and search_payloads:
                # Collect initial search results for the Search stage card
                initial_refs = [
                    {"title": r.get("title", ""), "url": r.get("url", ""), "domain": r.get("domain", "")}
                    for r in self.all_web_results if r.get("_type") == "search"
                ]
                initial_images = [
                    {"title": r.get("title", ""), "url": r.get("url", ""), "thumbnail": r.get("thumbnail", "")}
                    for r in self.all_web_results if r.get("_type") == "image"
                ]

                stages_used.append({
                    "name": "Search",
                    "model": getattr(self.config, "search_name", "DuckDuckGo"),
                    "icon_config": "search",
                    "provider": getattr(self.config, "search_provider", "Crawl4AI"),
                    "time": search_time,
                    "cost": 0.0,
                    "references": initial_refs,
                    "image_references": initial_images
                })

            # Add a Crawler stage if Instruct used crawl_page
            if trace.get("instruct"):
                instruct_tool_calls = trace["instruct"].get("tool_calls", [])
                crawl_calls = [tc for tc in instruct_tool_calls if tc.get("name") == "crawl_page"]
                if crawl_calls:
                    # Build crawled_pages list for UI
                    crawled_pages = []
                    for tc in crawl_calls:
                        url = tc.get("arguments", {}).get("url", "")
                        # Try to find the cached result
                        found = next((r for r in self.all_web_results if r.get("url") == url and r.get("_type") == "page"), None)
                        if found:
                            try:
                                domain = urlparse(url).netloc
                            except Exception:
                                domain = ""
                            crawled_pages.append({
                                "title": found.get("title", "Page"),
                                "url": url,
                                "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
                            })

                    stages_used.append({
                        "name": "Crawler",
                        "model": "Crawl4AI",
                        "icon_config": "search",
                        "provider": "网页抓取",
                        "time": search_time,  # Use existing search_time which includes fetch time
                        "cost": 0.0,
                        "crawled_pages": crawled_pages
                    })

            # --- Granular Agent Stages (Grouped) ---
            if trace.get("agent"):
                a = trace["agent"]
                a_model = a.get("model", "") or active_model
                a_base_url = a.get("base_url", "") or self.config.base_url
                steps = a.get("steps", [])
                agent_icon = infer_icon(a_model, a_base_url)
                agent_provider = infer_provider(a_base_url)

                for s in steps:
                    if "tool_calls" in s:
                        # 1. Agent thought stage (with LLM time)
                        # Calculate the step cost
                        step_usage = s.get("usage", {})
                        step_cost = 0.0
                        if a_in_price > 0 or a_out_price > 0:
                            step_cost = (step_usage.get("input_tokens", 0) / 1_000_000 * a_in_price) + (step_usage.get("output_tokens", 0) / 1_000_000 * a_out_price)

                        stages_used.append({
                            "name": "Agent",
                            "model": a_model,
                            "icon_config": agent_icon,
                            "provider": agent_provider,
                            "time": s.get("llm_time", 0), "cost": step_cost
                        })

                        # 2. Grouped tool stages
                        # Collect results for grouping
                        search_group_items = []
                        crawler_group_items = []

                        tcs = s.get("tool_calls", [])
                        trs = s.get("tool_results", [])

                        for idx, tc in enumerate(tcs):
                            t_name = tc.get("name")
                            # Try to get the result content if available
                            t_res_content = trs[idx].get("content", "") if idx < len(trs) else ""

                            if t_name in ["internal_web_search", "web_search", "internal_image_search"]:
                                # Per-call result metadata is not kept here: the tool result
                                # string is just a "cached_for_prompt" status, and the actual
                                # results live in self.all_web_results. Parse the arguments
                                # instead to show what was searched.
                                args = tc.get("arguments", {})
                                query = args.get("query", "")
                                if query:
                                    search_group_items.append({"query": query})

                            elif t_name == "crawl_page":
                                # Get the URL from the arguments, the title from the result
                                args = tc.get("arguments", {})
                                url = args.get("url", "")
                                title = "Page"
                                try:
                                    page_data = json.loads(t_res_content)
                                    if isinstance(page_data, dict):
                                        title = page_data.get("title", "Page")
                                except Exception:
                                    pass

                                if url:
                                    try:
                                        domain = urlparse(url).netloc
                                    except Exception:
                                        domain = ""
                                    crawler_group_items.append({
                                        "title": title,
                                        "url": url,
                                        "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
                                    })

                        # Append grouped stages
                        if search_group_items:
                            stages_used.append({
                                "name": "Search",
                                "model": getattr(self.config, "search_name", "DuckDuckGo"),
                                "icon_config": "search",
                                "provider": "Agent Search",
                                "time": s.get("tool_time", 0), "cost": 0,
                                "queries": search_group_items  # The renderer can use this if needed
                            })

                        if crawler_group_items:
                            stages_used.append({
                                "name": "Crawler",
                                "model": "Crawl4AI",
                                "icon_config": "browser",
                                "provider": "Page Fetcher",
                                "time": s.get("tool_time", 0), "cost": 0,
                                "crawled_pages": crawler_group_items
                            })

                    elif s.get("final"):
                        # Correctly calculate the final step cost
                        step_usage = s.get("usage", {})
                        step_cost = 0.0
                        if a_in_price > 0 or a_out_price > 0:
                            step_cost = (step_usage.get("input_tokens", 0) / 1_000_000 * a_in_price) + (step_usage.get("output_tokens", 0) / 1_000_000 * a_out_price)

                        stages_used.append({
                            "name": "Agent",
                            "model": a_model,
                            "icon_config": agent_icon,
                            "provider": agent_provider,
                            "time": s.get("llm_time", 0),
                            "cost": step_cost
                        })

                # Time and cost are distributed across the per-step stages above,
                # so nothing needs to be assigned to the last Agent stage.

            # --- Final filter: only show cited items in workflow cards ---
            cited_urls = {ref['url'] for ref in (structured.get("references", []) +
                                                 structured.get("page_references", []) +
                                                 structured.get("image_references", []))}

            # Find images already rendered in markdown content (to avoid duplicate display)
            markdown_image_urls = set()
            md_img_pattern = re.compile(r'!\[.*?\]\((https?://[^)]+)\)')
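            # The pattern captures the URL inside a markdown image, e.g. for
            # "![chart](https://example.com/a.png)" group(1) is
            # "https://example.com/a.png" (illustrative URL).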
            for match in md_img_pattern.finditer(final_content):
                markdown_image_urls.add(match.group(1))

            for s in stages_used:
                if "references" in s and s["references"]:
                    s["references"] = [r for r in s["references"] if r.get("url") in cited_urls]
                # Filter out images already shown in markdown content.
                # Check both url AND thumbnail since either might be used in markdown.
                if "image_references" in s and s["image_references"]:
                    s["image_references"] = [
                        r for r in s["image_references"]
                        if r.get("url") not in markdown_image_urls and (r.get("thumbnail") or "") not in markdown_image_urls
                    ]
                if "crawled_pages" in s and s["crawled_pages"]:
                    s["crawled_pages"] = [r for r in s["crawled_pages"] if r.get("url") in cited_urls]

            # Clean up conversation history: remove tool calls and results to save tokens and avoid ID conflicts.
            # Keep only 'user' messages and 'assistant' messages without tool_calls (final answers).
            cleaned_history = []
            for msg in current_history:
                if msg.get("role") == "tool":
                    continue
                if msg.get("role") == "assistant" and msg.get("tool_calls"):
                    continue
                cleaned_history.append(msg)

            # Update the list in place (the caller may hold a reference to it)
            current_history[:] = cleaned_history

            # --- Apply cached images to reduce render time ---
            # Collect all image URLs that need caching (avoid duplicates when thumbnail == url)
            all_image_urls = set()
            for img_ref in structured.get("image_references", []):
                if img_ref.get("thumbnail"):
                    all_image_urls.add(img_ref["thumbnail"])
                if img_ref.get("url"):
                    all_image_urls.add(img_ref["url"])

            for stage in stages_used:
                for img_ref in stage.get("image_references", []):
                    if img_ref.get("thumbnail"):
                        all_image_urls.add(img_ref["thumbnail"])
                    if img_ref.get("url"):
                        all_image_urls.add(img_ref["url"])

            # Also collect image URLs from the markdown content
            markdown_img_pattern = re.compile(r'!\[.*?\]\((https?://[^)]+)\)')
            markdown_urls = markdown_img_pattern.findall(final_content)
            all_image_urls.update(markdown_urls)

            # Get cached versions (waits for pending downloads until the agent ends)
            if all_image_urls:
                try:
                    cached_map = await get_cached_images(list(all_image_urls))

                    # Apply cached URLs to the structured response
                    for img_ref in structured.get("image_references", []):
                        if img_ref.get("thumbnail") and img_ref["thumbnail"] in cached_map:
                            img_ref["thumbnail"] = cached_map[img_ref["thumbnail"]]
                        if img_ref.get("url") and img_ref["url"] in cached_map:
                            img_ref["url"] = cached_map[img_ref["url"]]

                    # Apply cached URLs to stages
                    for stage in stages_used:
                        for img_ref in stage.get("image_references", []):
                            if img_ref.get("thumbnail") and img_ref["thumbnail"] in cached_map:
                                img_ref["thumbnail"] = cached_map[img_ref["thumbnail"]]
                            if img_ref.get("url") and img_ref["url"] in cached_map:
                                img_ref["url"] = cached_map[img_ref["url"]]

                    # Replace image URLs in markdown content with cached versions
                    def replace_markdown_img(match):
                        full_match = match.group(0)
                        url = match.group(1)
                        cached_url = cached_map.get(url)
                        if cached_url and cached_url != url:
                            return full_match.replace(url, cached_url)
                        return full_match

                    final_content = markdown_img_pattern.sub(replace_markdown_img, final_content)
                    structured["response"] = markdown_img_pattern.sub(replace_markdown_img, structured.get("response", ""))

                    # Log cache stats
                    from .image_cache import get_image_cache
                    cache_stats = get_image_cache().get_stats()
                    logger.info(f"ImageCache stats: {cache_stats}")

                except Exception as e:
                    logger.warning(f"Failed to apply image cache: {e}")

            # Cancel all background image search/download tasks when the agent ends
            if self._image_search_tasks:
                logger.info(f"Cancelling {len(self._image_search_tasks)} background image search tasks")
                for task in self._image_search_tasks:
                    if not task.done():
                        task.cancel()
                # Wait a bit for tasks to handle cancellation gracefully
                try:
                    await asyncio.gather(*self._image_search_tasks, return_exceptions=True)
                except Exception:
                    pass
                self._image_search_tasks.clear()

            # Also cancel any pending image downloads in the cache
            from .image_cache import get_image_cache
            cache = get_image_cache()
            if cache._pending:
                logger.info(f"Cancelling {len(cache._pending)} pending image downloads")
                for task in cache._pending.values():
                    if not task.done():
                        task.cancel()
                cache._pending.clear()

            return {
                "llm_response": final_content,
                "structured_response": structured,
                "stats": stats,
                "model_used": active_model,
                "vision_model_used": (selected_vision_model or getattr(self.config, "vision_model_name", None)) if images else None,
                "conversation_history": current_history,
                "trace_markdown": trace_markdown,
                "billing_info": billing_info,
                "stages_used": stages_used,
            }

        except Exception as e:
            logger.exception("Pipeline Critical Failure")
            # Cancel all background image tasks on error
            if hasattr(self, '_image_search_tasks') and self._image_search_tasks:
                for task in self._image_search_tasks:
                    if not task.done():
                        task.cancel()
                # Wait briefly for cleanup
                await asyncio.wait(self._image_search_tasks, timeout=0.1)
                self._image_search_tasks.clear()

            from .image_cache import get_image_cache
            cache = get_image_cache()
            if cache._pending:
                pending_tasks = list(cache._pending.values())
                for task in pending_tasks:
                    if not task.done():
                        task.cancel()
                await asyncio.wait(pending_tasks, timeout=0.1)
                cache._pending.clear()
            return {
                "llm_response": f"I encountered a critical error: {e}",
                "stats": stats,
                "error": str(e),
            }

    def _parse_tagged_response(self, text: str) -> Dict[str, Any]:
        """Parse the response and auto-infer references from citations and markdown images."""
        parsed = {"response": "", "references": [], "page_references": [], "image_references": [], "flow_steps": []}
        if not text:
            return parsed

        # 1. Strip any trailing reference/source list
        body_text = text
        ref_list_pattern = re.compile(r'(?:\n\s*|^)\s*(?:#{1,3}|\*\*)\s*(?:References|Citations|Sources|参考资料|引用)[\s\S]*$', re.IGNORECASE | re.MULTILINE)
        body_text = ref_list_pattern.sub('', body_text)

        remaining_text = body_text.strip()

        # 2. Unwrap JSON if necessary
        try:
            if remaining_text.strip().startswith("{") and "action" in remaining_text:
                data = json.loads(remaining_text)
                if isinstance(data, dict) and "action_input" in data:
                    remaining_text = data["action_input"]
        except Exception:
            pass

        # 3. Identify all citations [N] and direct markdown images ![]()
        cited_ids = []
        body_pattern = re.compile(r'\[(\d+)\]')
        for match in body_pattern.finditer(remaining_text):
            try:
                cited_ids.append(int(match.group(1)))
            except ValueError:
                pass

        # Also find direct URLs in ![]()
        direct_image_urls = []
        img_pattern = re.compile(r'!\[.*?\]\((.*?)\)')
        for match in img_pattern.finditer(remaining_text):
            url = match.group(1).strip()
            if url and not url.startswith('['):  # Not a [N] citation
                direct_image_urls.append(url)

        # 4. Build citation maps and reference lists
        unified_id_map = {}
        # Keep track of what we've already added to avoid duplicates
        seen_urls = set()

        # id_order needs to be unique and preserve appearance order
        id_order = []
        for id_val in cited_ids:
            if id_val not in id_order:
                id_order.append(id_val)

        # Process [N] citations first to determine the numbering
        for old_id in id_order:
            result_item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
            if not result_item:
                continue

            url = result_item.get("url", "")
            item_type = result_item.get("_type", "")

            entry = {
                "title": result_item.get("title", ""),
                "url": url,
                "domain": result_item.get("domain", "")
            }

            if item_type == "search":
                parsed["references"].append(entry)
                unified_id_map[old_id] = len(parsed["references"]) + len(parsed["page_references"])
                seen_urls.add(url)
            elif item_type == "page":
                parsed["page_references"].append(entry)
                unified_id_map[old_id] = len(parsed["references"]) + len(parsed["page_references"])
                seen_urls.add(url)
            elif item_type == "image":
                entry["thumbnail"] = result_item.get("thumbnail", "")
                if url not in seen_urls:
                    parsed["image_references"].append(entry)
                    seen_urls.add(url)
                # Note: images cited as [N] might be used in text like ![alt]([N]);
                # that case is handled in the replacement step below.

        # Now handle direct image URLs from ![]() that weren't cited as [N]
        for url in direct_image_urls:
            if url in seen_urls:
                continue
            # Find it in all_web_results
            result_item = next((r for r in self.all_web_results if (r.get("url") == url or r.get("image") == url) and r.get("_type") == "image"), None)
            if result_item:
                entry = {
                    "title": result_item.get("title", ""),
                    "url": url,
                    "domain": result_item.get("domain", ""),
                    "thumbnail": result_item.get("thumbnail", "")
                }
                parsed["image_references"].append(entry)
                seen_urls.add(url)

        # 5. Replacement logic
        # Define the image replacement map separately to handle ![alt]([N])
        image_url_map = {}  # old_id -> raw_url
        for old_id in id_order:
            item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
            if item and item.get("_type") == "image":
                image_url_map[old_id] = item.get("url", "")

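        # Example (illustrative values): with image_url_map = {5: "https://e.x/a.png"},
        # sub_img_ref below rewrites "![chart]([5])" to "![chart](https://e.x/a.png)",
        # while a bare "[5]" left in the text is dropped by sub_norm_ref.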
        def refined_replace(text):
            # First, handle ![alt]([N]) specifically.
            # We want to replace the [N] with the actual URL so the markdown renders.
            def sub_img_ref(match):
                alt = match.group(1)
                ref = match.group(2)
                inner_match = body_pattern.match(ref)
                if inner_match:
                    oid = int(inner_match.group(1))
                    if oid in image_url_map:
                        return f"![{alt}]({image_url_map[oid]})"
                return match.group(0)

            text = re.sub(r'!\[(.*?)\]\((.*?)\)', sub_img_ref, text)

            # Then handle normal [N] replacements
            def sub_norm_ref(match):
                oid = int(match.group(1))
                if oid in unified_id_map:
                    return f"[{unified_id_map[oid]}]"
                if oid in image_url_map:
                    return ""  # Remove standalone image citations like [5] if they aren't in ![]()
                return ""  # Remove hallucinated or invalid citations like [99] not found in the results

            return body_pattern.sub(sub_norm_ref, text)

        final_text = refined_replace(remaining_text)
        parsed["response"] = final_text.strip()
        return parsed

    async def _safe_route_tool(self, tool_call):
        """Wrapper for safe concurrent execution of tool calls."""
        try:
            return await asyncio.wait_for(self._route_tool(tool_call), timeout=30.0)
        except asyncio.TimeoutError:
            return "Error: Tool execution timed out (30s limit)."
        except Exception as e:
            return f"Error: Tool execution failed: {e}"

    async def _route_tool(self, tool_call):
        """Execute a tool call and return its result."""
        name = tool_call.function.name
        args = json.loads(html.unescape(tool_call.function.arguments))

        if name == "internal_web_search" or name == "web_search":
            query = args.get("query")
            web = await self.search_service.search(query)

            # Cache results and assign global IDs
            for item in web:
                self.global_id_counter += 1
                item["_id"] = self.global_id_counter
                item["_type"] = "search"
                item["query"] = query
                self.all_web_results.append(item)

            return json.dumps({"web_results_count": len(web), "status": "cached_for_prompt"}, ensure_ascii=False)

        if name == "internal_image_search":
            query = args.get("query")
            # Start the image search in the background (non-blocking).
            # Images are for UI rendering only, not passed to the LLM.
            async def _background_image_search():
                try:
                    images = await self.search_service.image_search(query)
                    # Cache results and assign global IDs for UI rendering
                    for item in images:
                        self.global_id_counter += 1
                        item["_id"] = self.global_id_counter
                        item["_type"] = "image"
                        item["query"] = query
                        item["is_image"] = True
                        self.all_web_results.append(item)
                    logger.info(f"Background image search completed: {len(images)} images for query '{query}'")
                except (asyncio.CancelledError, Exception) as e:
                    # Silently handle cancellation or minor errors in background pre-warming
                    if isinstance(e, asyncio.CancelledError):
                        logger.debug(f"Background image search cancelled for query '{query}'")
                    else:
                        logger.error(f"Background image search failed for query '{query}': {e}")

            task = asyncio.create_task(_background_image_search())
            self._image_search_tasks.append(task)

            # Return immediately without waiting for the search to complete
            return json.dumps({"image_results_count": 0, "status": "searching_in_background"}, ensure_ascii=False)

        if name == "crawl_page":
            url = args.get("url")
            logger.info(f"[Tool] Crawling page: {url}")
            # Returns a dict: {content, title, url}
            result_dict = await self.search_service.fetch_page(url)

            # Cache the crawled content with a global ID
            self.global_id_counter += 1

            cached_item = {
                "_id": self.global_id_counter,
                "_type": "page",
                "title": result_dict.get("title", "Page"),
                "url": result_dict.get("url", url),
                "content": result_dict.get("content", ""),
                "domain": "",
                "is_crawled": True,
            }
            try:
                cached_item["domain"] = urlparse(url).netloc
            except Exception:
                pass

            self.all_web_results.append(cached_item)

            return json.dumps({"crawl_status": "success", "title": cached_item["title"], "content_length": len(result_dict.get("content", ""))}, ensure_ascii=False)

        if name == "set_mode":
            mode = args.get("mode", "standard")
            self.current_mode = mode
            return f"Mode set to {mode}"

        if name == "refuse_answer":
            reason = args.get("reason", "")
            self._should_refuse = True
            self._refuse_reason = reason
            logger.info(f"[Tool] refuse_answer called. Reason: {reason}")
            return "Refuse answer triggered. Pipeline will terminate early."

        return f"Unknown tool {name}"

    async def _safe_llm_call(self, messages, model, tools=None, tool_choice=None, client: Optional[AsyncOpenAI] = None, extra_body: Optional[Dict[str, Any]] = None):
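        # Returns (message, usage). On timeout or API failure this does not
        # raise: it returns a duck-typed stub exposing .content and .tool_calls,
        # so callers can treat errors like an ordinary content-only reply.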
|
|
1056
|
+
try:
|
|
1057
|
+
return await asyncio.wait_for(
|
|
1058
|
+
self._do_llm_request(messages, model, tools, tool_choice, client=client or self.client, extra_body=extra_body),
|
|
1059
|
+
timeout=120.0,
|
|
1060
|
+
)
|
|
1061
|
+
except asyncio.TimeoutError:
|
|
1062
|
+
logger.error("LLM Call Timed Out")
|
|
1063
|
+
return type("obj", (object,), {"content": "Error: The model took too long to respond.", "tool_calls": None})(), {"input_tokens": 0, "output_tokens": 0}
|
|
1064
|
+
except Exception as e:
|
|
1065
|
+
logger.error(f"LLM Call Failed: {e}")
|
|
1066
|
+
return type("obj", (object,), {"content": f"Error: Model failure ({e})", "tool_calls": None})(), {"input_tokens": 0, "output_tokens": 0}
|
|
1067
|
+
|
|
1068
|
+
async def _do_llm_request(self, messages, model, tools, tool_choice, client: AsyncOpenAI, extra_body: Optional[Dict[str, Any]] = None):
|
|
1069
|
+
try:
|
|
1070
|
+
payload_debug = json.dumps(messages)
|
|
1071
|
+
logger.info(f"LLM Request Payload Size: {len(payload_debug)} chars")
|
|
1072
|
+
except Exception:
|
|
1073
|
+
pass
|
|
1074
|
+
|
|
1075
|
+
t0 = time.time()
|
|
1076
|
+
logger.info("LLM Request SENT to API...")
|
|
1077
|
+
response = await client.chat.completions.create(
|
|
1078
|
+
model=model,
|
|
1079
|
+
messages=messages,
|
|
1080
|
+
tools=tools,
|
|
1081
|
+
tool_choice=tool_choice,
|
|
1082
|
+
temperature=self.config.temperature,
|
|
1083
|
+
extra_body=extra_body,
|
|
1084
|
+
)
|
|
1085
|
+
logger.info(f"LLM Request RECEIVED after {time.time() - t0:.2f}s")
|
|
1086
|
+
|
|
1087
|
+
usage = {"input_tokens": 0, "output_tokens": 0}
|
|
1088
|
+
if hasattr(response, "usage") and response.usage:
|
|
1089
|
+
usage["input_tokens"] = getattr(response.usage, "prompt_tokens", 0) or 0
|
|
1090
|
+
usage["output_tokens"] = getattr(response.usage, "completion_tokens", 0) or 0
|
|
1091
|
+
|
|
1092
|
+
return response.choices[0].message, usage
|
|
1093
|
+
|
|
1094
|
+
async def _run_vision_stage(self, user_input: str, images: List[str], model: str, prompt: str) -> Tuple[str, Dict[str, int]]:
|
|
1095
|
+
content_payload: List[Dict[str, Any]] = [{"type": "text", "text": user_input or ""}]
|
|
1096
|
+
for img_b64 in images:
|
|
1097
|
+
url = f"data:image/png;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
|
|
1098
|
+
content_payload.append({"type": "image_url", "image_url": {"url": url}})
|
|
1099
|
+
|
|
1100
|
+
client = self._client_for(
|
|
1101
|
+
api_key=getattr(self.config, "vision_api_key", None),
|
|
1102
|
+
base_url=getattr(self.config, "vision_base_url", None),
|
|
1103
|
+
)
|
|
1104
|
+
response, usage = await self._safe_llm_call(
|
|
1105
|
+
messages=[{"role": "system", "content": prompt}, {"role": "user", "content": content_payload}],
|
|
1106
|
+
model=model,
|
|
1107
|
+
client=client,
|
|
1108
|
+
extra_body=getattr(self.config, "vision_extra_body", None),
|
|
1109
|
+
)
|
|
1110
|
+
return (response.content or "").strip(), usage
|
|
1111
|
+
|
|
1112
|
+
```python
    async def _run_instruct_stage(
        self, user_input: str, vision_text: str, model: str
    ) -> Tuple[str, List[str], Dict[str, Any], Dict[str, int], float]:
        """Returns (instruct_text, search_payloads, trace_dict, usage_dict, search_time)."""
        # Instruct has access to: web_search, image_search, set_mode, crawl_page, refuse_answer
        tools = [self.web_search_tool, self.image_search_tool, self.set_mode_tool, self.crawl_page_tool, self.refuse_answer_tool]
        # Tool descriptions are shown to the model in Chinese: text search, image search,
        # page crawl, standard/agent mode selection, refusal for sensitive content.
        tools_desc = "- internal_web_search: 搜索文本\n- internal_image_search: 搜索图片\n- crawl_page: 获取网页内容\n- set_mode: 设定standard/agent模式\n- refuse_answer: 拒绝回答(敏感/违规内容)"

        prompt = INSTRUCT_SP.format(user_msgs=user_input or "", tools_desc=tools_desc)

        if vision_text:
            prompt = f"{prompt}\n\n{INSTRUCT_SP_VISION_ADD.format(vision_msgs=vision_text)}"

        client = self._client_for(
            api_key=getattr(self.config, "instruct_api_key", None),
            base_url=getattr(self.config, "instruct_base_url", None),
        )

        history: List[Dict[str, Any]] = [
            {"role": "system", "content": prompt},
            {"role": "user", "content": user_input or "..."},
        ]

        response, usage = await self._safe_llm_call(
            messages=history,
            model=model,
            tools=tools,
            tool_choice="auto",
            client=client,
            extra_body=getattr(self.config, "instruct_extra_body", None),
        )

        search_payloads: List[str] = []
        instruct_trace: Dict[str, Any] = {
            "model": model,
            "base_url": getattr(self.config, "instruct_base_url", None) or self.config.base_url,
            "prompt": prompt,
            "user_input": user_input or "",
            "vision_add": vision_text or "",
            "tool_calls": [],
            "tool_results": [],
            "output": "",
        }

        search_time = 0.0
        mode = "standard"
        mode_reason = ""

        if response.tool_calls:
            plan_dict = response.model_dump() if hasattr(response, "model_dump") else response
            history.append(plan_dict)

            tasks = [self._safe_route_tool(tc) for tc in response.tool_calls]

            st = time.time()
            results = await asyncio.gather(*tasks)
            search_time = time.time() - st

            for i, result in enumerate(results):
                tc = response.tool_calls[i]
                history.append(
                    {"tool_call_id": tc.id, "role": "tool", "name": tc.function.name, "content": str(result)}
                )
                instruct_trace["tool_calls"].append(self._tool_call_to_trace(tc))
                instruct_trace["tool_results"].append({"name": tc.function.name, "content": str(result)})

                if tc.function.name in ["web_search", "internal_web_search"]:
                    search_payloads.append(str(result))
                elif tc.function.name == "set_mode":
                    try:
                        args = json.loads(html.unescape(tc.function.arguments))
                    except Exception:
                        args = {}
                    mode = args.get("mode", mode)
                    mode_reason = args.get("reason", "")

            instruct_trace["mode"] = mode
            if mode_reason:
                instruct_trace["mode_reason"] = mode_reason

            instruct_trace["output"] = ""
            instruct_trace["usage"] = usage
            return "", search_payloads, instruct_trace, usage, search_time

        instruct_trace["mode"] = mode
        instruct_trace["output"] = (response.content or "").strip()
        instruct_trace["usage"] = usage
        return "", search_payloads, instruct_trace, usage, 0.0
```
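After the gather, each tool result is appended as a `role: "tool"` message whose `tool_call_id` echoes the id of the originating call, which is the pairing the OpenAI chat-completions API expects. A sketch of the resulting history with hypothetical values:

```python
import json

# Hypothetical history after one round of tool execution; the assistant
# entry stands in for the model_dump() of the tool-call response.
history = [
    {"role": "system", "content": "<instruct system prompt>"},
    {"role": "user", "content": "latest python release?"},
    {
        "role": "assistant",
        "content": None,
        "tool_calls": [{
            "id": "call_0",
            "type": "function",
            "function": {"name": "internal_web_search",
                         "arguments": json.dumps({"query": "python latest release"})},
        }],
    },
    {"tool_call_id": "call_0", "role": "tool", "name": "internal_web_search",
     "content": "[1] Title: ..."},
]
assert history[-1]["tool_call_id"] == history[-2]["tool_calls"][0]["id"]
```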
```python
    def _format_search_msgs(self) -> str:
        """Format search snippets only (not crawled pages)."""
        if not self.all_web_results:
            return ""

        lines = []
        for res in self.all_web_results:
            if res.get("_type") != "search":
                continue  # Only search results
            idx = res.get("_id")
            title = (res.get("title", "") or "").strip()
            url = res.get("url", "")
            content = (res.get("content", "") or "").strip()
            lines.append(f"[{idx}] Title: {title}\nURL: {url}\nSnippet: {content}\n")

        return "\n".join(lines)

    def _format_page_msgs(self) -> str:
        """Format crawled page content (detailed)."""
        if not self.all_web_results:
            return ""

        lines = []
        for res in self.all_web_results:
            if res.get("_type") != "page":
                continue  # Only page results
            idx = res.get("_id")
            title = (res.get("title", "") or "").strip()
            url = res.get("url", "")
            content = (res.get("content", "") or "").strip()
            lines.append(f"[{idx}] Title: {title}\nURL: {url}\nContent: {content}\n")

        return "\n".join(lines)

    def _format_image_search_msgs(self) -> str:
        if not self.all_web_results:
            return ""

        lines = []
        for res in self.all_web_results:
            if res.get("_type") != "image":
                continue  # Only image results
            idx = res.get("_id")
            title = res.get("title", "")
            url = res.get("image", "") or res.get("url", "")
            thumb = res.get("thumbnail", "")
            lines.append(f"[{idx}] Title: {title}\nURL: {url}\nThumbnail: {thumb}\n")
        return "\n".join(lines)
```
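All three formatters read the same `self.all_web_results` list and select entries by their `_type` tag. A sketch of plausible records and the search-snippet rendering (field values are invented for illustration):

```python
# Hypothetical records; "_type" routes each entry to exactly one formatter.
all_web_results = [
    {"_id": 1, "_type": "search", "title": "Example", "url": "https://example.com",
     "content": "A short snippet."},
    {"_id": 2, "_type": "page", "title": "Example", "url": "https://example.com",
     "content": "Full crawled text ..."},
    {"_id": 3, "_type": "image", "title": "Example image",
     "image": "https://example.com/a.png", "thumbnail": "https://example.com/t.png"},
]

search_block = "\n".join(
    f"[{r['_id']}] Title: {r['title']}\nURL: {r['url']}\nSnippet: {r['content']}\n"
    for r in all_web_results if r["_type"] == "search"
)
print(search_block)
# [1] Title: Example
# URL: https://example.com
# Snippet: A short snippet.
```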
```python
    def _client_for(self, api_key: Optional[str], base_url: Optional[str]) -> AsyncOpenAI:
        if api_key or base_url:
            return AsyncOpenAI(base_url=base_url or self.config.base_url, api_key=api_key or self.config.api_key)
        return self.client
```
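`_client_for` allocates a dedicated `AsyncOpenAI` client only when a stage overrides the key or base URL; each missing field falls back to the shared defaults. A minimal sketch of that fallback, assuming a simplified stand-in for the plugin config:

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class Cfg:
    # Hypothetical stand-in for the plugin's config object.
    base_url: str = "https://api.openai.com/v1"
    api_key: str = "sk-default"
    vision_base_url: Optional[str] = "https://vision.example.invalid/v1"
    vision_api_key: Optional[str] = None

cfg = Cfg()
# An override on either field triggers a dedicated client; missing pieces
# fall back to the shared defaults.
if cfg.vision_api_key or cfg.vision_base_url:
    base = cfg.vision_base_url or cfg.base_url
    key = cfg.vision_api_key or cfg.api_key
    assert (base, key) == ("https://vision.example.invalid/v1", "sk-default")
```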
```python
    def _tool_call_to_trace(self, tool_call) -> Dict[str, Any]:
        try:
            args = json.loads(html.unescape(tool_call.function.arguments))
        except Exception:
            args = tool_call.function.arguments
        return {"id": getattr(tool_call, "id", None), "name": tool_call.function.name, "arguments": args}
```
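Tool arguments go through `html.unescape` before `json.loads`, so entity-escaped payloads still parse; if parsing fails, the raw argument string is kept in the trace instead. For example:

```python
import html
import json

raw = "{&quot;query&quot;: &quot;b&amp;w photos&quot;}"  # HTML-escaped JSON arguments
args = json.loads(html.unescape(raw))
assert args == {"query": "b&w photos"}
```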
````python
    def _render_trace_markdown(self, trace: Dict[str, Any]) -> str:
        def fence(label: str, content: str) -> str:
            safe = (content or "").replace("```", "``\\`")
            return f"```{label}\n{safe}\n```"

        parts: List[str] = []
        parts.append("# Pipeline Trace\n")

        if trace.get("vision"):
            v = trace["vision"]
            parts.append("## Vision\n")
            parts.append(f"- model: `{v.get('model')}`")
            parts.append(f"- base_url: `{v.get('base_url')}`")
            parts.append(f"- images_count: `{v.get('images_count')}`\n")
            parts.append("### Prompt\n")
            parts.append(fence("text", v.get("prompt", "")))
            parts.append("\n### Output\n")
            parts.append(fence("text", v.get("output", "")))
            parts.append("")

        if trace.get("instruct"):
            t = trace["instruct"]
            parts.append("## Instruct\n")
            parts.append(f"- model: `{t.get('model')}`")
            parts.append(f"- base_url: `{t.get('base_url')}`\n")
            parts.append("### Prompt\n")
            parts.append(fence("text", t.get("prompt", "")))
            if t.get("tool_calls"):
                parts.append("\n### Tool Calls\n")
                parts.append(fence("json", json.dumps(t.get("tool_calls"), ensure_ascii=False, indent=2)))
            if t.get("tool_results"):
                parts.append("\n### Tool Results\n")
                parts.append(fence("json", json.dumps(t.get("tool_results"), ensure_ascii=False, indent=2)))
            parts.append("\n### Output\n")
            parts.append(fence("text", t.get("output", "")))
            parts.append("")

        if trace.get("agent"):
            a = trace["agent"]
            parts.append("## Agent\n")
            parts.append(f"- model: `{a.get('model')}`")
            parts.append(f"- base_url: `{a.get('base_url')}`\n")
            parts.append("### System Prompt\n")
            parts.append(fence("text", a.get("system_prompt", "")))
            parts.append("\n### Steps\n")
            parts.append(fence("json", json.dumps(a.get("steps", []), ensure_ascii=False, indent=2)))
            parts.append("\n### Final Output\n")
            parts.append(fence("text", a.get("final_output", "")))

        return "\n".join(parts).strip() + "\n"
````
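The nested `fence` helper defuses any triple-backtick run inside traced content so an embedded code fence cannot close the trace's own fenced block early:

````python
def fence(label: str, content: str) -> str:
    # Replace embedded triple backticks so they cannot close our fence.
    safe = (content or "").replace("```", "``\\`")
    return f"```{label}\n{safe}\n```"

block = fence("text", "outer\n```python\ninner\n```")
assert block.count("```") == 2  # only the outer opening and closing fences survive
````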
```python
    async def close(self):
        try:
            await self.search_service.close()
        except Exception:
            pass

        # Gracefully handle background tasks completion
        if hasattr(self, '_image_search_tasks') and self._image_search_tasks:
            for task in self._image_search_tasks:
                if not task.done():
                    task.cancel()
            try:
                # Wait briefly for cancellation to propagate
                await asyncio.wait(self._image_search_tasks, timeout=0.2)
            except Exception:
                pass
            self._image_search_tasks = []

        # Also cleanup image cache pending tasks if any
        try:
            from .image_cache import get_image_cache
            cache = get_image_cache()
            if cache._pending:
                pending = list(cache._pending.values())
                for task in pending:
                    if not task.done():
                        task.cancel()
                await asyncio.wait(pending, timeout=0.2)
                cache._pending.clear()
        except Exception:
            pass

        self.all_web_results = []
```
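Shutdown uses the usual cancel-then-wait idiom: request cancellation, then wait a short bounded interval for the cancellations to propagate rather than blocking indefinitely. A self-contained sketch of the pattern:

```python
import asyncio

async def main() -> None:
    async def worker() -> None:
        await asyncio.sleep(60)  # stands in for a long-running background job

    tasks = [asyncio.create_task(worker()) for _ in range(3)]
    for task in tasks:
        if not task.done():
            task.cancel()
    # Bounded wait so shutdown never blocks on stuck tasks.
    done, pending = await asyncio.wait(tasks, timeout=0.2)
    print(f"finished: {len(done)}, still pending: {len(pending)}")

asyncio.run(main())
```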