entari-plugin-hyw 4.0.0rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of entari-plugin-hyw might be problematic. Click here for more details.
- entari_plugin_hyw/__init__.py +532 -0
- entari_plugin_hyw/assets/card-dist/index.html +387 -0
- entari_plugin_hyw/assets/card-dist/logos/anthropic.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/cerebras.svg +9 -0
- entari_plugin_hyw/assets/card-dist/logos/deepseek.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/gemini.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/google.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/grok.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/huggingface.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/microsoft.svg +15 -0
- entari_plugin_hyw/assets/card-dist/logos/minimax.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/mistral.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/nvida.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/openai.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/openrouter.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/perplexity.svg +24 -0
- entari_plugin_hyw/assets/card-dist/logos/qwen.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/xai.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/xiaomi.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/zai.png +0 -0
- entari_plugin_hyw/assets/card-dist/vite.svg +1 -0
- entari_plugin_hyw/assets/icon/anthropic.svg +1 -0
- entari_plugin_hyw/assets/icon/cerebras.svg +9 -0
- entari_plugin_hyw/assets/icon/deepseek.png +0 -0
- entari_plugin_hyw/assets/icon/gemini.svg +1 -0
- entari_plugin_hyw/assets/icon/google.svg +1 -0
- entari_plugin_hyw/assets/icon/grok.png +0 -0
- entari_plugin_hyw/assets/icon/huggingface.png +0 -0
- entari_plugin_hyw/assets/icon/microsoft.svg +15 -0
- entari_plugin_hyw/assets/icon/minimax.png +0 -0
- entari_plugin_hyw/assets/icon/mistral.png +0 -0
- entari_plugin_hyw/assets/icon/nvida.png +0 -0
- entari_plugin_hyw/assets/icon/openai.svg +1 -0
- entari_plugin_hyw/assets/icon/openrouter.png +0 -0
- entari_plugin_hyw/assets/icon/perplexity.svg +24 -0
- entari_plugin_hyw/assets/icon/qwen.png +0 -0
- entari_plugin_hyw/assets/icon/xai.png +0 -0
- entari_plugin_hyw/assets/icon/xiaomi.png +0 -0
- entari_plugin_hyw/assets/icon/zai.png +0 -0
- entari_plugin_hyw/browser/__init__.py +10 -0
- entari_plugin_hyw/browser/engines/base.py +13 -0
- entari_plugin_hyw/browser/engines/bing.py +95 -0
- entari_plugin_hyw/browser/engines/searxng.py +137 -0
- entari_plugin_hyw/browser/landing.html +172 -0
- entari_plugin_hyw/browser/manager.py +153 -0
- entari_plugin_hyw/browser/service.py +275 -0
- entari_plugin_hyw/card-ui/.gitignore +24 -0
- entari_plugin_hyw/card-ui/README.md +5 -0
- entari_plugin_hyw/card-ui/index.html +16 -0
- entari_plugin_hyw/card-ui/package-lock.json +2342 -0
- entari_plugin_hyw/card-ui/package.json +31 -0
- entari_plugin_hyw/card-ui/public/logos/anthropic.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/cerebras.svg +9 -0
- entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/gemini.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/google.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/microsoft.svg +15 -0
- entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/openai.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/perplexity.svg +24 -0
- entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
- entari_plugin_hyw/card-ui/public/vite.svg +1 -0
- entari_plugin_hyw/card-ui/src/App.vue +756 -0
- entari_plugin_hyw/card-ui/src/assets/vue.svg +1 -0
- entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +41 -0
- entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +382 -0
- entari_plugin_hyw/card-ui/src/components/SectionCard.vue +41 -0
- entari_plugin_hyw/card-ui/src/components/StageCard.vue +240 -0
- entari_plugin_hyw/card-ui/src/main.ts +5 -0
- entari_plugin_hyw/card-ui/src/style.css +29 -0
- entari_plugin_hyw/card-ui/src/test_regex.js +103 -0
- entari_plugin_hyw/card-ui/src/types.ts +61 -0
- entari_plugin_hyw/card-ui/tsconfig.app.json +16 -0
- entari_plugin_hyw/card-ui/tsconfig.json +7 -0
- entari_plugin_hyw/card-ui/tsconfig.node.json +26 -0
- entari_plugin_hyw/card-ui/vite.config.ts +16 -0
- entari_plugin_hyw/definitions.py +130 -0
- entari_plugin_hyw/history.py +248 -0
- entari_plugin_hyw/image_cache.py +274 -0
- entari_plugin_hyw/misc.py +135 -0
- entari_plugin_hyw/modular_pipeline.py +351 -0
- entari_plugin_hyw/render_vue.py +401 -0
- entari_plugin_hyw/search.py +116 -0
- entari_plugin_hyw/stage_base.py +88 -0
- entari_plugin_hyw/stage_instruct.py +328 -0
- entari_plugin_hyw/stage_instruct_review.py +92 -0
- entari_plugin_hyw/stage_summary.py +164 -0
- entari_plugin_hyw-4.0.0rc5.dist-info/METADATA +116 -0
- entari_plugin_hyw-4.0.0rc5.dist-info/RECORD +99 -0
- entari_plugin_hyw-4.0.0rc5.dist-info/WHEEL +5 -0
- entari_plugin_hyw-4.0.0rc5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Instruct Stage
|
|
3
|
+
|
|
4
|
+
Handles initial task planning and search generation.
|
|
5
|
+
Analyze user query and execute initial searches.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import time
|
|
10
|
+
import asyncio
|
|
11
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
12
|
+
from loguru import logger
|
|
13
|
+
from openai import AsyncOpenAI
|
|
14
|
+
|
|
15
|
+
from .stage_base import BaseStage, StageContext, StageResult
|
|
16
|
+
from .definitions import (
|
|
17
|
+
get_refuse_answer_tool,
|
|
18
|
+
get_web_search_tool,
|
|
19
|
+
get_crawl_page_tool,
|
|
20
|
+
INSTRUCT_SP
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
class InstructStage(BaseStage):
    """Round 1 agent stage: Initial Discovery.

    Sends the user query (and any images) to the LLM together with the
    refuse/search/crawl tool schemas, executes whatever tools the model
    plans, and stores the gathered material on the StageContext
    (``web_results``, ``review_context``, ``instruct_history``) for the
    follow-up review and summary stages.
    """

    @property
    def name(self) -> str:
        return "Instruct"

    def __init__(self, config: Any, search_service: Any, client: AsyncOpenAI):
        super().__init__(config, search_service, client)

        # Tool schemas advertised to the LLM for function calling.
        self.refuse_answer_tool = get_refuse_answer_tool()
        self.web_search_tool = get_web_search_tool()
        self.crawl_page_tool = get_crawl_page_tool()

    async def execute(self, context: StageContext) -> StageResult:
        """Run Round 1 and record its findings on ``context``."""
        start_time = time.time()

        logger.info("Instruct: Starting Round 1 (Initial Discovery)")

        # Build the Round 1 conversation.
        r1_user_content = self._build_user_message(context)
        r1_messages = [
            {"role": "system", "content": INSTRUCT_SP},
            {"role": "user", "content": r1_user_content}
        ]

        # Execute Round 1 LLM call with all three tools available.
        r1_response, r1_usage, r1_tool_calls, r1_content = await self._call_llm(
            messages=r1_messages,
            tools=[self.refuse_answer_tool, self.web_search_tool, self.crawl_page_tool],
            tool_choice="auto"
        )

        # Honor a refusal flag that may already be set on the context
        # before this stage ran.
        if context.should_refuse:
            return self._build_result(start_time, r1_usage, r1_content, len(r1_tool_calls or []))

        # Execute Round 1 tools. This is where refuse_answer is actually
        # detected and context.should_refuse gets set.
        r1_tool_outputs = []
        if r1_tool_calls:
            r1_tool_outputs = await self._process_tool_calls(context, r1_tool_calls)

        # BUG FIX: the refusal flag is only set inside _process_tool_calls,
        # so it must be re-checked *after* tool execution — the original
        # pre-execution check alone could never observe a Round 1 refusal,
        # and context assembly would proceed as if nothing happened.
        if context.should_refuse:
            return self._build_result(start_time, r1_usage, r1_content, len(r1_tool_calls or []))

        # --- Context assembly for Round 2 ---

        # Summarize Round 1 actions so the review stage can build on them.
        r1_summary_text = "## Round 1 Execution Summary\n"
        if r1_content:
            r1_summary_text += f"Thought: {r1_content}\n"

        if r1_tool_outputs:
            r1_summary_text += "Tools Executed & Results:\n"
            for output in r1_tool_outputs:
                # 'content' here is the tool output (e.g. search results text
                # or a crawl preview).
                r1_summary_text += f"- Action: {output['name']}\n"
                r1_summary_text += f" Result: {output['content']}\n"
        else:
            r1_summary_text += "No tools were executed in Round 1.\n"

        r2_context_str = f"""User Query: {context.user_input}

{r1_summary_text}
"""
        # Hand the assembled context to the next (review) stage.
        context.review_context = r2_context_str

        # Compact trace of this round for logging/record purposes.
        context.instruct_history.append({
            "role": "assistant",
            "content": f"[Round 1 Thought]: {r1_content}\n[Round 1 Actions]: {len(r1_tool_outputs)} tools"
        })

        return self._build_result(start_time, r1_usage, r1_content, len(r1_tool_calls or []))

    def _build_user_message(self, context: StageContext) -> Any:
        """Build the user message: plain text, or a multimodal content list
        when the context carries base64 images."""
        text_prompt = f"User Query: {context.user_input}"
        if context.images:
            user_content: List[Dict[str, Any]] = [{"type": "text", "text": text_prompt}]
            for img_b64 in context.images:
                # Wrap raw base64 in a data URL unless it already is one.
                url = f"data:image/jpeg;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
                user_content.append({"type": "image_url", "image_url": {"url": url}})
            return user_content
        return text_prompt

    async def _call_llm(self, messages, tools, tool_choice="auto"):
        """Call the 'instruct' model and return (response, usage, tool_calls, content).

        Raises: whatever the OpenAI client raises on failure (logged first).
        """
        model_cfg = self.config.get_model_config("instruct")
        client = self._client_for(
            api_key=model_cfg.get("api_key"),
            base_url=model_cfg.get("base_url")
        )
        model = model_cfg.get("model_name") or self.config.model_name

        try:
            response = await client.chat.completions.create(
                model=model,
                messages=messages,
                tools=tools,
                tool_choice=tool_choice,
                temperature=self.config.temperature,
                extra_body=model_cfg.get("extra_body"),
            )
        except Exception as e:
            logger.error(f"InstructStage LLM Error: {e}")
            # Bare raise preserves the original traceback (fix: was `raise e`).
            raise

        usage = {"input_tokens": 0, "output_tokens": 0}
        if hasattr(response, "usage") and response.usage:
            usage["input_tokens"] = getattr(response.usage, "prompt_tokens", 0) or 0
            usage["output_tokens"] = getattr(response.usage, "completion_tokens", 0) or 0

        message = response.choices[0].message
        content = message.content or ""
        tool_calls = message.tool_calls

        if content:
            logger.debug(f"Instruct: Agent Thought -> {content[:100]}...")

        return response, usage, tool_calls, content

    async def _process_tool_calls(self, context: StageContext, tool_calls: List[Any]) -> List[Dict[str, Any]]:
        """
        Executes tool calls and returns a list of outputs for context building.
        Updates context.web_results globally.
        """
        pending_crawls = []    # List of (url, tool_call_id)
        pending_searches = []  # List of (query, tool_call_id)

        results_for_context = []

        # First pass: classify calls. Searches/crawls are collected and
        # executed in batches below; refusals take effect immediately.
        for tc in tool_calls:
            name = tc.function.name
            tc_id = tc.id
            try:
                args = json.loads(tc.function.arguments)
            except json.JSONDecodeError:
                results_for_context.append({
                    "id": tc_id, "name": name, "content": "Error: Invalid JSON arguments"
                })
                continue

            if name == "refuse_answer":
                reason = args.get("reason", "Refused")
                logger.warning(f"Instruct: Model Refused Answer. Reason: {reason}")
                context.should_refuse = True
                context.refuse_reason = reason
                results_for_context.append({
                    "id": tc_id, "name": name, "content": f"Refused: {reason}"
                })

            elif name == "internal_web_search":
                query = args.get("query")
                if query:
                    logger.info(f"Instruct: Planned search query -> '{query}'")
                    pending_searches.append((query, tc_id))

            elif name == "crawl_page":
                url = args.get("url")
                if url:
                    logger.info(f"Instruct: Planned page crawl -> {url}")
                    pending_crawls.append((url, tc_id))

        # --- Batch execution ---

        # 1. Crawls
        if pending_crawls:
            urls = [u for u, _ in pending_crawls]
            logger.info(f"Instruct: Executing {len(urls)} crawls via batch...")

            # Start fetching pages; in image mode, browser tabs are prepared
            # concurrently while the fetch runs.
            fetch_task = asyncio.create_task(self.search_service.fetch_pages_batch(urls))

            is_image_mode = getattr(self.config, "page_content_mode", "text") == "image"
            tab_ids = []
            if is_image_mode:
                from .render_vue import get_content_renderer
                renderer = await get_content_renderer()
                loop = asyncio.get_running_loop()
                tab_tasks = [
                    loop.run_in_executor(renderer._executor, renderer._prepare_tab_sync)
                    for _ in urls
                ]
                tab_ids = await asyncio.gather(*tab_tasks, return_exceptions=True)
                logger.debug(f"Instruct: Prepared {len(tab_ids)} tabs: {tab_ids}")

            crawl_results_list = await fetch_task

            if is_image_mode and tab_ids:
                theme_color = getattr(self.config, "theme_color", "#ef4444")
                render_tasks = []
                valid_pairs = []  # (index into crawl_results_list, page_data)
                MAX_CHARS = 3000
                for i, (page_data, tab_id) in enumerate(zip(crawl_results_list, tab_ids)):
                    if isinstance(tab_id, Exception):
                        # NOTE(review): skipped tabs do not appear to be closed
                        # here — confirm the renderer reclaims them.
                        logger.warning(f"Instruct: Skip rendering page {i} due to tab error: {tab_id}")
                        continue

                    # Truncate content to avoid excessive size
                    content = page_data.get("content", "")
                    if len(content) > MAX_CHARS:
                        content = content[:MAX_CHARS] + "\n\n...(content truncated for length)..."
                        page_data["content"] = content

                    if not content:
                        logger.warning(f"Instruct: Skip rendering page {i} due to empty content")
                        continue

                    valid_pairs.append((i, page_data))
                    render_tasks.append(
                        loop.run_in_executor(
                            renderer._executor,
                            renderer._render_page_to_b64_sync,
                            {"title": page_data.get("title", "Page"), "content": content},
                            tab_id,
                            theme_color
                        )
                    )

                if render_tasks:
                    logger.debug(f"Instruct: Parallel rendering {len(render_tasks)} pages...")
                    screenshots = await asyncio.gather(*render_tasks, return_exceptions=True)
                    logger.debug(f"Instruct: Parallel rendering finished. Results count: {len(screenshots)}")
                    # Attach successful screenshots back onto their pages.
                    for j, (orig_idx, page_data) in enumerate(valid_pairs):
                        if j < len(screenshots) and not isinstance(screenshots[j], Exception):
                            crawl_results_list[orig_idx]["screenshot_b64"] = screenshots[j]

            for i, (url, tc_id) in enumerate(pending_crawls):
                page_data = crawl_results_list[i]
                title = page_data.get("title", "Unknown")

                # Update global context
                page_item = {
                    "_id": context.next_id(),
                    "_type": "page",
                    "title": page_data.get("title", "Page"),
                    "url": page_data.get("url", url),
                    "content": page_data.get("content", ""),
                    "is_crawled": True,
                }
                if page_data.get("screenshot_b64"):
                    page_item["screenshot_b64"] = page_data["screenshot_b64"]
                if page_data.get("raw_screenshot_b64"):
                    page_item["raw_screenshot_b64"] = page_data["raw_screenshot_b64"]
                if page_data.get("images"):
                    page_item["images"] = page_data["images"]

                context.web_results.append(page_item)

                # Output for context assembly: a short preview only.
                content_preview = page_data.get("content", "")[:500]
                results_for_context.append({
                    "id": tc_id,
                    "name": "crawl_page",
                    "content": f"Crawled '{title}' ({url}):\n{content_preview}..."
                })

        # 2. Searches
        if pending_searches:
            queries = [q for q, _ in pending_searches]
            logger.info(f"Instruct: Executing {len(queries)} searches via batch...")

            search_results_list = await self.search_service.search_batch(queries)

            for i, (query, tc_id) in enumerate(pending_searches):
                web_results = search_results_list[i]
                visible_results = [r for r in web_results if not r.get("_hidden")]

                # Update global context (hidden results are kept for record).
                for item in web_results:
                    item["_id"] = context.next_id()
                    if "type" in item:
                        item["_type"] = item["type"]
                    elif "_type" not in item:
                        item["_type"] = "search"
                    item["query"] = query
                    context.web_results.append(item)

                # Output for context assembly: top-5 visible hits.
                summary = f"Found {len(visible_results)} results for '{query}':\n"
                for r in visible_results[:5]:
                    summary += f"- {r.get('title')} ({r.get('url')}): {(r.get('content') or '')[:100]}...\n"

                results_for_context.append({
                    "id": tc_id,
                    "name": "internal_web_search",
                    "content": summary
                })

        return results_for_context

    def _build_result(self, start_time, usage, content, tool_calls_count):
        """Package usage and trace info into this stage's StageResult."""
        model_cfg = self.config.get_model_config("instruct")
        model = model_cfg.get("model_name") or self.config.model_name

        trace = {
            "stage": "Instruct",
            "model": model,
            "usage": usage,
            "output": content,
            "tool_calls": tool_calls_count,
            "time": time.time() - start_time,
        }

        return StageResult(
            success=True,
            data={"reasoning": content},
            usage=usage,
            trace=trace
        )
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Instruct Review Stage
|
|
3
|
+
|
|
4
|
+
Handles the second round of instruction: Review and Refine.
|
|
5
|
+
Inherits from InstructStage to reuse tool execution logic.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
from typing import Any, List
|
|
10
|
+
from loguru import logger
|
|
11
|
+
from openai import AsyncOpenAI
|
|
12
|
+
|
|
13
|
+
from .stage_base import StageContext, StageResult
|
|
14
|
+
from .stage_instruct import InstructStage
|
|
15
|
+
from .definitions import INSTRUCT_REVIEW_SP
|
|
16
|
+
|
|
17
|
+
class InstructReviewStage(InstructStage):
    """Round 2 stage: review Round 1 findings and refine with follow-up
    searches/crawls.

    Inherits the LLM-call and tool-execution machinery (and the tool
    schemas) from InstructStage.
    """

    @property
    def name(self) -> str:
        return "Instruct Review"

    def __init__(self, config: Any, search_service: Any, client: AsyncOpenAI):
        # All tools come from the parent InstructStage constructor.
        super().__init__(config, search_service, client)

    async def execute(self, context: StageContext) -> StageResult:
        started = time.time()
        logger.info("Instruct Review: Starting Round 2 (Review & Refine)")

        # Without Round 1 output there is nothing to review.
        if not context.review_context:
            logger.warning("Instruct Review: No context found from Round 1. Skipping.")
            return StageResult(success=True, data={"reasoning": "Skipped due to missing context."})

        # Round 1 findings are injected as a dedicated user message that
        # precedes the actual user query.
        context_message = f"## Previous Round Context\n\n```context\n{context.review_context}\n```"
        messages = [
            {"role": "system", "content": INSTRUCT_REVIEW_SP},
            {"role": "user", "content": context_message},
            {"role": "user", "content": self._build_user_message(context)},
        ]

        # refuse_answer is deliberately left out of the review round.
        review_tools = [self.web_search_tool, self.crawl_page_tool]

        _, usage, tool_calls, content = await self._call_llm(
            messages=messages,
            tools=review_tools,
            tool_choice="auto",
        )

        # Run any follow-up searches/crawls; these mutate context.web_results.
        tool_outputs = await self._process_tool_calls(context, tool_calls) if tool_calls else []

        # Keep a compact record of this round for logging purposes.
        context.instruct_history.append({
            "role": "assistant",
            "content": f"[Round 2 Review]: {content}\n[Round 2 Actions]: {len(tool_outputs)} tools"
        })

        return self._build_result(started, usage, content, len(tool_calls or []))

    def _build_result(self, start_time, usage, content, tool_calls_count):
        """Package usage and trace info, tagged with this stage's name."""
        cfg = self.config.get_model_config("instruct")
        model_name = cfg.get("model_name") or self.config.model_name

        return StageResult(
            success=True,
            data={"reasoning": content},
            usage=usage,
            trace={
                "stage": "Instruct Review",
                "model": model_name,
                "usage": usage,
                "output": content,
                "tool_calls": tool_calls_count,
                "time": time.time() - start_time,
            },
        )
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Summary Stage
|
|
3
|
+
|
|
4
|
+
Generates final response based on gathered information.
|
|
5
|
+
Different output formats for different modes.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
import re
|
|
10
|
+
from typing import Any, Dict, List, Optional
|
|
11
|
+
|
|
12
|
+
from loguru import logger
|
|
13
|
+
from openai import AsyncOpenAI
|
|
14
|
+
|
|
15
|
+
from .stage_base import BaseStage, StageContext, StageResult
|
|
16
|
+
from .definitions import SUMMARY_REPORT_SP
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SummaryStage(BaseStage):
    """
    Summary Stage: Generate final response.

    Feeds the accumulated agent context plus formatted web results to the
    "main" model and returns its answer as the stage output.
    """

    @property
    def name(self) -> str:
        return "Summary"

    async def execute(
        self,
        context: StageContext,
        images: Optional[List[str]] = None
    ) -> StageResult:
        """Generate the final summary.

        Args:
            context: Pipeline context carrying agent_context / web_results.
            images: Optional base64-encoded images forwarded to the model.
                (Annotation fixed: the default is None, so the parameter
                type is ``Optional[List[str]]``, not ``List[str]``.)
        """
        start_time = time.time()

        # Format context from web results
        web_content = self._format_web_content(context)
        full_context = f"{context.agent_context}\n\n{web_content}"

        # Select prompt (language comes from config, defaulting to zh-Hans).
        language = getattr(self.config, "language", "Simplified Chinese")

        system_prompt = SUMMARY_REPORT_SP.format(
            language=language
        )

        # Build Context Message
        context_message = f"## Web Search & Page Content\n\n```context\n{full_context}\n```"

        # Build user content — multimodal list when images are supplied.
        user_text = context.user_input or "..."
        if images:
            user_content: List[Dict[str, Any]] = [{"type": "text", "text": user_text}]
            for img_b64 in images:
                # Wrap raw base64 in a data URL unless it already is one.
                url = f"data:image/jpeg;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
                user_content.append({"type": "image_url", "image_url": {"url": url}})
        else:
            user_content = user_text

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": context_message},
            {"role": "user", "content": user_content}
        ]

        # Resolve model/client for the "main" role.
        model_cfg = self.config.get_model_config("main")

        client = self._client_for(
            api_key=model_cfg.get("api_key"),
            base_url=model_cfg.get("base_url")
        )

        model = model_cfg.get("model_name") or self.config.model_name

        try:
            response = await client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=self.config.temperature,
                extra_body=getattr(self.config, "summary_extra_body", None),
            )
        except Exception as e:
            # Best-effort: surface the error in the stage result instead of
            # crashing the pipeline.
            logger.error(f"SummaryStage LLM error: {e}")
            return StageResult(
                success=False,
                error=str(e),
                data={"content": f"Error generating summary: {e}"}
            )

        usage = {"input_tokens": 0, "output_tokens": 0}
        if hasattr(response, "usage") and response.usage:
            usage["input_tokens"] = getattr(response.usage, "prompt_tokens", 0) or 0
            usage["output_tokens"] = getattr(response.usage, "completion_tokens", 0) or 0

        content = (response.choices[0].message.content or "").strip()

        return StageResult(
            success=True,
            data={"content": content},
            usage=usage,
            trace={
                "model": model,
                "provider": model_cfg.get("model_provider") or "Unknown",
                "usage": usage,
                "system_prompt": system_prompt,
                "output": content,
                "time": time.time() - start_time,
                "images_count": len(images) if images else 0,
            }
        )

    def _strip_links(self, text: str) -> str:
        """Strip markdown links [text](url) -> text and remove bare URLs."""
        # Replace [text](url) with just the link text.
        text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
        # Remove bare http/https URLs.
        text = re.sub(r'https?://\S+', '', text)
        return text

    def _format_web_content(self, context: StageContext) -> str:
        """Format web results for the summary prompt.

        Pages are listed before raw search pages, which come before
        snippets; duplicate URLs keep only the highest-priority item.
        """
        if not context.web_results:
            return ""

        # Sort results: pages first, then raw searches, then snippets
        def get_priority(item_type):
            if item_type == "page": return 0
            if item_type == "search_raw_page": return 1
            return 2  # search (snippets)

        sorted_results = sorted(
            context.web_results,
            key=lambda x: get_priority(x.get("_type"))
        )

        lines = []
        seen_urls = set()

        for res in sorted_results:
            type_ = res.get("_type")
            idx = res.get("_id")
            title = (res.get("title", "") or "").strip()
            url = res.get("url", "")

            # Deduplicate items by URL (keep higher priority item only)
            if url:
                if url in seen_urls:
                    continue
                seen_urls.add(url)

            # NOTE(review): "search_raw_page" items participate in sorting and
            # URL dedup but are never emitted below — confirm that is intended.
            if type_ == "page":
                content = (res.get("content", "") or "").strip()
                content = self._strip_links(content)
                lines.append(f"[{idx}] Title: {title}\nContent:\n{content}\n")
            elif type_ == "search":
                snippet = (res.get("content", "") or "").strip()
                snippet = self._strip_links(snippet)
                lines.append(f"[{idx}] Title: {title}\nSnippet: {snippet}\n")

        return "\n".join(lines)
|