entari-plugin-hyw 4.0.0rc6__py3-none-any.whl → 4.0.0rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of entari-plugin-hyw might be problematic. Click here for more details.
- entari_plugin_hyw/__init__.py +24 -2
- entari_plugin_hyw/assets/card-dist/index.html +26 -26
- entari_plugin_hyw/browser/engines/default.py +166 -0
- entari_plugin_hyw/browser/manager.py +1 -1
- entari_plugin_hyw/browser/service.py +268 -27
- entari_plugin_hyw/card-ui/src/App.vue +32 -1
- entari_plugin_hyw/definitions.py +22 -3
- entari_plugin_hyw/history.py +34 -44
- entari_plugin_hyw/modular_pipeline.py +130 -36
- entari_plugin_hyw/search.py +45 -9
- entari_plugin_hyw/stage_base.py +3 -0
- entari_plugin_hyw/stage_instruct.py +13 -3
- entari_plugin_hyw/stage_summary.py +6 -0
- entari_plugin_hyw/stage_vision.py +113 -0
- {entari_plugin_hyw-4.0.0rc6.dist-info → entari_plugin_hyw-4.0.0rc7.dist-info}/METADATA +1 -1
- {entari_plugin_hyw-4.0.0rc6.dist-info → entari_plugin_hyw-4.0.0rc7.dist-info}/RECORD +18 -16
- {entari_plugin_hyw-4.0.0rc6.dist-info → entari_plugin_hyw-4.0.0rc7.dist-info}/WHEEL +0 -0
- {entari_plugin_hyw-4.0.0rc6.dist-info → entari_plugin_hyw-4.0.0rc7.dist-info}/top_level.txt +0 -0
entari_plugin_hyw/history.py
CHANGED
|
@@ -75,7 +75,7 @@ class HistoryManager:
|
|
|
75
75
|
self._context_history[context_id] = []
|
|
76
76
|
self._context_history[context_id].append(key)
|
|
77
77
|
|
|
78
|
-
def save_to_disk(self, key: str, save_root: str = "data/conversations", image_path: Optional[str] = None, web_results: Optional[List[Dict]] = None):
|
|
78
|
+
def save_to_disk(self, key: str, save_root: str = "data/conversations", image_path: Optional[str] = None, web_results: Optional[List[Dict]] = None, vision_trace: Optional[Dict] = None, instruct_traces: Optional[List[Dict]] = None):
|
|
79
79
|
"""Save conversation history to specific folder structure"""
|
|
80
80
|
import os
|
|
81
81
|
import time
|
|
@@ -198,51 +198,41 @@ class HistoryManager:
|
|
|
198
198
|
except Exception as e:
|
|
199
199
|
print(f"Failed to copy output image: {e}")
|
|
200
200
|
|
|
201
|
-
# 4. Save
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
md_content += "## History\n\n"
|
|
212
|
-
|
|
213
|
-
for msg in self._history[key]:
|
|
214
|
-
role = msg.get("role", "unknown").upper()
|
|
215
|
-
content = msg.get("content", "")
|
|
216
|
-
|
|
217
|
-
md_content += f"### {role}\n\n"
|
|
201
|
+
# 4. Save Vision Log (if vision stage was used)
|
|
202
|
+
if vision_trace and not vision_trace.get("skipped"):
|
|
203
|
+
vision_md = "# Vision Stage Log\n\n"
|
|
204
|
+
vision_md += f"- **Model**: {vision_trace.get('model', 'unknown')}\n"
|
|
205
|
+
vision_md += f"- **Time**: {vision_trace.get('time', 0):.2f}s\n"
|
|
206
|
+
vision_md += f"- **Images Count**: {vision_trace.get('images_count', 0)}\n"
|
|
207
|
+
vision_md += f"- **Input Tokens**: {vision_trace.get('usage', {}).get('input_tokens', 0)}\n"
|
|
208
|
+
vision_md += f"- **Output Tokens**: {vision_trace.get('usage', {}).get('output_tokens', 0)}\n\n"
|
|
209
|
+
vision_md += "## Vision Description Output\n\n"
|
|
210
|
+
vision_md += f"```\n{vision_trace.get('output', '')}\n```\n"
|
|
218
211
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
try:
|
|
222
|
-
tc_str = json.dumps(tool_calls, ensure_ascii=False, indent=2)
|
|
223
|
-
except:
|
|
224
|
-
tc_str = str(tool_calls)
|
|
225
|
-
md_content += f"**Tool Calls**:\n```json\n{tc_str}\n```\n\n"
|
|
226
|
-
|
|
227
|
-
if role == "TOOL":
|
|
228
|
-
try:
|
|
229
|
-
# Try parsing as JSON first
|
|
230
|
-
if isinstance(content, str):
|
|
231
|
-
parsed = json.loads(content)
|
|
232
|
-
pretty = json.dumps(parsed, ensure_ascii=False, indent=2)
|
|
233
|
-
md_content += f"**Output**:\n```json\n{pretty}\n```\n\n"
|
|
234
|
-
else:
|
|
235
|
-
md_content += f"**Output**:\n```text\n{content}\n```\n\n"
|
|
236
|
-
except:
|
|
237
|
-
md_content += f"**Output**:\n```text\n{content}\n```\n\n"
|
|
238
|
-
else:
|
|
239
|
-
if content:
|
|
240
|
-
md_content += f"{content}\n\n"
|
|
241
|
-
|
|
242
|
-
md_content += "---\n\n"
|
|
212
|
+
with open(os.path.join(folder_path, "vision_log.md"), "w", encoding="utf-8") as f:
|
|
213
|
+
f.write(vision_md)
|
|
243
214
|
|
|
244
|
-
|
|
245
|
-
|
|
215
|
+
# 5. Save Instruct Log (all instruct rounds)
|
|
216
|
+
if instruct_traces:
|
|
217
|
+
instruct_md = "# Instruct Stage Log\n\n"
|
|
218
|
+
for i, trace in enumerate(instruct_traces):
|
|
219
|
+
stage_name = trace.get("stage_name", f"Round {i+1}")
|
|
220
|
+
instruct_md += f"## {stage_name}\n\n"
|
|
221
|
+
instruct_md += f"- **Model**: {trace.get('model', 'unknown')}\n"
|
|
222
|
+
instruct_md += f"- **Time**: {trace.get('time', 0):.2f}s\n"
|
|
223
|
+
instruct_md += f"- **Tool Calls**: {trace.get('tool_calls', 0)}\n"
|
|
224
|
+
instruct_md += f"- **Input Tokens**: {trace.get('usage', {}).get('input_tokens', 0)}\n"
|
|
225
|
+
instruct_md += f"- **Output Tokens**: {trace.get('usage', {}).get('output_tokens', 0)}\n\n"
|
|
226
|
+
|
|
227
|
+
output = trace.get("output", "")
|
|
228
|
+
if output:
|
|
229
|
+
instruct_md += "### Reasoning Output\n\n"
|
|
230
|
+
instruct_md += f"```\n{output}\n```\n\n"
|
|
231
|
+
|
|
232
|
+
instruct_md += "---\n\n"
|
|
233
|
+
|
|
234
|
+
with open(os.path.join(folder_path, "instruct_log.md"), "w", encoding="utf-8") as f:
|
|
235
|
+
f.write(instruct_md)
|
|
246
236
|
|
|
247
237
|
except Exception as e:
|
|
248
238
|
print(f"Failed to save conversation: {e}")
|
|
@@ -16,6 +16,7 @@ from .stage_base import StageContext
|
|
|
16
16
|
from .stage_instruct import InstructStage
|
|
17
17
|
from .stage_instruct_deepsearch import InstructDeepsearchStage
|
|
18
18
|
from .stage_summary import SummaryStage
|
|
19
|
+
from .stage_vision import VisionStage
|
|
19
20
|
from .search import SearchService
|
|
20
21
|
|
|
21
22
|
|
|
@@ -36,9 +37,15 @@ class ModularPipeline:
|
|
|
36
37
|
self.client = AsyncOpenAI(base_url=config.base_url, api_key=config.api_key)
|
|
37
38
|
|
|
38
39
|
# Initialize stages
|
|
39
|
-
self.instruct_stage = InstructStage(config, self.search_service, self.client)
|
|
40
|
+
self.instruct_stage = InstructStage(config, self.search_service, self.client, send_func=send_func)
|
|
40
41
|
self.instruct_deepsearch_stage = InstructDeepsearchStage(config, self.search_service, self.client)
|
|
41
42
|
self.summary_stage = SummaryStage(config, self.search_service, self.client)
|
|
43
|
+
self.vision_stage = VisionStage(config, self.search_service, self.client)
|
|
44
|
+
|
|
45
|
+
def _has_vision_model(self) -> bool:
|
|
46
|
+
"""Check if a vision model is configured."""
|
|
47
|
+
vision_cfg = self.config.get_model_config("vision")
|
|
48
|
+
return bool(vision_cfg.get("model_name"))
|
|
42
49
|
|
|
43
50
|
async def execute(
|
|
44
51
|
self,
|
|
@@ -54,6 +61,9 @@ class ModularPipeline:
|
|
|
54
61
|
stats = {"start_time": start_time}
|
|
55
62
|
usage_totals = {"input_tokens": 0, "output_tokens": 0}
|
|
56
63
|
active_model = model_name or self.config.model_name
|
|
64
|
+
if not active_model:
|
|
65
|
+
# Fallback to instruct model for logging/context
|
|
66
|
+
active_model = self.config.get_model_config("instruct").get("model_name")
|
|
57
67
|
|
|
58
68
|
context = StageContext(
|
|
59
69
|
user_input=user_input,
|
|
@@ -79,6 +89,24 @@ class ModularPipeline:
|
|
|
79
89
|
try:
|
|
80
90
|
logger.info(f"Pipeline: Processing '{user_input[:30]}...'")
|
|
81
91
|
|
|
92
|
+
# === Stage 0: Vision (if images and vision model configured) ===
|
|
93
|
+
if images and self._has_vision_model():
|
|
94
|
+
logger.info("Pipeline: Stage 0 - Vision (generating image description)")
|
|
95
|
+
vision_result = await self.vision_stage.execute(context, images)
|
|
96
|
+
|
|
97
|
+
if vision_result.success and vision_result.data.get("description"):
|
|
98
|
+
context.vision_description = vision_result.data["description"]
|
|
99
|
+
logger.info(f"Pipeline: Vision description generated ({len(context.vision_description)} chars)")
|
|
100
|
+
|
|
101
|
+
# Add vision trace
|
|
102
|
+
trace["vision"] = vision_result.trace
|
|
103
|
+
usage_totals["input_tokens"] += vision_result.usage.get("input_tokens", 0)
|
|
104
|
+
usage_totals["output_tokens"] += vision_result.usage.get("output_tokens", 0)
|
|
105
|
+
|
|
106
|
+
# Clear images since we have the description now
|
|
107
|
+
# (don't pass raw images to later stages when using vision model)
|
|
108
|
+
images = []
|
|
109
|
+
|
|
82
110
|
# === Stage 1: Instruct (Initial Discovery) ===
|
|
83
111
|
logger.info("Pipeline: Stage 1 - Instruct")
|
|
84
112
|
instruct_result = await self.instruct_stage.execute(context)
|
|
@@ -115,20 +143,73 @@ class ModularPipeline:
|
|
|
115
143
|
else:
|
|
116
144
|
logger.info("Pipeline: Mode is 'fast', skipping deepsearch stage")
|
|
117
145
|
|
|
118
|
-
# ===
|
|
119
|
-
#
|
|
120
|
-
|
|
146
|
+
# === Parallel Execution: Summary Generation + Image Prefetching ===
|
|
147
|
+
# We run image prefetching concurrently with Summary generation to save time.
|
|
148
|
+
|
|
149
|
+
# 1. Prepare candidates for prefetch (all images in search results)
|
|
150
|
+
all_candidate_urls = set()
|
|
151
|
+
for r in context.web_results:
|
|
152
|
+
# Add images from search results/pages
|
|
153
|
+
if r.get("images"):
|
|
154
|
+
for img in r["images"]:
|
|
155
|
+
if img and isinstance(img, str) and img.startswith("http"):
|
|
156
|
+
all_candidate_urls.add(img)
|
|
157
|
+
|
|
158
|
+
prefetch_list = list(all_candidate_urls)
|
|
159
|
+
logger.info(f"Pipeline: Starting parallel execution (Summary + Prefetch {len(prefetch_list)} images)")
|
|
160
|
+
|
|
161
|
+
# 2. Define parallel tasks with timing
|
|
162
|
+
async def timed_summary():
|
|
163
|
+
t0 = time.time()
|
|
164
|
+
# Collect page screenshots if image mode
|
|
165
|
+
summary_input_images = list(images) if images else []
|
|
166
|
+
if context.image_input_supported:
|
|
167
|
+
# Collect pre-rendered screenshots from web_results
|
|
168
|
+
for r in context.web_results:
|
|
169
|
+
if r.get("_type") == "page" and r.get("screenshot_b64"):
|
|
170
|
+
summary_input_images.append(r["screenshot_b64"])
|
|
171
|
+
|
|
172
|
+
res = await self.summary_stage.execute(
|
|
173
|
+
context,
|
|
174
|
+
images=summary_input_images if summary_input_images else None
|
|
175
|
+
)
|
|
176
|
+
duration = time.time() - t0
|
|
177
|
+
return res, duration
|
|
178
|
+
|
|
179
|
+
async def timed_prefetch():
|
|
180
|
+
t0 = time.time()
|
|
181
|
+
if not prefetch_list:
|
|
182
|
+
return {}, 0.0
|
|
183
|
+
try:
|
|
184
|
+
from .image_cache import get_image_cache
|
|
185
|
+
cache = get_image_cache()
|
|
186
|
+
# Start prefetch (non-blocking kickoff)
|
|
187
|
+
cache.start_prefetch(prefetch_list)
|
|
188
|
+
# Wait for results (blocking until done)
|
|
189
|
+
res = await cache.get_all_cached(prefetch_list)
|
|
190
|
+
duration = time.time() - t0
|
|
191
|
+
return res, duration
|
|
192
|
+
except Exception as e:
|
|
193
|
+
logger.warning(f"Pipeline: Prefetch failed: {e}")
|
|
194
|
+
return {}, time.time() - t0
|
|
195
|
+
|
|
196
|
+
# 3. Execute concurrently
|
|
197
|
+
summary_task = asyncio.create_task(timed_summary())
|
|
198
|
+
prefetch_task = asyncio.create_task(timed_prefetch())
|
|
199
|
+
|
|
200
|
+
# Wait for both to complete
|
|
201
|
+
await asyncio.wait([summary_task, prefetch_task])
|
|
202
|
+
|
|
203
|
+
# 4. Process results and log timing
|
|
204
|
+
summary_result, summary_time = await summary_task
|
|
205
|
+
cached_map, prefetch_time = await prefetch_task
|
|
121
206
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
207
|
+
time_diff = abs(summary_time - prefetch_time)
|
|
208
|
+
if summary_time > prefetch_time:
|
|
209
|
+
logger.info(f"Pipeline: Image Prefetch finished first ({prefetch_time:.2f}s). Summary took {summary_time:.2f}s. (Waited {time_diff:.2f}s for Summary)")
|
|
210
|
+
else:
|
|
211
|
+
logger.info(f"Pipeline: Summary finished first ({summary_time:.2f}s). Image Prefetch took {prefetch_time:.2f}s. (Waited {time_diff:.2f}s for Prefetch)")
|
|
127
212
|
|
|
128
|
-
summary_result = await self.summary_stage.execute(
|
|
129
|
-
context,
|
|
130
|
-
images=all_images if all_images else None
|
|
131
|
-
)
|
|
132
213
|
trace["summary"] = summary_result.trace
|
|
133
214
|
usage_totals["input_tokens"] += summary_result.usage.get("input_tokens", 0)
|
|
134
215
|
usage_totals["output_tokens"] += summary_result.usage.get("output_tokens", 0)
|
|
@@ -139,40 +220,30 @@ class ModularPipeline:
|
|
|
139
220
|
stats["total_time"] = time.time() - start_time
|
|
140
221
|
structured = self._parse_response(summary_content, context)
|
|
141
222
|
|
|
142
|
-
# ===
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
# 1. Collect all image URLs from structured response
|
|
148
|
-
all_image_urls = []
|
|
149
|
-
for ref in structured.get("references", []):
|
|
150
|
-
if ref.get("images"):
|
|
151
|
-
all_image_urls.extend([img for img in ref["images"] if img and img.startswith("http")])
|
|
152
|
-
|
|
153
|
-
if all_image_urls:
|
|
154
|
-
# 2. Prefetch (wait for them as we are about to render)
|
|
155
|
-
cached_map = await cache.get_all_cached(all_image_urls)
|
|
156
|
-
|
|
157
|
-
# 3. Update structured response with cached (base64) URLs
|
|
223
|
+
# === Apply Cached Images ===
|
|
224
|
+
# Update structured response using the map from parallel prefetch
|
|
225
|
+
if cached_map:
|
|
226
|
+
try:
|
|
227
|
+
total_replaced = 0
|
|
158
228
|
for ref in structured.get("references", []):
|
|
159
229
|
if ref.get("images"):
|
|
160
|
-
# Keep cached images, but preserve original URLs as fallback
|
|
161
230
|
new_images = []
|
|
162
231
|
for img in ref["images"]:
|
|
163
|
-
# 1. Already Base64
|
|
232
|
+
# 1. Already Base64 -> Keep it
|
|
164
233
|
if img.startswith("data:"):
|
|
165
234
|
new_images.append(img)
|
|
166
235
|
continue
|
|
167
|
-
|
|
168
|
-
# 2.
|
|
236
|
+
|
|
237
|
+
# 2. Check cache
|
|
169
238
|
cached_val = cached_map.get(img)
|
|
170
239
|
if cached_val and cached_val.startswith("data:"):
|
|
171
240
|
new_images.append(cached_val)
|
|
172
|
-
|
|
241
|
+
total_replaced += 1
|
|
242
|
+
# 3. Else -> DROP IT (as per policy)
|
|
173
243
|
ref["images"] = new_images
|
|
174
|
-
|
|
175
|
-
|
|
244
|
+
logger.debug(f"Pipeline: Replaced {total_replaced} images with cached versions")
|
|
245
|
+
except Exception as e:
|
|
246
|
+
logger.warning(f"Pipeline: Applying cached images failed: {e}")
|
|
176
247
|
|
|
177
248
|
# Debug: Log image counts
|
|
178
249
|
total_ref_images = sum(len(ref.get("images", []) or []) for ref in structured.get("references", []))
|
|
@@ -197,6 +268,8 @@ class ModularPipeline:
|
|
|
197
268
|
},
|
|
198
269
|
"stages_used": stages_used,
|
|
199
270
|
"web_results": context.web_results,
|
|
271
|
+
"vision_trace": trace.get("vision"),
|
|
272
|
+
"instruct_traces": trace.get("instruct_rounds", []),
|
|
200
273
|
}
|
|
201
274
|
|
|
202
275
|
except Exception as e:
|
|
@@ -314,6 +387,27 @@ class ModularPipeline:
|
|
|
314
387
|
"references": search_refs,
|
|
315
388
|
"description": f"Found {len(search_refs)} results."
|
|
316
389
|
})
|
|
390
|
+
|
|
391
|
+
# 2. Vision Stage (if used)
|
|
392
|
+
if trace.get("vision"):
|
|
393
|
+
v = trace["vision"]
|
|
394
|
+
if not v.get("skipped"):
|
|
395
|
+
usage = v.get("usage", {})
|
|
396
|
+
vision_cfg = self.config.get_model_config("vision")
|
|
397
|
+
input_price = vision_cfg.get("input_price") or 0
|
|
398
|
+
output_price = vision_cfg.get("output_price") or 0
|
|
399
|
+
cost = (usage.get("input_tokens", 0) * input_price + usage.get("output_tokens", 0) * output_price) / 1_000_000
|
|
400
|
+
|
|
401
|
+
stages.append({
|
|
402
|
+
"name": "Vision",
|
|
403
|
+
"model": v.get("model"),
|
|
404
|
+
"icon_config": "google",
|
|
405
|
+
"provider": "Vision",
|
|
406
|
+
"time": v.get("time", 0),
|
|
407
|
+
"description": f"Analyzed {v.get('images_count', 0)} image(s).",
|
|
408
|
+
"usage": usage,
|
|
409
|
+
"cost": cost
|
|
410
|
+
})
|
|
317
411
|
|
|
318
412
|
# 2. Instruct Rounds
|
|
319
413
|
for i, t in enumerate(trace.get("instruct_rounds", [])):
|
entari_plugin_hyw/search.py
CHANGED
|
@@ -10,6 +10,7 @@ from .browser.service import get_screenshot_service
|
|
|
10
10
|
from .browser.engines.bing import BingEngine
|
|
11
11
|
from .browser.engines.duckduckgo import DuckDuckGoEngine
|
|
12
12
|
from .browser.engines.google import GoogleEngine
|
|
13
|
+
from .browser.engines.default import DefaultEngine
|
|
13
14
|
|
|
14
15
|
class SearchService:
|
|
15
16
|
def __init__(self, config: Any):
|
|
@@ -21,8 +22,11 @@ class SearchService:
|
|
|
21
22
|
# Domain blocking
|
|
22
23
|
self._blocked_domains = getattr(config, "blocked_domains", []) or []
|
|
23
24
|
|
|
24
|
-
# Select Engine
|
|
25
|
-
self._engine_name = getattr(config, "search_engine",
|
|
25
|
+
# Select Engine - DefaultEngine when not specified
|
|
26
|
+
self._engine_name = getattr(config, "search_engine", None)
|
|
27
|
+
if self._engine_name:
|
|
28
|
+
self._engine_name = self._engine_name.lower()
|
|
29
|
+
|
|
26
30
|
if self._engine_name == "bing":
|
|
27
31
|
self._engine = BingEngine()
|
|
28
32
|
elif self._engine_name == "google":
|
|
@@ -30,8 +34,9 @@ class SearchService:
|
|
|
30
34
|
elif self._engine_name == "duckduckgo":
|
|
31
35
|
self._engine = DuckDuckGoEngine()
|
|
32
36
|
else:
|
|
33
|
-
# Default
|
|
34
|
-
self._engine =
|
|
37
|
+
# Default: use browser address bar search (Google-based)
|
|
38
|
+
self._engine = DefaultEngine()
|
|
39
|
+
self._engine_name = "default"
|
|
35
40
|
|
|
36
41
|
logger.info(f"SearchService initialized with engine: {self._engine_name}")
|
|
37
42
|
|
|
@@ -39,7 +44,8 @@ class SearchService:
|
|
|
39
44
|
return self._engine.build_url(query, self._default_limit)
|
|
40
45
|
|
|
41
46
|
async def search_batch(self, queries: List[str]) -> List[List[Dict[str, Any]]]:
|
|
42
|
-
"""Execute multiple searches concurrently."""
|
|
47
|
+
"""Execute multiple searches concurrently using standard URL navigation."""
|
|
48
|
+
logger.info(f"SearchService: Batch searching {len(queries)} queries in parallel...")
|
|
43
49
|
tasks = [self.search(q) for q in queries]
|
|
44
50
|
return await asyncio.gather(*tasks)
|
|
45
51
|
|
|
@@ -58,17 +64,36 @@ class SearchService:
|
|
|
58
64
|
final_query = f"{query} {exclusions}"
|
|
59
65
|
|
|
60
66
|
url = self._build_search_url(final_query)
|
|
61
|
-
|
|
62
|
-
|
|
67
|
+
|
|
63
68
|
results = []
|
|
64
69
|
try:
|
|
65
|
-
#
|
|
66
|
-
|
|
70
|
+
# Check if this is an address bar search (DefaultEngine)
|
|
71
|
+
if url.startswith("__ADDRESS_BAR_SEARCH__:"):
|
|
72
|
+
# Extract query from marker
|
|
73
|
+
search_query = url.replace("__ADDRESS_BAR_SEARCH__:", "")
|
|
74
|
+
logger.info(f"Search: '{query}' -> [Address Bar Search]")
|
|
75
|
+
|
|
76
|
+
# Use address bar input method
|
|
77
|
+
service = get_screenshot_service(headless=self._headless)
|
|
78
|
+
page_data = await service.search_via_address_bar(search_query)
|
|
79
|
+
else:
|
|
80
|
+
logger.info(f"Search: '{query}' -> {url}")
|
|
81
|
+
# Standard URL navigation
|
|
82
|
+
page_data = await self.fetch_page_raw(url, include_screenshot=False)
|
|
83
|
+
|
|
67
84
|
content = page_data.get("html", "") or page_data.get("content", "")
|
|
85
|
+
|
|
86
|
+
# Debug: Log content length
|
|
87
|
+
logger.debug(f"Search: Raw content length = {len(content)} chars")
|
|
88
|
+
if len(content) < 500:
|
|
89
|
+
logger.warning(f"Search: Content too short, may be empty/blocked. First 500 chars: {content[:500]}")
|
|
68
90
|
|
|
69
91
|
# Parse Results (skip raw page - only return parsed results)
|
|
70
92
|
if content and not content.startswith("Error"):
|
|
71
93
|
parsed = self._engine.parse(content)
|
|
94
|
+
|
|
95
|
+
# Debug: Log parse result
|
|
96
|
+
logger.info(f"Search: Engine {self._engine_name} parsed {len(parsed)} results from {len(content)} chars")
|
|
72
97
|
|
|
73
98
|
# JAVASCRIPT IMAGE INJECTION
|
|
74
99
|
# Inject base64 images from JS extraction if available
|
|
@@ -84,6 +109,17 @@ class SearchService:
|
|
|
84
109
|
parsed[i]["images"].insert(0, b64_src)
|
|
85
110
|
|
|
86
111
|
logger.info(f"Search parsed {len(parsed)} results for '{query}' using {self._engine_name}")
|
|
112
|
+
|
|
113
|
+
# ALWAYS add raw search page as hidden item for debug saving
|
|
114
|
+
# (even when 0 results, so we can debug the parser)
|
|
115
|
+
results.append({
|
|
116
|
+
"title": f"[DEBUG] Raw Search: {query}",
|
|
117
|
+
"url": url,
|
|
118
|
+
"content": content[:50000], # Limit to 50KB
|
|
119
|
+
"_type": "search_raw_page",
|
|
120
|
+
"_hidden": True, # Don't show to LLM
|
|
121
|
+
})
|
|
122
|
+
|
|
87
123
|
results.extend(parsed)
|
|
88
124
|
else:
|
|
89
125
|
logger.warning(f"Search failed/empty for '{query}': {content[:100]}")
|
entari_plugin_hyw/stage_base.py
CHANGED
|
@@ -8,7 +8,7 @@ Analyze user query and execute initial searches.
|
|
|
8
8
|
import json
|
|
9
9
|
import time
|
|
10
10
|
import asyncio
|
|
11
|
-
from typing import Any, Dict, List, Optional, Tuple
|
|
11
|
+
from typing import Any, Dict, List, Optional, Tuple, Callable, Awaitable
|
|
12
12
|
from loguru import logger
|
|
13
13
|
from openai import AsyncOpenAI
|
|
14
14
|
|
|
@@ -26,14 +26,15 @@ class InstructStage(BaseStage):
|
|
|
26
26
|
def name(self) -> str:
|
|
27
27
|
return "Instruct"
|
|
28
28
|
|
|
29
|
-
def __init__(self, config: Any, search_service: Any, client: AsyncOpenAI):
|
|
29
|
+
def __init__(self, config: Any, search_service: Any, client: AsyncOpenAI, send_func: Optional[Callable[[str], Awaitable[None]]] = None):
|
|
30
30
|
super().__init__(config, search_service, client)
|
|
31
|
+
self.send_func = send_func
|
|
31
32
|
|
|
32
33
|
self.refuse_answer_tool = get_refuse_answer_tool()
|
|
33
34
|
self.web_search_tool = get_web_search_tool()
|
|
34
35
|
self.crawl_page_tool = get_crawl_page_tool()
|
|
35
36
|
self.set_mode_tool = get_set_mode_tool()
|
|
36
|
-
|
|
37
|
+
|
|
37
38
|
async def execute(self, context: StageContext) -> StageResult:
|
|
38
39
|
start_time = time.time()
|
|
39
40
|
|
|
@@ -113,6 +114,7 @@ class InstructStage(BaseStage):
|
|
|
113
114
|
model = model_cfg.get("model_name") or self.config.model_name
|
|
114
115
|
|
|
115
116
|
try:
|
|
117
|
+
logger.info(f"Instruct: Sending LLM request to {model}...")
|
|
116
118
|
response = await client.chat.completions.create(
|
|
117
119
|
model=model,
|
|
118
120
|
messages=messages,
|
|
@@ -186,6 +188,14 @@ class InstructStage(BaseStage):
|
|
|
186
188
|
if mode in ("fast", "deepsearch"):
|
|
187
189
|
context.selected_mode = mode
|
|
188
190
|
logger.info(f"Instruct: Mode set to '{mode}'")
|
|
191
|
+
|
|
192
|
+
# Notify immediately if deepsearch
|
|
193
|
+
if mode == "deepsearch" and self.send_func:
|
|
194
|
+
try:
|
|
195
|
+
await self.send_func("🔍 正在进行深度研究,可能需要一些时间,请耐心等待...")
|
|
196
|
+
except Exception as e:
|
|
197
|
+
logger.warning(f"Instruct: Failed to send notification: {e}")
|
|
198
|
+
|
|
189
199
|
results_for_context.append({
|
|
190
200
|
"id": tc_id, "name": name, "content": f"Mode set to: {mode}"
|
|
191
201
|
})
|
|
@@ -47,6 +47,11 @@ class SummaryStage(BaseStage):
|
|
|
47
47
|
# Build Context Message
|
|
48
48
|
context_message = f"## Web Search & Page Content\n\n```context\n{full_context}\n```"
|
|
49
49
|
|
|
50
|
+
# Add vision description if present (from VisionStage)
|
|
51
|
+
if context.vision_description:
|
|
52
|
+
vision_context = f"## 用户图片描述\n\n{context.vision_description}"
|
|
53
|
+
context_message = f"{vision_context}\n\n{context_message}"
|
|
54
|
+
|
|
50
55
|
# Build user content
|
|
51
56
|
user_text = context.user_input or "..."
|
|
52
57
|
if images:
|
|
@@ -104,6 +109,7 @@ class SummaryStage(BaseStage):
|
|
|
104
109
|
"provider": model_cfg.get("model_provider") or "Unknown",
|
|
105
110
|
"usage": usage,
|
|
106
111
|
"system_prompt": system_prompt,
|
|
112
|
+
"context_message": context_message, # Includes vision description + search results
|
|
107
113
|
"output": content,
|
|
108
114
|
"time": time.time() - start_time,
|
|
109
115
|
"images_count": len(images) if images else 0,
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Vision Stage
|
|
3
|
+
|
|
4
|
+
Generates image description using a vision-capable model.
|
|
5
|
+
The description is then passed as context to subsequent stages.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
from typing import Any, Dict, List, Optional
|
|
10
|
+
|
|
11
|
+
from loguru import logger
|
|
12
|
+
from openai import AsyncOpenAI
|
|
13
|
+
|
|
14
|
+
from .stage_base import BaseStage, StageContext, StageResult
|
|
15
|
+
from .definitions import VISION_DESCRIPTION_SP
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class VisionStage(BaseStage):
|
|
19
|
+
"""
|
|
20
|
+
Vision Stage: Generate image description.
|
|
21
|
+
|
|
22
|
+
Takes user images and text, calls a vision model to produce
|
|
23
|
+
a detailed description of the image content.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
@property
|
|
27
|
+
def name(self) -> str:
|
|
28
|
+
return "Vision"
|
|
29
|
+
|
|
30
|
+
async def execute(
|
|
31
|
+
self,
|
|
32
|
+
context: StageContext,
|
|
33
|
+
images: List[str] = None
|
|
34
|
+
) -> StageResult:
|
|
35
|
+
"""Generate image description."""
|
|
36
|
+
start_time = time.time()
|
|
37
|
+
|
|
38
|
+
if not images:
|
|
39
|
+
return StageResult(
|
|
40
|
+
success=True,
|
|
41
|
+
data={"description": ""},
|
|
42
|
+
trace={"skipped": True, "reason": "No images provided"}
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
# Get model config for vision stage
|
|
46
|
+
model_cfg = self.config.get_model_config("vision")
|
|
47
|
+
model = model_cfg.get("model_name")
|
|
48
|
+
|
|
49
|
+
if not model:
|
|
50
|
+
logger.warning("VisionStage: No vision model configured, skipping")
|
|
51
|
+
return StageResult(
|
|
52
|
+
success=True,
|
|
53
|
+
data={"description": ""},
|
|
54
|
+
trace={"skipped": True, "reason": "No vision model configured"}
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
client = self._client_for(
|
|
58
|
+
api_key=model_cfg.get("api_key"),
|
|
59
|
+
base_url=model_cfg.get("base_url")
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
# Build user content with images
|
|
63
|
+
user_text = context.user_input or "请描述这张图片"
|
|
64
|
+
user_content: List[Dict[str, Any]] = [{"type": "text", "text": user_text}]
|
|
65
|
+
|
|
66
|
+
for img_b64 in images:
|
|
67
|
+
url = f"data:image/jpeg;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
|
|
68
|
+
user_content.append({"type": "image_url", "image_url": {"url": url}})
|
|
69
|
+
|
|
70
|
+
messages = [
|
|
71
|
+
{"role": "system", "content": VISION_DESCRIPTION_SP},
|
|
72
|
+
{"role": "user", "content": user_content}
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
try:
|
|
76
|
+
logger.info(f"VisionStage: Calling model '{model}' with {len(images)} image(s)")
|
|
77
|
+
response = await client.chat.completions.create(
|
|
78
|
+
model=model,
|
|
79
|
+
messages=messages,
|
|
80
|
+
temperature=0.3, # Lower temperature for factual description
|
|
81
|
+
extra_body=model_cfg.get("extra_body"),
|
|
82
|
+
)
|
|
83
|
+
except Exception as e:
|
|
84
|
+
logger.error(f"VisionStage LLM error: {e}")
|
|
85
|
+
return StageResult(
|
|
86
|
+
success=False,
|
|
87
|
+
error=str(e),
|
|
88
|
+
data={"description": ""},
|
|
89
|
+
trace={"error": str(e)}
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
usage = {"input_tokens": 0, "output_tokens": 0}
|
|
93
|
+
if hasattr(response, "usage") and response.usage:
|
|
94
|
+
usage["input_tokens"] = getattr(response.usage, "prompt_tokens", 0) or 0
|
|
95
|
+
usage["output_tokens"] = getattr(response.usage, "completion_tokens", 0) or 0
|
|
96
|
+
|
|
97
|
+
description = (response.choices[0].message.content or "").strip()
|
|
98
|
+
|
|
99
|
+
logger.info(f"VisionStage: Generated description ({len(description)} chars)")
|
|
100
|
+
|
|
101
|
+
return StageResult(
|
|
102
|
+
success=True,
|
|
103
|
+
data={"description": description},
|
|
104
|
+
usage=usage,
|
|
105
|
+
trace={
|
|
106
|
+
"model": model,
|
|
107
|
+
"provider": model_cfg.get("model_provider") or "Unknown",
|
|
108
|
+
"usage": usage,
|
|
109
|
+
"output": description,
|
|
110
|
+
"time": time.time() - start_time,
|
|
111
|
+
"images_count": len(images),
|
|
112
|
+
}
|
|
113
|
+
)
|