entari-plugin-hyw 4.0.0rc5__tar.gz → 4.0.0rc6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of entari-plugin-hyw might be problematic. Click here for more details.
- {entari_plugin_hyw-4.0.0rc5/src/entari_plugin_hyw.egg-info → entari_plugin_hyw-4.0.0rc6}/PKG-INFO +1 -1
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/pyproject.toml +1 -1
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/__init__.py +47 -7
- entari_plugin_hyw-4.0.0rc5/src/entari_plugin_hyw/browser/engines/searxng.py → entari_plugin_hyw-4.0.0rc6/src/entari_plugin_hyw/browser/engines/duckduckgo.py +4 -4
- entari_plugin_hyw-4.0.0rc6/src/entari_plugin_hyw/browser/engines/google.py +155 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/browser/service.py +55 -26
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/definitions.py +34 -9
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/misc.py +34 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/modular_pipeline.py +51 -18
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/search.py +26 -20
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/stage_base.py +4 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/stage_instruct.py +21 -4
- entari_plugin_hyw-4.0.0rc6/src/entari_plugin_hyw/stage_instruct_deepsearch.py +104 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6/src/entari_plugin_hyw.egg-info}/PKG-INFO +1 -1
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw.egg-info/SOURCES.txt +3 -2
- entari_plugin_hyw-4.0.0rc5/src/entari_plugin_hyw/stage_instruct_review.py +0 -92
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/MANIFEST.in +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/README.md +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/setup.cfg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/index.html +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/anthropic.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/cerebras.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/deepseek.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/gemini.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/google.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/grok.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/huggingface.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/microsoft.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/minimax.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/mistral.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/nvida.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/openai.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/openrouter.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/perplexity.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/qwen.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/xai.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/xiaomi.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/logos/zai.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/card-dist/vite.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/anthropic.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/cerebras.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/deepseek.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/gemini.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/google.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/grok.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/huggingface.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/microsoft.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/minimax.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/mistral.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/nvida.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/openai.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/openrouter.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/perplexity.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/qwen.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/xai.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/xiaomi.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/assets/icon/zai.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/browser/__init__.py +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/browser/engines/base.py +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/browser/engines/bing.py +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/browser/landing.html +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/browser/manager.py +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/.gitignore +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/README.md +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/index.html +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/package-lock.json +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/package.json +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/anthropic.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/cerebras.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/gemini.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/google.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/microsoft.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/openai.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/perplexity.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/public/vite.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/src/App.vue +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/src/assets/vue.svg +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/src/components/SectionCard.vue +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/src/components/StageCard.vue +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/src/main.ts +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/src/style.css +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/src/test_regex.js +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/src/types.ts +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/tsconfig.app.json +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/tsconfig.json +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/tsconfig.node.json +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/card-ui/vite.config.ts +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/history.py +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/image_cache.py +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/render_vue.py +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/stage_summary.py +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw.egg-info/dependency_links.txt +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw.egg-info/requires.txt +0 -0
- {entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw.egg-info/top_level.txt +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "entari_plugin_hyw"
|
|
7
|
-
version = "4.0.0-rc5"
|
|
7
|
+
version = "4.0.0-rc6"
|
|
8
8
|
description = "Use large language models to interpret chat messages"
|
|
9
9
|
authors = [{name = "kumoSleeping", email = "zjr2992@outlook.com"}]
|
|
10
10
|
dependencies = [
|
|
@@ -22,7 +22,7 @@ from arclet.entari.event.command import CommandReceive
|
|
|
22
22
|
from .modular_pipeline import ModularPipeline
|
|
23
23
|
from .history import HistoryManager
|
|
24
24
|
from .render_vue import ContentRenderer, get_content_renderer
|
|
25
|
-
from .misc import process_onebot_json, process_images, resolve_model_name, render_refuse_answer, REFUSE_ANSWER_MARKDOWN
|
|
25
|
+
from .misc import process_onebot_json, process_images, resolve_model_name, render_refuse_answer, render_image_unsupported, REFUSE_ANSWER_MARKDOWN
|
|
26
26
|
from arclet.entari.event.lifespan import Cleanup
|
|
27
27
|
|
|
28
28
|
import os
|
|
@@ -93,6 +93,7 @@ class ModelConfig:
|
|
|
93
93
|
model_provider: Optional[str] = None
|
|
94
94
|
input_price: Optional[float] = None
|
|
95
95
|
output_price: Optional[float] = None
|
|
96
|
+
image_input: bool = True
|
|
96
97
|
|
|
97
98
|
|
|
98
99
|
@dataclass
|
|
@@ -119,9 +120,7 @@ class HywConfig(BasicConfModel):
|
|
|
119
120
|
main: Optional[ModelConfig] = None # Summary stage
|
|
120
121
|
|
|
121
122
|
# Search/Fetch Settings
|
|
122
|
-
search_engine: str = "
|
|
123
|
-
enable_domain_blocking: bool = True
|
|
124
|
-
page_content_mode: str = "text"
|
|
123
|
+
search_engine: str = "google"
|
|
125
124
|
|
|
126
125
|
# Rendering Settings
|
|
127
126
|
headless: bool = False
|
|
@@ -131,10 +130,10 @@ class HywConfig(BasicConfModel):
|
|
|
131
130
|
# Bot Behavior
|
|
132
131
|
save_conversation: bool = False
|
|
133
132
|
reaction: bool = False
|
|
134
|
-
quote: bool = True
|
|
133
|
+
quote: bool = False
|
|
135
134
|
|
|
136
135
|
# UI Theme
|
|
137
|
-
theme_color: str = "#
|
|
136
|
+
theme_color: str = "#ff0000"
|
|
138
137
|
|
|
139
138
|
def __post_init__(self):
|
|
140
139
|
"""Parse and normalize theme color after initialization."""
|
|
@@ -315,7 +314,48 @@ async def process_request(
|
|
|
315
314
|
|
|
316
315
|
images, err = await process_images(mc, vision_model)
|
|
317
316
|
|
|
318
|
-
#
|
|
317
|
+
# Check image input support
|
|
318
|
+
model_cfg_dict = next((m for m in conf.models if m.get("name") == model), None)
|
|
319
|
+
image_input_supported = True
|
|
320
|
+
if model_cfg_dict:
|
|
321
|
+
image_input_supported = model_cfg_dict.get("image_input", True)
|
|
322
|
+
|
|
323
|
+
# Log inferenced content mode
|
|
324
|
+
inferred_content_mode = "image" if image_input_supported else "text"
|
|
325
|
+
logger.info(f"Process Request: Model '{model}' Image Input: {image_input_supported} -> Mode: {inferred_content_mode}")
|
|
326
|
+
|
|
327
|
+
if images and not image_input_supported:
|
|
328
|
+
logger.warning(f"Model '{model}' does not support images, but user sent {len(images)} images.")
|
|
329
|
+
|
|
330
|
+
# Start renderer for the unsupported card
|
|
331
|
+
renderer = await get_content_renderer()
|
|
332
|
+
render_tab_task = asyncio.create_task(renderer.prepare_tab())
|
|
333
|
+
|
|
334
|
+
# Wait for tab and render unsupported
|
|
335
|
+
try:
|
|
336
|
+
tab_id = await render_tab_task
|
|
337
|
+
except Exception as e:
|
|
338
|
+
tab_id = None
|
|
339
|
+
|
|
340
|
+
import tempfile
|
|
341
|
+
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tf:
|
|
342
|
+
output_path = tf.name
|
|
343
|
+
|
|
344
|
+
render_ok = await render_image_unsupported(
|
|
345
|
+
renderer=renderer,
|
|
346
|
+
output_path=output_path,
|
|
347
|
+
theme_color=conf.theme_color,
|
|
348
|
+
tab_id=tab_id
|
|
349
|
+
)
|
|
350
|
+
|
|
351
|
+
if render_ok:
|
|
352
|
+
with open(output_path, "rb") as f:
|
|
353
|
+
img_data = base64.b64encode(f.read()).decode()
|
|
354
|
+
await session.send(MessageChain(Image(src=f'data:image/png;base64,{img_data}')))
|
|
355
|
+
if os.path.exists(output_path):
|
|
356
|
+
os.remove(output_path)
|
|
357
|
+
return
|
|
358
|
+
|
|
319
359
|
renderer = await get_content_renderer()
|
|
320
360
|
render_tab_task = asyncio.create_task(renderer.prepare_tab())
|
|
321
361
|
tab_id = None
|
|
@@ -5,9 +5,9 @@ from typing import List, Dict, Any
|
|
|
5
5
|
from loguru import logger
|
|
6
6
|
from .base import SearchEngine
|
|
7
7
|
|
|
8
|
-
class SearXNGEngine(SearchEngine):
|
|
8
|
+
class DuckDuckGoEngine(SearchEngine):
|
|
9
9
|
"""
|
|
10
|
-
Parser for DuckDuckGo
|
|
10
|
+
Parser for DuckDuckGo Lite results.
|
|
11
11
|
Handles both Markdown (from Crawl4AI) and HTML (fallback).
|
|
12
12
|
"""
|
|
13
13
|
|
|
@@ -83,7 +83,7 @@ class SearXNGEngine(SearchEngine):
|
|
|
83
83
|
})
|
|
84
84
|
seen_urls.add(href)
|
|
85
85
|
|
|
86
|
-
logger.info(f"
|
|
86
|
+
logger.info(f"DuckDuckGo Parser(HTML) found {len(results)} results.")
|
|
87
87
|
return results
|
|
88
88
|
|
|
89
89
|
def _parse_markdown(self, content: str) -> List[Dict[str, Any]]:
|
|
@@ -133,5 +133,5 @@ class SearXNGEngine(SearchEngine):
|
|
|
133
133
|
if current_result:
|
|
134
134
|
results.append(current_result)
|
|
135
135
|
|
|
136
|
-
logger.info(f"
|
|
136
|
+
logger.info(f"DuckDuckGo Parser(Markdown) found {len(results)} results.")
|
|
137
137
|
return results
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
|
|
2
|
+
import urllib.parse
|
|
3
|
+
import re
|
|
4
|
+
from typing import List, Dict, Any
|
|
5
|
+
from loguru import logger
|
|
6
|
+
from .base import SearchEngine
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GoogleEngine(SearchEngine):
|
|
10
|
+
"""
|
|
11
|
+
Search engine implementation for Google.
|
|
12
|
+
Parses Google Search HTML results.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
def build_url(self, query: str, limit: int = 10) -> str:
|
|
16
|
+
encoded_query = urllib.parse.quote(query)
|
|
17
|
+
return f"https://www.google.com/search?q={encoded_query}"
|
|
18
|
+
|
|
19
|
+
def parse(self, content: str) -> List[Dict[str, Any]]:
|
|
20
|
+
results = []
|
|
21
|
+
seen_urls = set()
|
|
22
|
+
|
|
23
|
+
# Google search results are in blocks with class="MjjYud" or similar containers
|
|
24
|
+
# Split by result blocks first for more accurate extraction
|
|
25
|
+
|
|
26
|
+
# Method 1: Split by common result block classes
|
|
27
|
+
block_patterns = [
|
|
28
|
+
r'<div class="MjjYud"[^>]*>',
|
|
29
|
+
r'<div class="tF2Cxc"[^>]*>',
|
|
30
|
+
r'<div class="g Ww4FFb"[^>]*>',
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
blocks = [content]
|
|
34
|
+
for bp in block_patterns:
|
|
35
|
+
new_blocks = []
|
|
36
|
+
for block in blocks:
|
|
37
|
+
parts = re.split(bp, block)
|
|
38
|
+
new_blocks.extend(parts)
|
|
39
|
+
blocks = new_blocks
|
|
40
|
+
|
|
41
|
+
for block in blocks:
|
|
42
|
+
if len(block) < 100:
|
|
43
|
+
continue
|
|
44
|
+
|
|
45
|
+
# Find URL in this block - prefer links with h3 nearby
|
|
46
|
+
url_match = re.search(r'<a[^>]+href="(https?://(?!www\.google\.|google\.|webcache\.googleusercontent\.)[^"]+)"[^>]*>', block)
|
|
47
|
+
if not url_match:
|
|
48
|
+
continue
|
|
49
|
+
|
|
50
|
+
url = url_match.group(1)
|
|
51
|
+
if url in seen_urls or self._should_skip_url(url):
|
|
52
|
+
continue
|
|
53
|
+
|
|
54
|
+
# Find h3 title in this block
|
|
55
|
+
h3_match = re.search(r'<h3[^>]*>(.*?)</h3>', block, re.IGNORECASE | re.DOTALL)
|
|
56
|
+
if not h3_match:
|
|
57
|
+
continue
|
|
58
|
+
|
|
59
|
+
title = re.sub(r'<[^>]+>', '', h3_match.group(1)).strip()
|
|
60
|
+
if not title or len(title) < 2:
|
|
61
|
+
continue
|
|
62
|
+
|
|
63
|
+
seen_urls.add(url)
|
|
64
|
+
|
|
65
|
+
# Extract snippet from VwiC3b class (Google's snippet container)
|
|
66
|
+
snippet = ""
|
|
67
|
+
snippet_match = re.search(r'<div[^>]*class="[^"]*VwiC3b[^"]*"[^>]*>(.*?)</div>', block, re.IGNORECASE | re.DOTALL)
|
|
68
|
+
if snippet_match:
|
|
69
|
+
snippet = re.sub(r'<[^>]+>', ' ', snippet_match.group(1)).strip()
|
|
70
|
+
snippet = re.sub(r'\s+', ' ', snippet).strip()
|
|
71
|
+
|
|
72
|
+
# Fallback: look for any text after h3
|
|
73
|
+
if not snippet:
|
|
74
|
+
# Try other common snippet patterns
|
|
75
|
+
alt_patterns = [
|
|
76
|
+
r'<span[^>]*class="[^"]*aCOpRe[^"]*"[^>]*>(.*?)</span>',
|
|
77
|
+
r'<div[^>]*data-snc[^>]*>(.*?)</div>',
|
|
78
|
+
]
|
|
79
|
+
for ap in alt_patterns:
|
|
80
|
+
am = re.search(ap, block, re.IGNORECASE | re.DOTALL)
|
|
81
|
+
if am:
|
|
82
|
+
snippet = re.sub(r'<[^>]+>', ' ', am.group(1)).strip()
|
|
83
|
+
snippet = re.sub(r'\s+', ' ', snippet).strip()
|
|
84
|
+
break
|
|
85
|
+
|
|
86
|
+
# Extract images from this block
|
|
87
|
+
images = []
|
|
88
|
+
# Pattern 1: Regular img src (excluding data: and tracking pixels)
|
|
89
|
+
# Note: gstatic.com/images/branding is logo, but encrypted-tbn*.gstatic.com are thumbnails
|
|
90
|
+
img_matches = re.findall(r'<img[^>]+src="(https?://[^"]+)"', block)
|
|
91
|
+
for img_url in img_matches:
|
|
92
|
+
# Decode HTML entities
|
|
93
|
+
img_url = img_url.replace('&amp;', '&')
|
|
94
|
+
# Skip tracking/icon/small images (but allow encrypted-tbn which are valid thumbnails)
|
|
95
|
+
if any(x in img_url.lower() for x in ['favicon', 'icon', 'tracking', 'pixel', 'logo', 'gstatic.com/images/branding', '1x1', 'transparent', 'gstatic.com/images/icons']):
|
|
96
|
+
continue
|
|
97
|
+
if img_url not in images:
|
|
98
|
+
images.append(img_url)
|
|
99
|
+
|
|
100
|
+
# Pattern 2: data-src (lazy loaded images)
|
|
101
|
+
data_src_matches = re.findall(r'data-src="(https?://[^"]+)"', block)
|
|
102
|
+
for img_url in data_src_matches:
|
|
103
|
+
img_url = img_url.replace('&amp;', '&')
|
|
104
|
+
if any(x in img_url.lower() for x in ['favicon', 'icon', 'tracking', 'pixel', 'logo']):
|
|
105
|
+
continue
|
|
106
|
+
if img_url not in images:
|
|
107
|
+
images.append(img_url)
|
|
108
|
+
|
|
109
|
+
results.append({
|
|
110
|
+
"title": title,
|
|
111
|
+
"url": url,
|
|
112
|
+
"domain": urllib.parse.urlparse(url).hostname or "",
|
|
113
|
+
"content": snippet[:1000],
|
|
114
|
+
"images": images[:3] # Limit to 3 images per result
|
|
115
|
+
})
|
|
116
|
+
|
|
117
|
+
if len(results) >= 15:
|
|
118
|
+
break
|
|
119
|
+
|
|
120
|
+
total_images = sum(len(r.get("images", [])) for r in results)
|
|
121
|
+
logger.info(f"GoogleEngine parsed {len(results)} results with {total_images} images total.")
|
|
122
|
+
return results
|
|
123
|
+
|
|
124
|
+
def _should_skip_url(self, url: str) -> bool:
|
|
125
|
+
"""Check if URL should be skipped."""
|
|
126
|
+
skip_patterns = [
|
|
127
|
+
"google.com",
|
|
128
|
+
"googleusercontent.com",
|
|
129
|
+
"gstatic.com",
|
|
130
|
+
"youtube.com/watch", # Keep channel/playlist but skip individual videos
|
|
131
|
+
"maps.google",
|
|
132
|
+
"translate.google",
|
|
133
|
+
"accounts.google",
|
|
134
|
+
"support.google",
|
|
135
|
+
"policies.google",
|
|
136
|
+
"schema.org",
|
|
137
|
+
"javascript:",
|
|
138
|
+
"data:",
|
|
139
|
+
"#",
|
|
140
|
+
]
|
|
141
|
+
|
|
142
|
+
for pattern in skip_patterns:
|
|
143
|
+
if pattern in url.lower():
|
|
144
|
+
return True
|
|
145
|
+
|
|
146
|
+
# Skip very short URLs (likely invalid)
|
|
147
|
+
if len(url) < 20:
|
|
148
|
+
return True
|
|
149
|
+
|
|
150
|
+
# Skip URLs that are just root domains without path
|
|
151
|
+
parsed = urllib.parse.urlparse(url)
|
|
152
|
+
if not parsed.path or parsed.path == "/":
|
|
153
|
+
return True
|
|
154
|
+
|
|
155
|
+
return False
|
{entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/browser/service.py
RENAMED
|
@@ -124,42 +124,71 @@ class ScreenshotService:
|
|
|
124
124
|
) or ""
|
|
125
125
|
|
|
126
126
|
# 2. Extract Images via Parallelized JS (Gallery)
|
|
127
|
+
# Strategy: For search pages, use Canvas to grab already loaded images (Instant)
|
|
128
|
+
# For other pages, use fetch (more robust for lazy load)
|
|
127
129
|
images_b64 = []
|
|
128
130
|
try:
|
|
129
|
-
|
|
131
|
+
js_code = """
|
|
130
132
|
(async () => {
|
|
131
133
|
const blocklist = ['logo', 'icon', 'avatar', 'ad', 'pixel', 'tracker', 'button', 'menu', 'nav'];
|
|
132
134
|
const candidates = Array.from(document.querySelectorAll('img'));
|
|
133
|
-
const
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
const cls = (typeof img.className === 'string' ? img.className : '').toLowerCase();
|
|
138
|
-
const src = img.src.toLowerCase();
|
|
139
|
-
if (blocklist.some(b => alt.includes(b) || cls.includes(b) || src.includes(b))) return false;
|
|
140
|
-
return true;
|
|
141
|
-
}).slice(0, 10);
|
|
142
|
-
|
|
143
|
-
const fetchImage = async (url) => {
|
|
135
|
+
const validImages = [];
|
|
136
|
+
|
|
137
|
+
// Helper: Get base64 from loaded image via Canvas
|
|
138
|
+
const getBase64 = (img) => {
|
|
144
139
|
try {
|
|
145
|
-
const
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
return
|
|
151
|
-
const reader = new FileReader();
|
|
152
|
-
reader.onloadend = () => resolve(reader.result.split(',')[1]);
|
|
153
|
-
reader.onerror = () => resolve(null);
|
|
154
|
-
reader.readAsDataURL(blob);
|
|
155
|
-
});
|
|
140
|
+
const canvas = document.createElement('canvas');
|
|
141
|
+
canvas.width = img.naturalWidth;
|
|
142
|
+
canvas.height = img.naturalHeight;
|
|
143
|
+
const ctx = canvas.getContext('2d');
|
|
144
|
+
ctx.drawImage(img, 0, 0);
|
|
145
|
+
return canvas.toDataURL('image/jpeg').split(',')[1];
|
|
156
146
|
} catch(e) { return null; }
|
|
157
147
|
};
|
|
158
148
|
|
|
159
|
-
const
|
|
160
|
-
|
|
149
|
+
for (const img of candidates) {
|
|
150
|
+
if (validImages.length >= 8) break;
|
|
151
|
+
|
|
152
|
+
if (img.naturalWidth < 100 || img.naturalHeight < 80) continue;
|
|
153
|
+
|
|
154
|
+
const alt = (img.alt || '').toLowerCase();
|
|
155
|
+
const cls = (typeof img.className === 'string' ? img.className : '').toLowerCase();
|
|
156
|
+
const src = (img.src || '').toLowerCase();
|
|
157
|
+
|
|
158
|
+
if (blocklist.some(b => alt.includes(b) || cls.includes(b) || src.includes(b))) continue;
|
|
159
|
+
|
|
160
|
+
// 1. Try Canvas (Instant for loaded images)
|
|
161
|
+
if (img.complete && img.naturalHeight > 0) {
|
|
162
|
+
const b64 = getBase64(img);
|
|
163
|
+
if (b64) {
|
|
164
|
+
validImages.push(b64);
|
|
165
|
+
continue;
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
// 2. Fallback to fetch (only for non-search pages to avoid delay)
|
|
170
|
+
// We skip fetch for search pages to ensure speed
|
|
171
|
+
if (!window.location.href.includes('google') && !window.location.href.includes('search')) {
|
|
172
|
+
try {
|
|
173
|
+
const controller = new AbortController();
|
|
174
|
+
const id = setTimeout(() => controller.abort(), 2000);
|
|
175
|
+
const resp = await fetch(img.src, { signal: controller.signal });
|
|
176
|
+
clearTimeout(id);
|
|
177
|
+
const blob = await resp.blob();
|
|
178
|
+
const b64 = await new Promise(resolve => {
|
|
179
|
+
const reader = new FileReader();
|
|
180
|
+
reader.onloadend = () => resolve(reader.result.split(',')[1]);
|
|
181
|
+
reader.onerror = () => resolve(null);
|
|
182
|
+
reader.readAsDataURL(blob);
|
|
183
|
+
});
|
|
184
|
+
if (b64) validImages.push(b64);
|
|
185
|
+
} catch(e) {}
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
return validImages;
|
|
161
189
|
})()
|
|
162
|
-
"""
|
|
190
|
+
"""
|
|
191
|
+
images_b64 = tab.run_js(js_code, as_expr=True) or []
|
|
163
192
|
|
|
164
193
|
if images_b64:
|
|
165
194
|
logger.info(f"ScreenshotService: Extracted {len(images_b64)} images for {url}")
|
{entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/definitions.py
RENAMED
|
@@ -11,7 +11,7 @@ from typing import Dict, Any
|
|
|
11
11
|
# =============================================================================
|
|
12
12
|
|
|
13
13
|
INSTRUCT_SP = """你是一个智能 INSTRUCT 助手。
|
|
14
|
-
|
|
14
|
+
你的目标是为用户的查询制定调研计划并执行
|
|
15
15
|
> 尽量完成所有搜索和获取, 需要的时候可以并发执行多个同种或不同种工具
|
|
16
16
|
|
|
17
17
|
- 用户消息包含典型名词、可能的专有名词组合: 调用 `web_search` 工具(最大3)。
|
|
@@ -22,25 +22,28 @@ INSTRUCT_SP = """你是一个智能 INSTRUCT 助手。
|
|
|
22
22
|
- 明显的鉴证、国家领导人、历史敏感事件隐喻
|
|
23
23
|
- 过于露骨的r18+、r18g内容(但不包括 galgame等acg向, 除非其过于违规)
|
|
24
24
|
|
|
25
|
+
## 内容审查
|
|
26
|
+
以下情况调用 `refuse_answer`:
|
|
27
|
+
- 内容涉及隐喻政治事件任务
|
|
28
|
+
- r18+、r18g(但不包含正常galgame、科普等)
|
|
29
|
+
|
|
30
|
+
## 决定流程
|
|
31
|
+
调用 `set_mode` 决定使用 `fast` 还是 `deepsearch`:
|
|
32
|
+
- 只要用户不明确研究就用 `fast`
|
|
33
|
+
- 用户提到: 深度搜索、深度研究、deepsearch、整理关于...的资料, 这几种情况时, 选择 `deepsearch`
|
|
25
34
|
|
|
26
35
|
## 重要规则 (CRITICAL RULES):
|
|
27
36
|
- 禁止输出任何文本回复:你必须且只能通过工具调用来行动。
|
|
28
|
-
- 禁止回答用户的问题:你的任务仅仅是收集信息,回答将由后续阶段完成。
|
|
29
|
-
- 禁止解释你的行为:不要输出思考过程或计划描述,直接调用工具。
|
|
30
37
|
- 如果没有工具调用,流程将自动结束。
|
|
31
38
|
"""
|
|
32
39
|
|
|
33
|
-
|
|
34
|
-
|
|
40
|
+
INSTRUCT_DEEPSEARCH_SP = """你是一个智能 INSTRUCT_DEEPSEARCH 审查助手, 你需要对 INSTRUCT 的输出进行多次信息补充直到信息足够、或达到次数上限(3次)
|
|
35
41
|
|
|
36
|
-
- 你已经使用过 `web_search` 工具, 不推荐再次使用, 即便你微调搜索词也只能获取重复信息
|
|
37
42
|
- 推荐使用 `crawl_page` 工具查看官方网站、wiki网站(但不推荐维基百科)、权威网站
|
|
38
43
|
- crawl_page 永远不使用国内垃圾网站例如 csdn、知乎、等重复搬运二手信息的网站
|
|
39
44
|
|
|
40
45
|
## 重要规则 (CRITICAL RULES):
|
|
41
46
|
- 禁止输出任何文本回复:你必须且只能通过工具调用来行动。
|
|
42
|
-
- 禁止回答用户的问题:你的任务仅仅是收集信息。
|
|
43
|
-
- 禁止解释你的行为:直接调用所需工具。
|
|
44
47
|
- 如果没有必要进一步操作,请不要输出任何内容(空回复),流程将自动进入下一阶段。
|
|
45
48
|
"""
|
|
46
49
|
|
|
@@ -101,7 +104,7 @@ def get_web_search_tool() -> Dict[str, Any]:
|
|
|
101
104
|
return {
|
|
102
105
|
"type": "function",
|
|
103
106
|
"function": {
|
|
104
|
-
"name": "
|
|
107
|
+
"name": "web_search",
|
|
105
108
|
"description": "网络搜索",
|
|
106
109
|
"parameters": {
|
|
107
110
|
"type": "object",
|
|
@@ -128,3 +131,25 @@ def get_crawl_page_tool() -> Dict[str, Any]:
|
|
|
128
131
|
},
|
|
129
132
|
},
|
|
130
133
|
}
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def get_set_mode_tool() -> Dict[str, Any]:
|
|
137
|
+
"""Tool for setting the pipeline mode (fast or deepsearch)."""
|
|
138
|
+
return {
|
|
139
|
+
"type": "function",
|
|
140
|
+
"function": {
|
|
141
|
+
"name": "set_mode",
|
|
142
|
+
"description": "设置本次查询的处理模式",
|
|
143
|
+
"parameters": {
|
|
144
|
+
"type": "object",
|
|
145
|
+
"properties": {
|
|
146
|
+
"mode": {
|
|
147
|
+
"type": "string",
|
|
148
|
+
"enum": ["fast", "deepsearch"],
|
|
149
|
+
"description": "fast=快速回答 / deepsearch=深度研究"
|
|
150
|
+
},
|
|
151
|
+
},
|
|
152
|
+
"required": ["mode"],
|
|
153
|
+
},
|
|
154
|
+
},
|
|
155
|
+
}
|
|
@@ -133,3 +133,37 @@ async def render_refuse_answer(
|
|
|
133
133
|
theme_color=theme_color,
|
|
134
134
|
)
|
|
135
135
|
|
|
136
|
+
|
|
137
|
+
IMAGE_UNSUPPORTED_MARKDOWN = """
|
|
138
|
+
<summary>
|
|
139
|
+
当前模型不支持图片输入,请使用支持视觉能力的模型或仅发送文本。
|
|
140
|
+
</summary>
|
|
141
|
+
"""
|
|
142
|
+
|
|
143
|
+
async def render_image_unsupported(
|
|
144
|
+
renderer,
|
|
145
|
+
output_path: str,
|
|
146
|
+
theme_color: str = "#ef4444",
|
|
147
|
+
tab_id: str = None
|
|
148
|
+
) -> bool:
|
|
149
|
+
"""
|
|
150
|
+
Render a card indicating that the model does not support image input.
|
|
151
|
+
"""
|
|
152
|
+
markdown = f"""
|
|
153
|
+
# 图片输入不支持
|
|
154
|
+
|
|
155
|
+
> 当前选择的模型不支持图片输入。
|
|
156
|
+
> 请切换到支持视觉的模型,或仅发送文本内容。
|
|
157
|
+
"""
|
|
158
|
+
return await renderer.render(
|
|
159
|
+
markdown_content=markdown,
|
|
160
|
+
output_path=output_path,
|
|
161
|
+
stats={},
|
|
162
|
+
references=[],
|
|
163
|
+
page_references=[],
|
|
164
|
+
image_references=[],
|
|
165
|
+
stages_used=[],
|
|
166
|
+
image_timeout=1000,
|
|
167
|
+
theme_color=theme_color,
|
|
168
|
+
tab_id=tab_id
|
|
169
|
+
)
|
{entari_plugin_hyw-4.0.0rc5 → entari_plugin_hyw-4.0.0rc6}/src/entari_plugin_hyw/modular_pipeline.py
RENAMED
|
@@ -7,14 +7,14 @@ Simpler flow with self-correction/feedback loop.
|
|
|
7
7
|
|
|
8
8
|
import asyncio
|
|
9
9
|
import time
|
|
10
|
-
from typing import Any, Dict, List, Optional
|
|
10
|
+
from typing import Any, Dict, List, Optional, Callable, Awaitable
|
|
11
11
|
|
|
12
12
|
from loguru import logger
|
|
13
13
|
from openai import AsyncOpenAI
|
|
14
14
|
|
|
15
15
|
from .stage_base import StageContext
|
|
16
16
|
from .stage_instruct import InstructStage
|
|
17
|
-
from .
|
|
17
|
+
from .stage_instruct_deepsearch import InstructDeepsearchStage
|
|
18
18
|
from .stage_summary import SummaryStage
|
|
19
19
|
from .search import SearchService
|
|
20
20
|
|
|
@@ -24,19 +24,20 @@ class ModularPipeline:
|
|
|
24
24
|
Modular Pipeline.
|
|
25
25
|
|
|
26
26
|
Flow:
|
|
27
|
-
1. Instruct
|
|
28
|
-
2. Instruct
|
|
27
|
+
1. Instruct: Initial Discovery + Mode Decision (fast/deepsearch).
|
|
28
|
+
2. [Deepsearch only] Instruct Deepsearch Loop: Supplement info (max 3 iterations).
|
|
29
29
|
3. Summary: Generate final response.
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
|
-
def __init__(self, config: Any):
|
|
32
|
+
def __init__(self, config: Any, send_func: Optional[Callable[[str], Awaitable[None]]] = None):
    """Set up the pipeline's shared services and its three stages.

    Args:
        config: Configuration object; ``base_url`` and ``api_key`` are read
            from it here, and it is passed on to the search service and
            every stage.
        send_func: Optional async callable taking a string. It is only
            stored on the instance here.
            NOTE(review): presumably used to push intermediate messages
            to the user -- confirm against callers.
    """
    self.config = config
    self.send_func = send_func

    # Shared collaborators, created once and handed to every stage.
    self.search_service = SearchService(config)
    self.client = AsyncOpenAI(base_url=config.base_url, api_key=config.api_key)

    # Initialize stages: each one receives the same positional dependencies.
    stage_deps = (config, self.search_service, self.client)
    self.instruct_stage = InstructStage(*stage_deps)
    self.instruct_deepsearch_stage = InstructDeepsearchStage(*stage_deps)
    self.summary_stage = SummaryStage(*stage_deps)
|
|
41
42
|
|
|
42
43
|
async def execute(
|
|
@@ -60,6 +61,16 @@ class ModularPipeline:
|
|
|
60
61
|
conversation_history=conversation_history,
|
|
61
62
|
)
|
|
62
63
|
|
|
64
|
+
# Determine if model supports image input
|
|
65
|
+
model_cfg_dict = next((m for m in self.config.models if m.get("name") == active_model), None)
|
|
66
|
+
if model_cfg_dict:
|
|
67
|
+
context.image_input_supported = model_cfg_dict.get("image_input", True)
|
|
68
|
+
else:
|
|
69
|
+
context.image_input_supported = True # Default to True if unknown
|
|
70
|
+
|
|
71
|
+
logger.info(f"Pipeline Execution: Model '{active_model}' Image Input Supported: {context.image_input_supported}")
|
|
72
|
+
|
|
73
|
+
|
|
63
74
|
trace: Dict[str, Any] = {
|
|
64
75
|
"instruct_rounds": [],
|
|
65
76
|
"summary": None,
|
|
@@ -82,21 +93,33 @@ class ModularPipeline:
|
|
|
82
93
|
if context.should_refuse:
|
|
83
94
|
return self._build_refusal_response(context, conversation_history, active_model, stats)
|
|
84
95
|
|
|
85
|
-
# === Stage 2:
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
96
|
+
# === Stage 2: Deepsearch Loop (if mode is deepsearch) ===
|
|
97
|
+
if context.selected_mode == "deepsearch":
|
|
98
|
+
MAX_DEEPSEARCH_ITERATIONS = 3
|
|
99
|
+
logger.info(f"Pipeline: Mode is 'deepsearch', starting loop (max {MAX_DEEPSEARCH_ITERATIONS} iterations)")
|
|
100
|
+
|
|
101
|
+
for i in range(MAX_DEEPSEARCH_ITERATIONS):
|
|
102
|
+
logger.info(f"Pipeline: Stage 2 - Deepsearch Iteration {i + 1}")
|
|
103
|
+
deepsearch_result = await self.instruct_deepsearch_stage.execute(context)
|
|
104
|
+
|
|
105
|
+
# Trace & Usage
|
|
106
|
+
deepsearch_result.trace["stage_name"] = f"Deepsearch (Iteration {i + 1})"
|
|
107
|
+
trace["instruct_rounds"].append(deepsearch_result.trace)
|
|
108
|
+
usage_totals["input_tokens"] += deepsearch_result.usage.get("input_tokens", 0)
|
|
109
|
+
usage_totals["output_tokens"] += deepsearch_result.usage.get("output_tokens", 0)
|
|
110
|
+
|
|
111
|
+
# Check if should stop
|
|
112
|
+
if deepsearch_result.data.get("should_stop"):
|
|
113
|
+
logger.info(f"Pipeline: Deepsearch loop ended at iteration {i + 1}")
|
|
114
|
+
break
|
|
115
|
+
else:
|
|
116
|
+
logger.info("Pipeline: Mode is 'fast', skipping deepsearch stage")
|
|
94
117
|
|
|
95
118
|
# === Stage 3: Summary ===
|
|
96
119
|
# Collect page screenshots if image mode (already rendered in InstructStage)
|
|
97
120
|
all_images = list(images) if images else []
|
|
98
121
|
|
|
99
|
-
if
|
|
122
|
+
if context.image_input_supported:
|
|
100
123
|
# Collect pre-rendered screenshots from web_results
|
|
101
124
|
for r in context.web_results:
|
|
102
125
|
if r.get("_type") == "page" and r.get("screenshot_b64"):
|
|
@@ -134,17 +157,27 @@ class ModularPipeline:
|
|
|
134
157
|
# 3. Update structured response with cached (base64) URLs
|
|
135
158
|
for ref in structured.get("references", []):
|
|
136
159
|
if ref.get("images"):
|
|
137
|
-
#
|
|
138
|
-
# Discard original URLs if download failed, to prevent broken images in UI
|
|
160
|
+
# Keep cached images, but preserve original URLs as fallback
|
|
139
161
|
new_images = []
|
|
140
162
|
for img in ref["images"]:
|
|
163
|
+
# 1. Already Base64 (from Search Injection) -> Keep it
|
|
164
|
+
if img.startswith("data:"):
|
|
165
|
+
new_images.append(img)
|
|
166
|
+
continue
|
|
167
|
+
|
|
168
|
+
# 2. Cached successfully -> Keep it
|
|
141
169
|
cached_val = cached_map.get(img)
|
|
142
170
|
if cached_val and cached_val.startswith("data:"):
|
|
143
171
|
new_images.append(cached_val)
|
|
172
|
+
# 3. Else -> DROP IT (User request: "Delete Fallback, must download in advance")
|
|
144
173
|
ref["images"] = new_images
|
|
145
174
|
except Exception as e:
|
|
146
175
|
logger.warning(f"Pipeline: Image caching failed: {e}")
|
|
147
176
|
|
|
177
|
+
# Debug: Log image counts
|
|
178
|
+
total_ref_images = sum(len(ref.get("images", []) or []) for ref in structured.get("references", []))
|
|
179
|
+
logger.info(f"Pipeline: Final structured response has {len(structured.get('references', []))} refs with {total_ref_images} images total")
|
|
180
|
+
|
|
148
181
|
stages_used = self._build_stages_ui(trace, context, images)
|
|
149
182
|
|
|
150
183
|
conversation_history.append({"role": "user", "content": user_input})
|