entari-plugin-hyw 3.4.2__py3-none-any.whl → 3.5.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of entari-plugin-hyw might be problematic.
- entari_plugin_hyw/__init__.py +78 -158
- entari_plugin_hyw/assets/card-dist/index.html +396 -0
- entari_plugin_hyw/assets/card-dist/logos/anthropic.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/cerebras.svg +9 -0
- entari_plugin_hyw/assets/card-dist/logos/deepseek.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/gemini.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/google.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/grok.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/huggingface.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/microsoft.svg +15 -0
- entari_plugin_hyw/assets/card-dist/logos/minimax.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/mistral.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/nvida.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/openai.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/openrouter.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/perplexity.svg +24 -0
- entari_plugin_hyw/assets/card-dist/logos/qwen.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/xai.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/xiaomi.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/zai.png +0 -0
- entari_plugin_hyw/assets/card-dist/vite.svg +1 -0
- entari_plugin_hyw/card-ui/.gitignore +24 -0
- entari_plugin_hyw/card-ui/README.md +5 -0
- entari_plugin_hyw/card-ui/index.html +16 -0
- entari_plugin_hyw/card-ui/package-lock.json +2342 -0
- entari_plugin_hyw/card-ui/package.json +31 -0
- entari_plugin_hyw/card-ui/public/logos/anthropic.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/cerebras.svg +9 -0
- entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/gemini.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/google.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/microsoft.svg +15 -0
- entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/openai.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/perplexity.svg +24 -0
- entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
- entari_plugin_hyw/card-ui/public/vite.svg +1 -0
- entari_plugin_hyw/card-ui/src/App.vue +410 -0
- entari_plugin_hyw/card-ui/src/assets/vue.svg +1 -0
- entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +41 -0
- entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +385 -0
- entari_plugin_hyw/card-ui/src/components/SectionCard.vue +41 -0
- entari_plugin_hyw/card-ui/src/components/StageCard.vue +183 -0
- entari_plugin_hyw/card-ui/src/main.ts +5 -0
- entari_plugin_hyw/card-ui/src/style.css +8 -0
- entari_plugin_hyw/card-ui/src/test_regex.js +103 -0
- entari_plugin_hyw/card-ui/src/types.ts +52 -0
- entari_plugin_hyw/card-ui/tsconfig.app.json +16 -0
- entari_plugin_hyw/card-ui/tsconfig.json +7 -0
- entari_plugin_hyw/card-ui/tsconfig.node.json +26 -0
- entari_plugin_hyw/card-ui/vite.config.ts +16 -0
- entari_plugin_hyw/{core/history.py → history.py} +25 -1
- entari_plugin_hyw/image_cache.py +283 -0
- entari_plugin_hyw/{utils/misc.py → misc.py} +0 -3
- entari_plugin_hyw/{core/pipeline.py → pipeline.py} +236 -86
- entari_plugin_hyw/{utils/prompts_cn.py → prompts.py} +10 -25
- entari_plugin_hyw/render_vue.py +314 -0
- entari_plugin_hyw/{utils/search.py → search.py} +227 -10
- {entari_plugin_hyw-3.4.2.dist-info → entari_plugin_hyw-3.5.0rc2.dist-info}/METADATA +5 -2
- entari_plugin_hyw-3.5.0rc2.dist-info/RECORD +88 -0
- entari_plugin_hyw/assets/libs/highlight.css +0 -10
- entari_plugin_hyw/assets/libs/highlight.js +0 -1213
- entari_plugin_hyw/assets/libs/katex-auto-render.js +0 -1
- entari_plugin_hyw/assets/libs/katex.css +0 -1
- entari_plugin_hyw/assets/libs/katex.js +0 -1
- entari_plugin_hyw/assets/libs/tailwind.css +0 -1
- entari_plugin_hyw/assets/package-lock.json +0 -953
- entari_plugin_hyw/assets/package.json +0 -16
- entari_plugin_hyw/assets/tailwind.config.js +0 -12
- entari_plugin_hyw/assets/tailwind.input.css +0 -235
- entari_plugin_hyw/assets/template.html +0 -157
- entari_plugin_hyw/assets/template.html.bak +0 -157
- entari_plugin_hyw/assets/template.j2 +0 -400
- entari_plugin_hyw/core/__init__.py +0 -0
- entari_plugin_hyw/core/config.py +0 -38
- entari_plugin_hyw/core/hyw.py +0 -48
- entari_plugin_hyw/core/render.py +0 -630
- entari_plugin_hyw/utils/__init__.py +0 -2
- entari_plugin_hyw/utils/browser.py +0 -40
- entari_plugin_hyw/utils/playwright_tool.py +0 -36
- entari_plugin_hyw/utils/prompts.py +0 -119
- entari_plugin_hyw-3.4.2.dist-info/RECORD +0 -49
- {entari_plugin_hyw-3.4.2.dist-info → entari_plugin_hyw-3.5.0rc2.dist-info}/WHEEL +0 -0
- {entari_plugin_hyw-3.4.2.dist-info → entari_plugin_hyw-3.5.0rc2.dist-info}/top_level.txt +0 -0
entari_plugin_hyw/{core/pipeline.py → pipeline.py}

@@ -1,6 +1,7 @@
 import asyncio
 import html
 import json
+import re
 import time
 from contextlib import asynccontextmanager
 from typing import Any, Dict, List, Optional, Tuple
@@ -8,16 +9,14 @@ from typing import Any, Dict, List, Optional, Tuple
 from loguru import logger
 from openai import AsyncOpenAI

-from .
-from
-from
+from .search import SearchService
+from .image_cache import get_cached_images
+from .prompts import (
     AGENT_SP,
     AGENT_SP_INSTRUCT_VISION_ADD,
     AGENT_SP_TOOLS_STANDARD_ADD,
     AGENT_SP_TOOLS_AGENT_ADD,
     AGENT_SP_SEARCH_ADD,
-    AGENT_SP_PAGE_ADD,
-    AGENT_SP_IMAGE_SEARCH_ADD,
     INSTRUCT_SP,
     INSTRUCT_SP_VISION_ADD,
     VISION_SP,
@@ -33,7 +32,7 @@ class ProcessingPipeline:
     Core pipeline (vision -> instruct/search -> agent).
     """

-    def __init__(self, config:
+    def __init__(self, config: Any):
         self.config = config
         self.search_service = SearchService(config)
         self.client = AsyncOpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
@@ -120,11 +119,9 @@
         final_response_content = ""
         structured: Dict[str, Any] = {}

-        # Reset search cache and ID
+        # Reset search cache and ID counter for this execution
         self.all_web_results = []
-        self.
-        self.page_id_counter = 0
-        self.image_id_counter = 0
+        self.global_id_counter = 0

         try:
             logger.info(f"Pipeline: Starting workflow for '{user_input}' using {active_model}")
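
The 3.4.2 code kept separate page and image ID counters; 3.5.0rc2 collapses them into one `global_id_counter`, so search, page, and image results share a single `[N]` numbering space that the citation parser later resolves through the `_id`/`_type` fields on `all_web_results`. A minimal sketch of how such a counter could assign those fields (the `_register_result` helper name is hypothetical, not from the diff):

```python
from typing import Any, Dict, List

class _IdDemo:
    """Sketch: one counter numbering all result types sequentially."""

    def __init__(self) -> None:
        self.all_web_results: List[Dict[str, Any]] = []
        self.global_id_counter = 0  # replaces the separate page/image counters

    def _register_result(self, result: Dict[str, Any], result_type: str) -> int:
        # Hypothetical helper: assign the next global [N] id and tag the type.
        self.global_id_counter += 1
        result["_id"] = self.global_id_counter
        result["_type"] = result_type  # "search" | "page" | "image"
        self.all_web_results.append(result)
        return self.global_id_counter

demo = _IdDemo()
demo._register_result({"url": "https://example.com/a"}, "search")    # _id == 1
demo._register_result({"url": "https://example.com/b"}, "page")      # _id == 2
demo._register_result({"url": "https://example.com/c.png"}, "image") # _id == 3
```
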
@@ -188,7 +185,8 @@
                 vision_text=vision_text,
                 model=instruct_model,
             )
-
+            # Instruct time excludes search time (search_time is returned separately)
+            instruct_time = time.time() - instruct_start - search_time

             # Calculate Instruct Cost
             instruct_cost = 0.0
@@ -265,17 +263,18 @@
             if vision_text:
                 system_prompt += AGENT_SP_INSTRUCT_VISION_ADD.format(vision_msgs=vision_text)

-            # Append search results
-            if has_search_results and search_msgs_text:
-                system_prompt += AGENT_SP_SEARCH_ADD.format(search_msgs=search_msgs_text)
-
-            # Append crawled page content
+            # Append all search results (text, page, image) in one block
             page_msgs_text = self._format_page_msgs()
+            all_search_parts = []
+            if has_search_results and search_msgs_text:
+                all_search_parts.append(search_msgs_text)
             if page_msgs_text:
-
-
+                all_search_parts.append(page_msgs_text)
             if has_image_results and image_msgs_text:
-
+                all_search_parts.append(image_msgs_text)
+
+            if all_search_parts:
+                system_prompt += AGENT_SP_SEARCH_ADD.format(search_msgs="\n".join(all_search_parts))

             last_system_prompt = system_prompt

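
Search, crawled-page, and image-search messages are now joined into a single block and injected through one template; the dedicated `AGENT_SP_PAGE_ADD` and `AGENT_SP_IMAGE_SEARCH_ADD` templates are deleted from prompts.py (see the final hunk below). A sketch of the assembled prompt, using the new `AGENT_SP_SEARCH_ADD` body with placeholder result text:

```python
# Consolidated prompt assembly; result strings are placeholders.
AGENT_SP_SEARCH_ADD = """
## 联网信息
{search_msgs}
"""

all_search_parts = [
    "[1] Example search hit - https://example.com",    # search_msgs_text
    "[2] Crawled page summary - https://example.org",  # page_msgs_text
    "[3] Image result - https://example.net/x.png",    # image_msgs_text
]
system_prompt = "...base agent prompt..."
if all_search_parts:
    system_prompt += AGENT_SP_SEARCH_ADD.format(search_msgs="\n".join(all_search_parts))
print(system_prompt)
```
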
@@ -331,6 +330,7 @@
                         "tool_results": [],
                         "tool_time": tool_exec_time,
                         "llm_time": step_llm_time,
+                        "usage": step_usage,
                     }
                     for i, result in enumerate(results):
                         tc = tool_calls[i]
@@ -428,7 +428,7 @@
             stages_used.append({
                 "name": "Vision",
                 "model": v_model,
-                "icon_config":
+                "icon_config": infer_icon(v_model, v_base_url),
                 "provider": infer_provider(v_base_url),
                 "time": v.get("time", 0),
                 "cost": v.get("cost", 0.0)
@@ -441,20 +441,33 @@
             stages_used.append({
                 "name": "Instruct",
                 "model": i_model,
-                "icon_config":
+                "icon_config": infer_icon(i_model, i_base_url),
                 "provider": infer_provider(i_base_url),
                 "time": i.get("time", 0),
                 "cost": i.get("cost", 0.0)
             })

-        if
+        # Show Search stage if we have ANY search results (text OR image)
+        if (has_search_results or has_image_results) and search_payloads:
+            # Collect initial search results for the Search stage card
+            initial_refs = [
+                {"title": r.get("title", ""), "url": r.get("url", ""), "domain": r.get("domain", "")}
+                for r in self.all_web_results if r.get("_type") == "search"
+            ]
+            initial_images = [
+                {"title": r.get("title", ""), "url": r.get("url", ""), "thumbnail": r.get("thumbnail", "")}
+                for r in self.all_web_results if r.get("_type") == "image"
+            ]
+
             stages_used.append({
                 "name": "Search",
                 "model": getattr(self.config, "search_name", "DuckDuckGo"),
                 "icon_config": "search",
                 "provider": getattr(self.config, 'search_provider', 'Crawl4AI'),
                 "time": search_time,
-                "cost": 0.0
+                "cost": 0.0,
+                "references": initial_refs,
+                "image_references": initial_images
             })

         # Add Crawler stage if Instruct used crawl_page
@@ -496,18 +509,24 @@
             a_model = a.get("model", "") or active_model
             a_base_url = a.get("base_url", "") or self.config.base_url
             steps = a.get("steps", [])
-            agent_icon =
+            agent_icon = infer_icon(a_model, a_base_url)
             agent_provider = infer_provider(a_base_url)

             for s in steps:
                 if "tool_calls" in s:
                     # 1. Agent Thought Stage (with LLM time)
+                    # Calculate step cost
+                    step_usage = s.get("usage", {})
+                    step_cost = 0.0
+                    if a_in_price > 0 or a_out_price > 0:
+                        step_cost = (step_usage.get("input_tokens", 0) / 1_000_000 * a_in_price) + (step_usage.get("output_tokens", 0) / 1_000_000 * a_out_price)
+
                     stages_used.append({
                         "name": "Agent",
                         "model": a_model,
                         "icon_config": agent_icon,
                         "provider": agent_provider,
-                        "time": s.get("llm_time", 0), "cost":
+                        "time": s.get("llm_time", 0), "cost": step_cost
                     })

                     # 2. Grouped Tool Stages
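
Per-step Agent cost is straightforward per-million-token pricing on the step's recorded usage. A worked example with illustrative prices (the `a_in_price`/`a_out_price` values here are assumptions, not the plugin's defaults):

```python
# Worked example of the step-cost arithmetic above; prices are illustrative.
step_usage = {"input_tokens": 12_000, "output_tokens": 800}
a_in_price = 0.25   # USD per 1M input tokens (assumed)
a_out_price = 1.25  # USD per 1M output tokens (assumed)

step_cost = (step_usage.get("input_tokens", 0) / 1_000_000 * a_in_price) \
          + (step_usage.get("output_tokens", 0) / 1_000_000 * a_out_price)
print(f"{step_cost:.6f}")  # 0.003 + 0.001 = 0.004 USD for this step
```
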
@@ -587,11 +606,33 @@
                 "time": 0, "cost": 0
             })

-
-
-
-
-
+        # Assign total time/cost to last Agent stage
+        # Sum up total time/cost for UI/stats (implicit via loop above)
+        # No need to assign everything to last agent anymore as we distribute it.
+
+        # --- Final Filter: Only show cited items in workflow cards ---
+        cited_urls = {ref['url'] for ref in (structured.get("references", []) +
+                                             structured.get("page_references", []) +
+                                             structured.get("image_references", []))}
+
+        # Find images already rendered in markdown content (to avoid duplicate display)
+        markdown_image_urls = set()
+        md_img_pattern = re.compile(r'!\[.*?\]\((https?://[^)]+)\)')
+        for match in md_img_pattern.finditer(final_content):
+            markdown_image_urls.add(match.group(1))
+
+        for s in stages_used:
+            if "references" in s and s["references"]:
+                s["references"] = [r for r in s["references"] if r.get("url") in cited_urls]
+            # Filter out images already shown in markdown content
+            # Check both url AND thumbnail since either might be used in markdown
+            if "image_references" in s and s["image_references"]:
+                s["image_references"] = [
+                    r for r in s["image_references"]
+                    if r.get("url") not in markdown_image_urls and (r.get("thumbnail") or "") not in markdown_image_urls
+                ]
+            if "crawled_pages" in s and s["crawled_pages"]:
+                s["crawled_pages"] = [r for r in s["crawled_pages"] if r.get("url") in cited_urls]

         # Clean up conversation history: Remove tool calls and results to save tokens and avoid ID conflicts
         # Keep only 'user' messages and 'assistant' messages without tool_calls (final answers)
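
The duplicate-image filter keys on `md_img_pattern`, which only captures absolute http(s) URLs inside markdown image syntax. A quick standalone check of what it does and does not match:

```python
import re

# Same pattern as the diff: absolute http(s) URLs inside image syntax only.
md_img_pattern = re.compile(r'!\[.*?\]\((https?://[^)]+)\)')

sample = (
    "Intro ![cat](https://example.com/cat.png)\n"
    "Relative ![icon](./local.png) is ignored, and a plain link "
    "[site](https://example.com/page) is not an image."
)
print(md_img_pattern.findall(sample))
# ['https://example.com/cat.png'] -- an image_reference pointing at this URL
# would be filtered out of the stage card to avoid duplicate display.
```
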
@@ -606,6 +647,67 @@
         # Update the reference (since it might be used by caller)
         current_history[:] = cleaned_history

+        # --- Apply cached images to reduce render time ---
+        # Collect all image URLs that need caching (avoid duplicates when thumbnail == url)
+        all_image_urls = set()
+        for img_ref in structured.get("image_references", []):
+            if img_ref.get("thumbnail"):
+                all_image_urls.add(img_ref["thumbnail"])
+            if img_ref.get("url"):
+                all_image_urls.add(img_ref["url"])
+
+        for stage in stages_used:
+            for img_ref in stage.get("image_references", []):
+                if img_ref.get("thumbnail"):
+                    all_image_urls.add(img_ref["thumbnail"])
+                if img_ref.get("url"):
+                    all_image_urls.add(img_ref["url"])
+
+        # Also collect image URLs from markdown content
+        markdown_img_pattern = re.compile(r'!\[.*?\]\((https?://[^)]+)\)')
+        markdown_urls = markdown_img_pattern.findall(final_content)
+        all_image_urls.update(markdown_urls)
+
+        # Get cached versions (waits for pending downloads, with timeout)
+        if all_image_urls:
+            try:
+                cached_map = await get_cached_images(list(all_image_urls), wait_timeout=3.0)
+
+                # Apply cached URLs to structured response
+                for img_ref in structured.get("image_references", []):
+                    if img_ref.get("thumbnail") and img_ref["thumbnail"] in cached_map:
+                        img_ref["thumbnail"] = cached_map[img_ref["thumbnail"]]
+                    if img_ref.get("url") and img_ref["url"] in cached_map:
+                        img_ref["url"] = cached_map[img_ref["url"]]
+
+                # Apply cached URLs to stages
+                for stage in stages_used:
+                    for img_ref in stage.get("image_references", []):
+                        if img_ref.get("thumbnail") and img_ref["thumbnail"] in cached_map:
+                            img_ref["thumbnail"] = cached_map[img_ref["thumbnail"]]
+                        if img_ref.get("url") and img_ref["url"] in cached_map:
+                            img_ref["url"] = cached_map[img_ref["url"]]
+
+                # Replace image URLs in markdown content with cached versions
+                def replace_markdown_img(match):
+                    full_match = match.group(0)
+                    url = match.group(1)
+                    cached_url = cached_map.get(url)
+                    if cached_url and cached_url != url:
+                        return full_match.replace(url, cached_url)
+                    return full_match
+
+                final_content = markdown_img_pattern.sub(replace_markdown_img, final_content)
+                structured["response"] = markdown_img_pattern.sub(replace_markdown_img, structured.get("response", ""))
+
+                # Log cache stats
+                from .image_cache import get_image_cache
+                cache_stats = get_image_cache().get_stats()
+                logger.info(f"ImageCache stats: {cache_stats}")
+
+            except Exception as e:
+                logger.warning(f"Failed to apply image cache: {e}")
+
         return {
             "llm_response": final_content,
             "structured_response": structured,
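
Based on how `cached_map` is used above, `get_cached_images` (from the new `image_cache.py`) takes the collected URLs plus a `wait_timeout` and resolves to a mapping from original URL to a cached replacement. A minimal usage sketch under that inferred contract; falling back to the original URL on a cache miss is an assumption:

```python
import asyncio

from entari_plugin_hyw.image_cache import get_cached_images

async def demo() -> None:
    urls = ["https://example.com/a.png", "https://example.com/b.jpg"]
    # Waits up to 3s for any in-flight downloads, then returns url -> cached url.
    cached_map = await get_cached_images(urls, wait_timeout=3.0)
    for url in urls:
        # Assumed fallback: keep the original URL if it was not cached in time.
        print(url, "->", cached_map.get(url, url))

asyncio.run(demo())
```
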
@@ -627,11 +729,7 @@ class ProcessingPipeline:
         }

     def _parse_tagged_response(self, text: str) -> Dict[str, Any]:
-        """Parse response and auto-infer references from
-
-        New simplified format:
-        - Body text uses [1][2] format for citations
-        - No ref code block needed - we auto-infer from citations
+        """Parse response and auto-infer references from citations and markdown images.
         """
         parsed = {"response": "", "references": [], "page_references": [], "image_references": [], "flow_steps": []}
         if not text:
@@ -639,9 +737,14 @@

         import re

-
+        # 1. Strip trailing reference/source list
+        body_text = text
+        ref_list_pattern = re.compile(r'(?:\n\s*|^)\s*(?:#{1,3}|\*\*)\s*(?:References|Citations|Sources|参考资料|引用)[\s\S]*$', re.IGNORECASE | re.MULTILINE)
+        body_text = ref_list_pattern.sub('', body_text)
+
+        remaining_text = body_text.strip()

-        #
+        # 2. Unwrap JSON if necessary
         try:
             if remaining_text.strip().startswith("{") and "action" in remaining_text:
                 data = json.loads(remaining_text)
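
The new step 1 drops any trailing reference list the model emits, since the pipeline regenerates references itself: the pattern anchors on a heading or bold marker named References/Citations/Sources (or the Chinese equivalents) and consumes everything to the end of the string. A standalone check:

```python
import re

# Same pattern as the diff's step 1.
ref_list_pattern = re.compile(
    r'(?:\n\s*|^)\s*(?:#{1,3}|\*\*)\s*(?:References|Citations|Sources|参考资料|引用)[\s\S]*$',
    re.IGNORECASE | re.MULTILINE,
)

raw = "Body with a claim [1].\n\n## References\n[1] https://example.com"
print(ref_list_pattern.sub('', raw).strip())
# -> 'Body with a claim [1].'; the [1] left in the body is renumbered
#    by the citation-mapping steps that follow in this method.
```
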
@@ -650,67 +753,114 @@
         except Exception:
             pass

-        #
+        # 3. Identify all citations [N] and direct markdown images ![]()
+        cited_ids = []
         body_pattern = re.compile(r'\[(\d+)\]')
-        id_order = []  # Preserve citation order
-
         for match in body_pattern.finditer(remaining_text):
             try:
-
-
-
-
-
-
-
-
-
+                cited_ids.append(int(match.group(1)))
+            except ValueError: pass
+
+        # Also find direct URLs in ![]()
+        direct_image_urls = []
+        img_pattern = re.compile(r'!\[.*?\]\((.*?)\)')
+        for match in img_pattern.finditer(remaining_text):
+            url = match.group(1).strip()
+            if url and not url.startswith('['):  # Not a [N] citation
+                direct_image_urls.append(url)
+
+        # 4. Build Citation Maps and Reference Lists
+        unified_id_map = {}
+        # Keep track of what we've already added to avoid duplicates
+        seen_urls = set()

+        # id_order needs to be unique and preserve appearance order
+        id_order = []
+        for id_val in cited_ids:
+            if id_val not in id_order:
+                id_order.append(id_val)
+
+        # Process [N] citations first to determine numbering
         for old_id in id_order:
-            # Find in all_web_results by _id
             result_item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
+            if not result_item: continue

+            url = result_item.get("url", "")
+            item_type = result_item.get("_type", "")
+
+            entry = {
+                "title": result_item.get("title", ""),
+                "url": url,
+                "domain": result_item.get("domain", "")
+            }
+
+            if item_type == "search":
+                parsed["references"].append(entry)
+                unified_id_map[old_id] = len(parsed["references"]) + len(parsed["page_references"])
+                seen_urls.add(url)
+            elif item_type == "page":
+                parsed["page_references"].append(entry)
+                unified_id_map[old_id] = len(parsed["references"]) + len(parsed["page_references"])
+                seen_urls.add(url)
+            elif item_type == "image":
+                entry["thumbnail"] = result_item.get("thumbnail", "")
+                if url not in seen_urls:
+                    parsed["image_references"].append(entry)
+                    seen_urls.add(url)
+                # Note: Images cited as [N] might be used in text like ![desc]([N])
+                # We'll handle this in replacement
+
+        # Now handle direct image URLs from ![]() that weren't cited as [N]
+        for url in direct_image_urls:
+            if url in seen_urls: continue
+            # Find in all_web_results
+            result_item = next((r for r in self.all_web_results if (r.get("url") == url or r.get("image") == url) and r.get("_type") == "image"), None)
             if result_item:
                 entry = {
                     "title": result_item.get("title", ""),
-                    "url":
-                    "domain": result_item.get("domain", "")
+                    "url": url,
+                    "domain": result_item.get("domain", ""),
+                    "thumbnail": result_item.get("thumbnail", "")
                 }
-
-
-
-                # Auto-classify by type
-                if item_type == "search":
-                    parsed["references"].append(entry)
-                    old_to_new_map[old_id] = len(parsed["references"])
-                elif item_type == "page":
-                    parsed["page_references"].append(entry)
-                    old_to_new_map[old_id] = len(parsed["page_references"])
-                elif item_type == "image":
-                    # Collect image but don't add to map (will be stripped from text)
-                    entry["thumbnail"] = result_item.get("thumbnail", "")
-                    parsed["image_references"].append(entry)
-                    # Note: no old_to_new_map entry - image citations will be removed
-
-        # 4. Replace [old_id] with [new_id] in text, or remove if image
-        def replace_id(match):
-            try:
-                old_id = int(match.group(1))
-                new_id = old_to_new_map.get(old_id)
-                if new_id is not None:
-                    return f"[{new_id}]"
-                else:
-                    # Check if it's an image reference (not in map)
-                    item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
-                    if item and item.get("_type") == "image":
-                        return ""  # Remove image citations from text
-            except ValueError:
-                pass
-            return match.group(0)
+                parsed["image_references"].append(entry)
+                seen_urls.add(url)

-
+        # 5. Replacement Logic
+        # Define image replacement map separately to handle ![desc]([N])
+        image_url_map = {}  # old_id -> raw_url
+        for old_id in id_order:
+            item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
+            if item and item.get("_type") == "image":
+                image_url_map[old_id] = item.get("url", "")
+
+        def refined_replace(text):
+            # First, handle ![desc]([N]) specifically
+            # We want to replace the [N] with the actual URL so the markdown renders
+            def sub_img_ref(match):
+                alt = match.group(1)
+                ref = match.group(2)
+                inner_match = body_pattern.match(ref)
+                if inner_match:
+                    oid = int(inner_match.group(1))
+                    if oid in image_url_map:
+                        return f"![{alt}]({image_url_map[oid]})"
+                return match.group(0)
+
+            text = re.sub(r'!\[(.*?)\]\((.*?)\)', sub_img_ref, text)
+
+            # Then handle normal [N] replacements
+            def sub_norm_ref(match):
+                oid = int(match.group(1))
+                if oid in unified_id_map:
+                    return f"[{unified_id_map[oid]}]"
+                if oid in image_url_map:
+                    return ""  # Remove standalone image citations like [5] if they aren't in ![]()
+                return match.group(0)
+
+            return body_pattern.sub(sub_norm_ref, text)

-
+        final_text = refined_replace(remaining_text)
+        parsed["response"] = final_text.strip()
         return parsed

     async def _safe_route_tool(self, tool_call):
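
Taken together, the new steps renumber text/page citations into one unified sequence, rewrite `![alt]([N])` image citations into real markdown images, and drop stray image citations. A compressed sketch of the same two-pass substitution, with hard-coded stand-ins for `unified_id_map` and `image_url_map`:

```python
import re

body_pattern = re.compile(r'\[(\d+)\]')
unified_id_map = {3: 1, 7: 2}                       # search/page ids -> unified numbering
image_url_map = {5: "https://example.com/fig.png"}  # image ids -> raw URLs

def refined_replace(text):
    # Pass 1: turn ![alt]([5]) into a real markdown image.
    def sub_img_ref(m):
        inner = body_pattern.match(m.group(2))
        if inner and int(inner.group(1)) in image_url_map:
            return f"![{m.group(1)}]({image_url_map[int(inner.group(1))]})"
        return m.group(0)
    text = re.sub(r'!\[(.*?)\]\((.*?)\)', sub_img_ref, text)

    # Pass 2: renumber [3]/[7]; drop bare image citations like [5].
    def sub_norm_ref(m):
        oid = int(m.group(1))
        if oid in unified_id_map:
            return f"[{unified_id_map[oid]}]"
        return "" if oid in image_url_map else m.group(0)
    return body_pattern.sub(sub_norm_ref, text)

print(refined_replace("Fact [3], detail [7], see ![chart]([5]) and stray [5]."))
# -> 'Fact [1], detail [2], see ![chart](https://example.com/fig.png) and stray .'
```
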
@@ -1053,4 +1203,4 @@
         except Exception:
             pass
         # Do NOT close shared crawler here, as pipeline instances are now per-request.
-        # Shared crawler lifecycle is managed
+        # Shared crawler lifecycle is managed globally.
entari_plugin_hyw/{utils/prompts_cn.py → prompts.py}

@@ -34,7 +34,7 @@ INSTRUCT_SP = """# 你是一个专业的指导专家.
 {tools_desc}

 ## 你的回复
-调用工具后无需回复额外文本节省token.
+调用工具后无需回复额外文本节省 token.

 ## 用户消息
 ```
@@ -57,16 +57,19 @@ AGENT_SP = """# 你是一个 Agent 总控专家, 你需要理解用户意图,

 ## 过程要求
 当不调用工具发送文本, 即会变成最终回复, 请遵守:
-- 直接给出一篇报告, 无需回答用户消息
 - 语言: {language}, 百科式风格, 语言严谨不啰嗦.
 - 正文格式:
--
--
--
+- 先给出一个 `# `大标题约 8-10 个字, 不要有多余废话, 不要直接回答用户的提问.
+- 然后紧接着给出一个 <summary>...</summary>, 除了给出一个约 100 字的纯文本简介, 介绍本次输出的长文的清晰、重点概括.
+- 随后开始详细二级标题 + markdown 正文, 语言描绘格式丰富多样, 简洁准确可信.
+- 请不要给出过长的代码、表格列数等, 请控制字数在 600 字内, 只讲重点和准确的数据.
+- 不支持渲染: 链接, 图片链接, mermaid
+- 支持渲染: 公式, 代码高亮, 只在需要的时候给出.
+- 图片链接、链接框架会自动渲染出, 你无需显式给出.
 - 引用:
 > 重要: 所有正文内容必须基于实际信息, 保证百分百真实度
 - 信息来源已按获取顺序编号为 [1], [2], [3]...
-- 正文中直接使用 [1]
+- 正文中直接使用 [1] 格式引用, 只引用对回答有帮助的来源, 一次只能引用一个
 - 无需给出参考文献列表, 系统会自动生成

 ## 用户消息
@@ -96,24 +99,6 @@ AGENT_SP_INSTRUCT_VISION_ADD = """
 """

 AGENT_SP_SEARCH_ADD = """
-##
-```text
+## 联网信息
 {search_msgs}
-```
-"""
-
-AGENT_SP_PAGE_ADD = """
-## 页面内容专家消息
-```text
-{page_msgs}
-```
-- 引用页面内容时, 必须使用 `page:id` 格式
-"""
-
-AGENT_SP_IMAGE_SEARCH_ADD = """
-## 图像搜索专家消息
-```text
-{image_search_msgs}
-```
-- 每进行一次 internal_image_search, 挑选 1 张图像插入正文
 """