PyPI - entari-plugin-hyw - Versions diffs - 4.0.0rc7__py3-none-any.whl → 4.0.0rc9__py3-none-any.whl - Mend

entari-plugin-hyw 4.0.0rc7__py3-none-any.whl → 4.0.0rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (114) hide show

entari_plugin_hyw/modular_pipeline.py → hyw_core/pipeline.py RENAMED Viewed

@@ -7,16 +7,15 @@ Simpler flow with self-correction/feedback loop.
 import asyncio
 import time
+import re
 from typing import Any, Dict, List, Optional, Callable, Awaitable
 from loguru import logger
 from openai import AsyncOpenAI
-from .stage_base import StageContext
-from .stage_instruct import InstructStage
-from .stage_instruct_deepsearch import InstructDeepsearchStage
-from .stage_summary import SummaryStage
-from .stage_vision import VisionStage
+from .stages.base import StageContext, StageResult
+from .stages.base import StageContext, StageResult, BaseStage
+from .stages.summary import SummaryStage
 from .search import SearchService
@@ -25,27 +24,31 @@ class ModularPipeline:
     Modular Pipeline.
     Flow:
-    1. Instruct: Initial Discovery + Mode Decision (fast/deepsearch).
-    2. [Deepsearch only] Instruct Deepsearch Loop: Supplement info (max 3 iterations).
-    3. Summary: Generate final response.
+    1. Input Analysis:
+       - If Images -> Skip Search -> Summary
+       - If Text -> Execute Search (or URL fetch) -> Summary
+    2. Summary: Generate final response.
     """
-    def __init__(self, config: Any, send_func: Optional[Callable[[str], Awaitable[None]]] = None):
+    def __init__(self, config: Any, search_service: SearchService, send_func: Optional[Callable[[str], Awaitable[None]]] = None):
         self.config = config
         self.send_func = send_func
-        self.search_service = SearchService(config)
+        self.search_service = search_service
         self.client = AsyncOpenAI(base_url=config.base_url, api_key=config.api_key)
         # Initialize stages
-        self.instruct_stage = InstructStage(config, self.search_service, self.client, send_func=send_func)
-        self.instruct_deepsearch_stage = InstructDeepsearchStage(config, self.search_service, self.client)
         self.summary_stage = SummaryStage(config, self.search_service, self.client)
-        self.vision_stage = VisionStage(config, self.search_service, self.client)
-    def _has_vision_model(self) -> bool:
-        """Check if a vision model is configured."""
-        vision_cfg = self.config.get_model_config("vision")
-        return bool(vision_cfg.get("model_name"))
+    @property
+    def _send_func(self) -> Optional[Callable[[str], Awaitable[None]]]:
+        """Getter for _send_func (alias for send_func)."""
+        return self.send_func
+    @_send_func.setter
+    def _send_func(self, value: Optional[Callable[[str], Awaitable[None]]]):
+        """Setter for _send_func - updates send_func (it does not propagate the value to stages)."""
+        self.send_func = value
     async def execute(
         self,
@@ -53,8 +56,6 @@ class ModularPipeline:
         conversation_history: List[Dict],
         model_name: str = None,
         images: List[str] = None,
-        vision_model_name: str = None,
-        selected_vision_model: str = None,
     ) -> Dict[str, Any]:
         """Execute the modular pipeline."""
         start_time = time.time()
@@ -63,7 +64,7 @@ class ModularPipeline:
         active_model = model_name or self.config.model_name
         if not active_model:
              # Fallback to instruct model for logging/context
-             active_model = self.config.get_model_config("instruct").get("model_name")
+             active_model = self.config.get_model_config("instruct").model_name
         context = StageContext(
             user_input=user_input,
@@ -89,59 +90,92 @@ class ModularPipeline:
         try:
             logger.info(f"Pipeline: Processing '{user_input[:30]}...'")
-            # === Stage 0: Vision (if images and vision model configured) ===
-            if images and self._has_vision_model():
-                logger.info("Pipeline: Stage 0 - Vision (generating image description)")
-                vision_result = await self.vision_stage.execute(context, images)
+            # === Image-First Logic ===
+            # When user provides images, skip search and go directly to Summary
+            # Images will be passed through to the Summary stage
+            has_user_images = bool(images)
+            if has_user_images:
+                logger.info(f"Pipeline: {len(images)} user image(s) detected. Skipping search -> Instruct.")
+            # === Search-First Logic (only when no images) ===
+            # 1. URL Detection
+            # Updated to capture full URLs including queries and paths
+            url_pattern = re.compile(r'https?://(?:[-\w./?=&%#]+)')
+            found_urls = url_pattern.findall(user_input)
+            hit_content = False
+            # Skip URL fetch and search if user provided images or long query
+            is_long_query = len(user_input) > 20
+            if has_user_images:
+                hit_content = False  # Force into Instruct path
+            elif is_long_query:
+                logger.info(f"Pipeline: Long query ({len(user_input)} chars). Skipping direct search/fetch -> Instruct.")
+                hit_content = False
+            elif found_urls:
+                logger.info(f"Pipeline: Detected {len(found_urls)} URLs. Executing direct fetch...")
+                # Fetch pages (borrowing logic from InstructStage's batch fetch would be ideal,
+                # but we'll use search_service directly and simulate what Instruct did for context)
-                if vision_result.success and vision_result.data.get("description"):
-                    context.vision_description = vision_result.data["description"]
-                    logger.info(f"Pipeline: Vision description generated ({len(context.vision_description)} chars)")
-                    # Add vision trace
-                    trace["vision"] = vision_result.trace
-                    usage_totals["input_tokens"] += vision_result.usage.get("input_tokens", 0)
-                    usage_totals["output_tokens"] += vision_result.usage.get("output_tokens", 0)
-                    # Clear images since we have the description now
-                    # (don't pass raw images to later stages when using vision model)
-                    images = []
-            # === Stage 1: Instruct (Initial Discovery) ===
-            logger.info("Pipeline: Stage 1 - Instruct")
-            instruct_result = await self.instruct_stage.execute(context)
-            # Trace & Usage
-            instruct_result.trace["stage_name"] = "Instruct (Round 1)"
-            trace["instruct_rounds"].append(instruct_result.trace)
-            usage_totals["input_tokens"] += instruct_result.usage.get("input_tokens", 0)
-            usage_totals["output_tokens"] += instruct_result.usage.get("output_tokens", 0)
-            # Check refuse
-            if context.should_refuse:
-                return self._build_refusal_response(context, conversation_history, active_model, stats)
+                # Fetch
+                fetch_results = await self.search_service.fetch_pages_batch(found_urls)
+                # Pre-render screenshots if needed (similar to InstructStage logic)
+                # For brevity/cleanliness, assuming fetch_pages_batch returns what we need or we process it.
+                # Ideally we want screenshots for the UI. The service.fetch_page usually returns raw data.
+                # We need to render them if we want screenshots.
+                # To keep it simple for this file, we'll skip complex screenshot rendering here OR
+                # we rely on the summary stage to just use the text.
+                # But the user logic implies "Search/Fetch Hit -> Summary".
+                # Let's populate context.web_results
+                for i, page_data in enumerate(fetch_results):
+                    if page_data.get("content"):
+                         hit_content = True
+                         context.web_results.append({
+                             "_id": context.next_id(),
+                             "_type": "page",
+                             "title": page_data.get("title", "Page"),
+                             "url": page_data.get("url", found_urls[i]),
+                             "content": page_data.get("content", ""),
+                             "images": page_data.get("images", []),
+                             # For now, no screenshot unless we call renderer.
+                             # If critical, we can add it later.
+                         })
-            # === Stage 2: Deepsearch Loop (if mode is deepsearch) ===
-            if context.selected_mode == "deepsearch":
-                MAX_DEEPSEARCH_ITERATIONS = 3
-                logger.info(f"Pipeline: Mode is 'deepsearch', starting loop (max {MAX_DEEPSEARCH_ITERATIONS} iterations)")
+            # 2. Search (if no URLs or just always try search if simple query?)
+            # The prompt says: "judging result quantity > 0".
+            if not hit_content and not has_user_images and not is_long_query and user_input.strip():
+                logger.info("Pipeline: No URLs found or fetched. Executing direct search...")
+                search_start = time.time()
+                search_results = await self.search_service.search(user_input)
+                context.search_time = time.time() - search_start
-                for i in range(MAX_DEEPSEARCH_ITERATIONS):
-                    logger.info(f"Pipeline: Stage 2 - Deepsearch Iteration {i + 1}")
-                    deepsearch_result = await self.instruct_deepsearch_stage.execute(context)
-                    # Trace & Usage
-                    deepsearch_result.trace["stage_name"] = f"Deepsearch (Iteration {i + 1})"
-                    trace["instruct_rounds"].append(deepsearch_result.trace)
-                    usage_totals["input_tokens"] += deepsearch_result.usage.get("input_tokens", 0)
-                    usage_totals["output_tokens"] += deepsearch_result.usage.get("output_tokens", 0)
-                    # Check if should stop
-                    if deepsearch_result.data.get("should_stop"):
-                        logger.info(f"Pipeline: Deepsearch loop ended at iteration {i + 1}")
-                        break
-            else:
-                logger.info("Pipeline: Mode is 'fast', skipping deepsearch stage")
+                # Filter out the raw debug page
+                valid_results = [r for r in search_results if not r.get("_hidden")]
+                if valid_results:
+                    logger.info(f"Pipeline: Search found {len(valid_results)} results in {context.search_time:.2f}s. Proceeding to Summary.")
+                    hit_content = True
+                    for item in search_results: # Add all, including hidden debug ones if needed by history
+                        item["_id"] = context.next_id()
+                        if "_type" not in item: item["_type"] = "search"
+                        item["query"] = user_input
+                        context.web_results.append(item)
+                else:
+                    logger.info("Pipeline: Search yielded 0 results.")
+            # === Branching ===
+            if hit_content and not has_user_images:
+                # -> Summary Stage (search/URL results available)
+                logger.info("Pipeline: Content found (URL/Search). Proceeding to Summary.")
+            # If no content was found and no images, we still proceed to Summary but with empty context (Direct Chat)
+            # If images, we proceed to Summary with images.
+            # Refusal check from search results? (Unlikely, but good to keep in mind)
+            pass
             # === Parallel Execution: Summary Generation + Image Prefetching ===
             # We run image prefetching concurrently with Summary generation to save time.
@@ -169,6 +203,9 @@ class ModularPipeline:
                         if r.get("_type") == "page" and r.get("screenshot_b64"):
                             summary_input_images.append(r["screenshot_b64"])
+                if context.should_refuse:
+                     return StageResult(success=True, data={"content": "Refused"}, usage={}, trace={}), 0.0
                 res = await self.summary_stage.execute(
                     context,
                     images=summary_input_images if summary_input_images else None
@@ -204,6 +241,10 @@ class ModularPipeline:
             summary_result, summary_time = await summary_task
             cached_map, prefetch_time = await prefetch_task
+            if context.should_refuse:
+                # Double check if summary triggered refusal
+                return self._build_refusal_response(context, conversation_history, active_model, stats)
             time_diff = abs(summary_time - prefetch_time)
             if summary_time > prefetch_time:
                 logger.info(f"Pipeline: Image Prefetch finished first ({prefetch_time:.2f}s). Summary took {summary_time:.2f}s. (Waited {time_diff:.2f}s for Summary)")
@@ -268,7 +309,8 @@ class ModularPipeline:
                 },
                 "stages_used": stages_used,
                 "web_results": context.web_results,
-                "vision_trace": trace.get("vision"),
+                "trace": trace,
                 "instruct_traces": trace.get("instruct_rounds", []),
             }
@@ -378,6 +420,8 @@ class ModularPipeline:
         # Sort: Fetched first
         search_refs.sort(key=lambda x: x["is_fetched"], reverse=True)
+        logger.debug(f"_build_stages_ui: Found {len(search_refs)} search refs from {len(context.web_results)} web_results")
         if search_refs:
             stages.append({
                 "name": "Search",
@@ -385,30 +429,10 @@ class ModularPipeline:
                 "icon_config": "openai",
                 "provider": "Web",
                 "references": search_refs,
-                "description": f"Found {len(search_refs)} results."
+                "description": f"Found {len(search_refs)} results.",
+                "time": getattr(context, 'search_time', 0)
             })
-        # 2. Vision Stage (if used)
-        if trace.get("vision"):
-            v = trace["vision"]
-            if not v.get("skipped"):
-                usage = v.get("usage", {})
-                vision_cfg = self.config.get_model_config("vision")
-                input_price = vision_cfg.get("input_price") or 0
-                output_price = vision_cfg.get("output_price") or 0
-                cost = (usage.get("input_tokens", 0) * input_price + usage.get("output_tokens", 0) * output_price) / 1_000_000
-                stages.append({
-                    "name": "Vision",
-                    "model": v.get("model"),
-                    "icon_config": "google",
-                    "provider": "Vision",
-                    "time": v.get("time", 0),
-                    "description": f"Analyzed {v.get('images_count', 0)} image(s).",
-                    "usage": usage,
-                    "cost": cost
-                })
         # 2. Instruct Rounds
         for i, t in enumerate(trace.get("instruct_rounds", [])):
             stage_name = t.get("stage_name", f"Analysis {i+1}")
@@ -424,8 +448,8 @@ class ModularPipeline:
             # Calculate cost from config prices
             usage = t.get("usage", {})
             instruct_cfg = self.config.get_model_config("instruct")
-            input_price = instruct_cfg.get("input_price") or 0
-            output_price = instruct_cfg.get("output_price") or 0
+            input_price = instruct_cfg.input_price or 0
+            output_price = instruct_cfg.output_price or 0
             cost = (usage.get("input_tokens", 0) * input_price + usage.get("output_tokens", 0) * output_price) / 1_000_000
             stages.append({
@@ -444,8 +468,8 @@ class ModularPipeline:
             s = trace["summary"]
             usage = s.get("usage", {})
             main_cfg = self.config.get_model_config("main")
-            input_price = main_cfg.get("input_price") or 0
-            output_price = main_cfg.get("output_price") or 0
+            input_price = main_cfg.input_price or 0
+            output_price = main_cfg.output_price or 0
             cost = (usage.get("input_tokens", 0) * input_price + usage.get("output_tokens", 0) * output_price) / 1_000_000
             stages.append({

{entari_plugin_hyw → hyw_core}/search.py RENAMED Viewed

@@ -5,12 +5,11 @@ import time
 from typing import List, Dict, Any, Optional
 from loguru import logger
-from .browser.service import get_screenshot_service
-# New engines
-from .browser.engines.bing import BingEngine
-from .browser.engines.duckduckgo import DuckDuckGoEngine
-from .browser.engines.google import GoogleEngine
-from .browser.engines.default import DefaultEngine
+from .browser_control.service import get_screenshot_service
+# Search engines from browser_control subpackage
+from .browser_control.engines.duckduckgo import DuckDuckGoEngine
+from .browser_control.engines.google import GoogleEngine
+from .browser_control.engines.default import DefaultEngine
 class SearchService:
     def __init__(self, config: Any):
@@ -27,16 +26,14 @@ class SearchService:
         if self._engine_name:
             self._engine_name = self._engine_name.lower()
-        if self._engine_name == "bing":
-            self._engine = BingEngine()
-        elif self._engine_name == "google":
+        if self._engine_name == "google":
             self._engine = GoogleEngine()
-        elif self._engine_name == "duckduckgo":
-            self._engine = DuckDuckGoEngine()
+        elif self._engine_name == "default_address_bar": # Explicitly requested address bar capability if needed
+             self._engine = DefaultEngine()
         else:
-            # Default: use browser address bar search (Google-based)
-            self._engine = DefaultEngine()
-            self._engine_name = "default"
+            # Default: use DuckDuckGo
+            self._engine = DuckDuckGoEngine()
+            self._engine_name = "duckduckgo"
         logger.info(f"SearchService initialized with engine: {self._engine_name}")
@@ -156,3 +153,11 @@ class SearchService:
             timeout = self._fetch_timeout
         service = get_screenshot_service(headless=self._headless)
         return await service.fetch_page(url, timeout=timeout, include_screenshot=include_screenshot)
+    async def screenshot_url(self, url: str, full_page: bool = True) -> Optional[str]:
+        """
+        Capture a screenshot of a URL.
+        Delegates to screenshot service.
+        """
+        service = get_screenshot_service(headless=self._headless)
+        return await service.screenshot_url(url, full_page=full_page)

hyw_core/stages/__init__.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""
+hyw_core.stages - Pipeline Stages
+This subpackage provides the pipeline stage implementations:
+- BaseStage: Abstract base class for all stages
+- StageContext: Shared context between stages
+- StageResult: Stage execution result
+- InstructStage: Initial task planning and search execution (not imported or exported by this module)
+- SummaryStage: Final response generation
+"""
+from .base import BaseStage, StageContext, StageResult
+from .summary import SummaryStage
+__all__ = [
+    "BaseStage",
+    "StageContext",
+    "StageResult",
+    "SummaryStage",
+]

entari_plugin_hyw/stage_base.py → hyw_core/stages/base.py RENAMED Viewed

@@ -39,8 +39,8 @@ class StageContext:
     # Model capabilities
     image_input_supported: bool = True
-    # Vision description (from VisionStage)
-    vision_description: str = ""
+    # Search timing
+    search_time: float = 0.0
     def next_id(self) -> int:
         """Get next global ID."""

entari_plugin_hyw/stage_summary.py → hyw_core/stages/summary.py RENAMED Viewed

@@ -12,8 +12,8 @@ from typing import Any, Dict, List, Optional
 from loguru import logger
 from openai import AsyncOpenAI
-from .stage_base import BaseStage, StageContext, StageResult
-from .definitions import SUMMARY_REPORT_SP
+from .base import BaseStage, StageContext, StageResult
+from ..definitions import SUMMARY_REPORT_SP, get_refuse_answer_tool
 class SummaryStage(BaseStage):
@@ -35,6 +35,9 @@ class SummaryStage(BaseStage):
         # Format context from web results
         web_content = self._format_web_content(context)
+        # Tools
+        refuse_tool = get_refuse_answer_tool()
         full_context = f"{context.agent_context}\n\n{web_content}"
         # Select prompt
@@ -47,15 +50,13 @@ class SummaryStage(BaseStage):
         # Build Context Message
         context_message = f"## Web Search & Page Content\n\n```context\n{full_context}\n```"
-        # Add vision description if present (from VisionStage)
-        if context.vision_description:
-            vision_context = f"## 用户图片描述\n\n{context.vision_description}"
-            context_message = f"{vision_context}\n\n{context_message}"
         # Build user content
         user_text = context.user_input or "..."
         if images:
-            user_content: List[Dict[str, Any]] = [{"type": "text", "text": user_text}]
+            # Add image context message for multimodal input
+            image_context = f"[System: The user has provided {len(images)} image(s). Please analyze these images together with the text query to provide a comprehensive response.]"
+            user_content: List[Dict[str, Any]] = [{"type": "text", "text": f"{image_context}\n\n{user_text}"}]
             for img_b64 in images:
                 url = f"data:image/jpeg;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
                 user_content.append({"type": "image_url", "image_url": {"url": url}})
@@ -72,18 +73,21 @@ class SummaryStage(BaseStage):
         model_cfg = self.config.get_model_config("main")
         client = self._client_for(
-            api_key=model_cfg.get("api_key"),
-            base_url=model_cfg.get("base_url")
+            api_key=model_cfg.api_key,
+            base_url=model_cfg.base_url
         )
-        model = model_cfg.get("model_name") or self.config.model_name
+        model = model_cfg.model_name or self.config.model_name
         try:
             response = await client.chat.completions.create(
                 model=model,
                 messages=messages,
                 temperature=self.config.temperature,
                 extra_body=getattr(self.config, "summary_extra_body", None),
+                tools=[refuse_tool],
+                tool_choice="auto",
             )
         except Exception as e:
             logger.error(f"SummaryStage LLM error: {e}")
@@ -98,6 +102,25 @@ class SummaryStage(BaseStage):
             usage["input_tokens"] = getattr(response.usage, "prompt_tokens", 0) or 0
             usage["output_tokens"] = getattr(response.usage, "completion_tokens", 0) or 0
+        # Handle Tool Calls (Refusal)
+        tool_calls = response.choices[0].message.tool_calls
+        if tool_calls:
+            for tc in tool_calls:
+                if tc.function.name == "refuse_answer":
+                    import json
+                    try:
+                        args = json.loads(tc.function.arguments)
+                        reason = args.get("reason", "Refused")
+                        context.should_refuse = True
+                        context.refuse_reason = reason
+                        return StageResult(
+                            success=True,
+                            data={"content": f"Refused: {reason}"},
+                            usage=usage,
+                            trace={"skipped": True, "reason": reason}
+                        )
+                    except: pass
         content = (response.choices[0].message.content or "").strip()
         return StageResult(
@@ -106,7 +129,7 @@ class SummaryStage(BaseStage):
             usage=usage,
             trace={
                 "model": model,
-                "provider": model_cfg.get("model_provider") or "Unknown",
+                "provider": model_cfg.model_provider or "Unknown",
                 "usage": usage,
                 "system_prompt": system_prompt,
                 "context_message": context_message,  # Includes web search results and page content

entari-plugin-hyw 4.0.0rc7__py3-none-any.whl → 4.0.0rc9__py3-none-any.whl

entari-plugin-hyw 4.0.0rc7__py3-none-any.whl → 4.0.0rc9__py3-none-any.whl