entari-plugin-hyw 4.0.0rc6__py3-none-any.whl → 4.0.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of entari-plugin-hyw might be problematic.

Files changed (114)
  1. entari_plugin_hyw/Untitled-1 +1865 -0
  2. entari_plugin_hyw/__init__.py +733 -379
  3. entari_plugin_hyw/history.py +60 -57
  4. entari_plugin_hyw/misc.py +3 -0
  5. entari_plugin_hyw/search_cache.py +154 -0
  6. {entari_plugin_hyw-4.0.0rc6.dist-info → entari_plugin_hyw-4.0.0rc8.dist-info}/METADATA +3 -1
  7. entari_plugin_hyw-4.0.0rc8.dist-info/RECORD +68 -0
  8. {entari_plugin_hyw-4.0.0rc6.dist-info → entari_plugin_hyw-4.0.0rc8.dist-info}/WHEEL +1 -1
  9. {entari_plugin_hyw-4.0.0rc6.dist-info → entari_plugin_hyw-4.0.0rc8.dist-info}/top_level.txt +1 -0
  10. hyw_core/__init__.py +94 -0
  11. hyw_core/browser_control/__init__.py +65 -0
  12. hyw_core/browser_control/assets/card-dist/index.html +409 -0
  13. hyw_core/browser_control/assets/index.html +5691 -0
  14. hyw_core/browser_control/engines/__init__.py +17 -0
  15. hyw_core/browser_control/engines/default.py +166 -0
  16. {entari_plugin_hyw/browser → hyw_core/browser_control}/engines/duckduckgo.py +42 -8
  17. {entari_plugin_hyw/browser → hyw_core/browser_control}/engines/google.py +1 -1
  18. {entari_plugin_hyw/browser → hyw_core/browser_control}/manager.py +15 -8
  19. entari_plugin_hyw/render_vue.py → hyw_core/browser_control/renderer.py +29 -14
  20. hyw_core/browser_control/service.py +720 -0
  21. hyw_core/config.py +154 -0
  22. hyw_core/core.py +322 -0
  23. hyw_core/definitions.py +83 -0
  24. entari_plugin_hyw/modular_pipeline.py → hyw_core/pipeline.py +204 -86
  25. {entari_plugin_hyw → hyw_core}/search.py +60 -19
  26. hyw_core/stages/__init__.py +21 -0
  27. entari_plugin_hyw/stage_base.py → hyw_core/stages/base.py +3 -0
  28. entari_plugin_hyw/stage_summary.py → hyw_core/stages/summary.py +36 -7
  29. entari_plugin_hyw/assets/card-dist/index.html +0 -387
  30. entari_plugin_hyw/browser/__init__.py +0 -10
  31. entari_plugin_hyw/browser/engines/bing.py +0 -95
  32. entari_plugin_hyw/browser/service.py +0 -304
  33. entari_plugin_hyw/card-ui/.gitignore +0 -24
  34. entari_plugin_hyw/card-ui/README.md +0 -5
  35. entari_plugin_hyw/card-ui/index.html +0 -16
  36. entari_plugin_hyw/card-ui/package-lock.json +0 -2342
  37. entari_plugin_hyw/card-ui/package.json +0 -31
  38. entari_plugin_hyw/card-ui/public/logos/anthropic.svg +0 -1
  39. entari_plugin_hyw/card-ui/public/logos/cerebras.svg +0 -9
  40. entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
  41. entari_plugin_hyw/card-ui/public/logos/gemini.svg +0 -1
  42. entari_plugin_hyw/card-ui/public/logos/google.svg +0 -1
  43. entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
  44. entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
  45. entari_plugin_hyw/card-ui/public/logos/microsoft.svg +0 -15
  46. entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
  47. entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
  48. entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
  49. entari_plugin_hyw/card-ui/public/logos/openai.svg +0 -1
  50. entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
  51. entari_plugin_hyw/card-ui/public/logos/perplexity.svg +0 -24
  52. entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
  53. entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
  54. entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
  55. entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
  56. entari_plugin_hyw/card-ui/public/vite.svg +0 -1
  57. entari_plugin_hyw/card-ui/src/App.vue +0 -756
  58. entari_plugin_hyw/card-ui/src/assets/vue.svg +0 -1
  59. entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +0 -41
  60. entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +0 -382
  61. entari_plugin_hyw/card-ui/src/components/SectionCard.vue +0 -41
  62. entari_plugin_hyw/card-ui/src/components/StageCard.vue +0 -240
  63. entari_plugin_hyw/card-ui/src/main.ts +0 -5
  64. entari_plugin_hyw/card-ui/src/style.css +0 -29
  65. entari_plugin_hyw/card-ui/src/test_regex.js +0 -103
  66. entari_plugin_hyw/card-ui/src/types.ts +0 -61
  67. entari_plugin_hyw/card-ui/tsconfig.app.json +0 -16
  68. entari_plugin_hyw/card-ui/tsconfig.json +0 -7
  69. entari_plugin_hyw/card-ui/tsconfig.node.json +0 -26
  70. entari_plugin_hyw/card-ui/vite.config.ts +0 -16
  71. entari_plugin_hyw/definitions.py +0 -155
  72. entari_plugin_hyw/stage_instruct.py +0 -345
  73. entari_plugin_hyw/stage_instruct_deepsearch.py +0 -104
  74. entari_plugin_hyw-4.0.0rc6.dist-info/RECORD +0 -100
  75. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/anthropic.svg +0 -0
  76. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/cerebras.svg +0 -0
  77. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/deepseek.png +0 -0
  78. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/gemini.svg +0 -0
  79. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/google.svg +0 -0
  80. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/grok.png +0 -0
  81. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/huggingface.png +0 -0
  82. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/microsoft.svg +0 -0
  83. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/minimax.png +0 -0
  84. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/mistral.png +0 -0
  85. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/nvida.png +0 -0
  86. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/openai.svg +0 -0
  87. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/openrouter.png +0 -0
  88. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/perplexity.svg +0 -0
  89. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/qwen.png +0 -0
  90. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/xai.png +0 -0
  91. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/xiaomi.png +0 -0
  92. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/zai.png +0 -0
  93. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/vite.svg +0 -0
  94. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/anthropic.svg +0 -0
  95. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/cerebras.svg +0 -0
  96. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/deepseek.png +0 -0
  97. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/gemini.svg +0 -0
  98. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/google.svg +0 -0
  99. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/grok.png +0 -0
  100. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/huggingface.png +0 -0
  101. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/microsoft.svg +0 -0
  102. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/minimax.png +0 -0
  103. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/mistral.png +0 -0
  104. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/nvida.png +0 -0
  105. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/openai.svg +0 -0
  106. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/openrouter.png +0 -0
  107. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/perplexity.svg +0 -0
  108. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/qwen.png +0 -0
  109. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/xai.png +0 -0
  110. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/xiaomi.png +0 -0
  111. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/zai.png +0 -0
  112. {entari_plugin_hyw/browser → hyw_core/browser_control}/engines/base.py +0 -0
  113. {entari_plugin_hyw/browser → hyw_core/browser_control}/landing.html +0 -0
  114. {entari_plugin_hyw → hyw_core}/image_cache.py +0 -0
entari_plugin_hyw/modular_pipeline.py → hyw_core/pipeline.py

@@ -7,15 +7,15 @@ Simpler flow with self-correction/feedback loop.
 
  import asyncio
  import time
+ import re
  from typing import Any, Dict, List, Optional, Callable, Awaitable
 
  from loguru import logger
  from openai import AsyncOpenAI
 
- from .stage_base import StageContext
- from .stage_instruct import InstructStage
- from .stage_instruct_deepsearch import InstructDeepsearchStage
- from .stage_summary import SummaryStage
+ from .stages.base import StageContext, StageResult
+ from .stages.base import StageContext, StageResult, BaseStage
+ from .stages.summary import SummaryStage
  from .search import SearchService
 
 
@@ -24,36 +24,47 @@ class ModularPipeline:
  Modular Pipeline.
 
  Flow:
- 1. Instruct: Initial Discovery + Mode Decision (fast/deepsearch).
- 2. [Deepsearch only] Instruct Deepsearch Loop: Supplement info (max 3 iterations).
- 3. Summary: Generate final response.
+ 1. Input Analysis:
+ - If Images -> Skip Search -> Summary
+ - If Text -> Execute Search (or URL fetch) -> Summary
+ 2. Summary: Generate final response.
  """
 
- def __init__(self, config: Any, send_func: Optional[Callable[[str], Awaitable[None]]] = None):
+ def __init__(self, config: Any, search_service: SearchService, send_func: Optional[Callable[[str], Awaitable[None]]] = None):
  self.config = config
  self.send_func = send_func
- self.search_service = SearchService(config)
+ self.search_service = search_service
  self.client = AsyncOpenAI(base_url=config.base_url, api_key=config.api_key)
 
  # Initialize stages
- self.instruct_stage = InstructStage(config, self.search_service, self.client)
- self.instruct_deepsearch_stage = InstructDeepsearchStage(config, self.search_service, self.client)
  self.summary_stage = SummaryStage(config, self.search_service, self.client)
 
+ @property
+ def _send_func(self) -> Optional[Callable[[str], Awaitable[None]]]:
+ """Getter for _send_func (alias for send_func)."""
+ return self.send_func
+
+ @_send_func.setter
+ def _send_func(self, value: Optional[Callable[[str], Awaitable[None]]]):
+ """Setter for _send_func - updates send_func and propagates to stages."""
+ self.send_func = value
+
+
  async def execute(
  self,
  user_input: str,
  conversation_history: List[Dict],
  model_name: str = None,
  images: List[str] = None,
- vision_model_name: str = None,
- selected_vision_model: str = None,
  ) -> Dict[str, Any]:
  """Execute the modular pipeline."""
  start_time = time.time()
  stats = {"start_time": start_time}
  usage_totals = {"input_tokens": 0, "output_tokens": 0}
  active_model = model_name or self.config.model_name
+ if not active_model:
+ # Fallback to instruct model for logging/context
+ active_model = self.config.get_model_config("instruct").model_name
 
  context = StageContext(
  user_input=user_input,
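The `_send_func` property introduced above is a backward-compatibility alias for the public `send_func` attribute. Below is a minimal, self-contained sketch of that alias pattern; the `PipelineLike` class and the `send` coroutine are illustrative stand-ins, not names from the package.

```python
# Sketch of the alias-property pattern used by ModularPipeline._send_func.
# Legacy call sites that assign to `_send_func` transparently update `send_func`.
import asyncio
from typing import Awaitable, Callable, Optional

class PipelineLike:
    def __init__(self) -> None:
        self.send_func: Optional[Callable[[str], Awaitable[None]]] = None

    @property
    def _send_func(self) -> Optional[Callable[[str], Awaitable[None]]]:
        return self.send_func

    @_send_func.setter
    def _send_func(self, value: Optional[Callable[[str], Awaitable[None]]]) -> None:
        self.send_func = value

async def send(text: str) -> None:  # stand-in message sender
    print(text)

p = PipelineLike()
p._send_func = send            # legacy-style assignment...
assert p.send_func is send     # ...lands on the new public attribute
asyncio.run(p.send_func("ok"))
```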
@@ -79,56 +90,167 @@ class ModularPipeline:
  try:
  logger.info(f"Pipeline: Processing '{user_input[:30]}...'")
 
- # === Stage 1: Instruct (Initial Discovery) ===
- logger.info("Pipeline: Stage 1 - Instruct")
- instruct_result = await self.instruct_stage.execute(context)
+ # === Image-First Logic ===
+ # When user provides images, skip search and go directly to Instruct
+ # Images will be passed through to both Instruct and Summary stages
+ has_user_images = bool(images)
+ if has_user_images:
+ logger.info(f"Pipeline: {len(images)} user image(s) detected. Skipping search -> Instruct.")
+
+ # === Search-First Logic (only when no images) ===
+ # 1. URL Detection
+ # Updated to capture full URLs including queries and paths
+ url_pattern = re.compile(r'https?://(?:[-\w./?=&%#]+)')
+ found_urls = url_pattern.findall(user_input)
+
+ hit_content = False
+
+ # Skip URL fetch and search if user provided images or long query
+ is_long_query = len(user_input) > 20
+ if has_user_images:
+ hit_content = False # Force into Instruct path
+ elif is_long_query:
+ logger.info(f"Pipeline: Long query ({len(user_input)} chars). Skipping direct search/fetch -> Instruct.")
+ hit_content = False
+ elif found_urls:
+ logger.info(f"Pipeline: Detected {len(found_urls)} URLs. Executing direct fetch...")
+ # Fetch pages (borrowing logic from InstructStage's batch fetch would be ideal,
+ # but we'll use search_service directly and simulate what Instruct did for context)
+
+ # Fetch
+ fetch_results = await self.search_service.fetch_pages_batch(found_urls)
+
+ # Pre-render screenshots if needed (similar to InstructStage logic)
+ # For brevity/cleanliness, assuming fetch_pages_batch returns what we need or we process it.
+ # Ideally we want screenshots for the UI. The serivce.fetch_page usually returns raw data.
+ # We need to render them if we want screenshots.
+ # To keep it simple for this file, we'll skip complex screenshot rendering here OR
+ # we rely on the summary stage to just use the text.
+ # But the user logic implies "Search/Fetch Hit -> Summary".
+
+ # Let's populate context.web_results
+ for i, page_data in enumerate(fetch_results):
+ if page_data.get("content"):
+ hit_content = True
+ context.web_results.append({
+ "_id": context.next_id(),
+ "_type": "page",
+ "title": page_data.get("title", "Page"),
+ "url": page_data.get("url", found_urls[i]),
+ "content": page_data.get("content", ""),
+ "images": page_data.get("images", []),
+ # For now, no screenshot unless we call renderer.
+ # If critical, we can add it later.
+ })
+
+ # 2. Search (if no URLs or just always try search if simple query?)
+ # The prompt says: "judging result quantity > 0".
+ if not hit_content and not has_user_images and not is_long_query and user_input.strip():
+ logger.info("Pipeline: No URLs found or fetched. Executing direct search...")
+ search_start = time.time()
+ search_results = await self.search_service.search(user_input)
+ context.search_time = time.time() - search_start
+
+ # Filter out the raw debug page
+ valid_results = [r for r in search_results if not r.get("_hidden")]
+
+ if valid_results:
+ logger.info(f"Pipeline: Search found {len(valid_results)} results in {context.search_time:.2f}s. Proceeding to Summary.")
+ hit_content = True
+ for item in search_results: # Add all, including hidden debug ones if needed by history
+ item["_id"] = context.next_id()
+ if "_type" not in item: item["_type"] = "search"
+ item["query"] = user_input
+ context.web_results.append(item)
+ else:
+ logger.info("Pipeline: Search yielded 0 results.")
+
+ # === Branching ===
+ if hit_content and not has_user_images:
+ # -> Summary Stage (search/URL results available)
+ logger.info("Pipeline: Content found (URL/Search). Proceeding to Summary.")
+
+ # If no content was found and no images, we still proceed to Summary but with empty context (Direct Chat)
+ # If images, we proceed to Summary with images.
+
+ # Refusal check from search results? (Unlikely, but good to keep in mind)
+ pass
+
+
+ # === Parallel Execution: Summary Generation + Image Prefetching ===
+ # We run image prefetching concurrently with Summary generation to save time.
+
+ # 1. Prepare candidates for prefetch (all images in search results)
+ all_candidate_urls = set()
+ for r in context.web_results:
+ # Add images from search results/pages
+ if r.get("images"):
+ for img in r["images"]:
+ if img and isinstance(img, str) and img.startswith("http"):
+ all_candidate_urls.add(img)
 
- # Trace & Usage
- instruct_result.trace["stage_name"] = "Instruct (Round 1)"
- trace["instruct_rounds"].append(instruct_result.trace)
- usage_totals["input_tokens"] += instruct_result.usage.get("input_tokens", 0)
- usage_totals["output_tokens"] += instruct_result.usage.get("output_tokens", 0)
+ prefetch_list = list(all_candidate_urls)
+ logger.info(f"Pipeline: Starting parallel execution (Summary + Prefetch {len(prefetch_list)} images)")
+
+ # 2. Define parallel tasks with timing
+ async def timed_summary():
+ t0 = time.time()
+ # Collect page screenshots if image mode
+ summary_input_images = list(images) if images else []
+ if context.image_input_supported:
+ # Collect pre-rendered screenshots from web_results
+ for r in context.web_results:
+ if r.get("_type") == "page" and r.get("screenshot_b64"):
+ summary_input_images.append(r["screenshot_b64"])
+
+ if context.should_refuse:
+ return StageResult(success=True, data={"content": "Refused"}, usage={}, trace={}), 0.0
+
+ res = await self.summary_stage.execute(
+ context,
+ images=summary_input_images if summary_input_images else None
+ )
+ duration = time.time() - t0
+ return res, duration
+
+ async def timed_prefetch():
+ t0 = time.time()
+ if not prefetch_list:
+ return {}, 0.0
+ try:
+ from .image_cache import get_image_cache
+ cache = get_image_cache()
+ # Start prefetch (non-blocking kickoff)
+ cache.start_prefetch(prefetch_list)
+ # Wait for results (blocking until done)
+ res = await cache.get_all_cached(prefetch_list)
+ duration = time.time() - t0
+ return res, duration
+ except Exception as e:
+ logger.warning(f"Pipeline: Prefetch failed: {e}")
+ return {}, time.time() - t0
+
+ # 3. Execute concurrently
+ summary_task = asyncio.create_task(timed_summary())
+ prefetch_task = asyncio.create_task(timed_prefetch())
+
+ # Wait for both to complete
+ await asyncio.wait([summary_task, prefetch_task])
+
+ # 4. Process results and log timing
+ summary_result, summary_time = await summary_task
+ cached_map, prefetch_time = await prefetch_task
 
- # Check refuse
  if context.should_refuse:
+ # Double check if summary triggered refusal
  return self._build_refusal_response(context, conversation_history, active_model, stats)
 
- # === Stage 2: Deepsearch Loop (if mode is deepsearch) ===
- if context.selected_mode == "deepsearch":
- MAX_DEEPSEARCH_ITERATIONS = 3
- logger.info(f"Pipeline: Mode is 'deepsearch', starting loop (max {MAX_DEEPSEARCH_ITERATIONS} iterations)")
-
- for i in range(MAX_DEEPSEARCH_ITERATIONS):
- logger.info(f"Pipeline: Stage 2 - Deepsearch Iteration {i + 1}")
- deepsearch_result = await self.instruct_deepsearch_stage.execute(context)
-
- # Trace & Usage
- deepsearch_result.trace["stage_name"] = f"Deepsearch (Iteration {i + 1})"
- trace["instruct_rounds"].append(deepsearch_result.trace)
- usage_totals["input_tokens"] += deepsearch_result.usage.get("input_tokens", 0)
- usage_totals["output_tokens"] += deepsearch_result.usage.get("output_tokens", 0)
-
- # Check if should stop
- if deepsearch_result.data.get("should_stop"):
- logger.info(f"Pipeline: Deepsearch loop ended at iteration {i + 1}")
- break
+ time_diff = abs(summary_time - prefetch_time)
+ if summary_time > prefetch_time:
+ logger.info(f"Pipeline: Image Prefetch finished first ({prefetch_time:.2f}s). Summary took {summary_time:.2f}s. (Waited {time_diff:.2f}s for Summary)")
  else:
- logger.info("Pipeline: Mode is 'fast', skipping deepsearch stage")
-
- # === Stage 3: Summary ===
- # Collect page screenshots if image mode (already rendered in InstructStage)
- all_images = list(images) if images else []
-
- if context.image_input_supported:
- # Collect pre-rendered screenshots from web_results
- for r in context.web_results:
- if r.get("_type") == "page" and r.get("screenshot_b64"):
- all_images.append(r["screenshot_b64"])
-
- summary_result = await self.summary_stage.execute(
- context,
- images=all_images if all_images else None
- )
+ logger.info(f"Pipeline: Summary finished first ({summary_time:.2f}s). Image Prefetch took {prefetch_time:.2f}s. (Waited {time_diff:.2f}s for Prefetch)")
+
  trace["summary"] = summary_result.trace
  usage_totals["input_tokens"] += summary_result.usage.get("input_tokens", 0)
  usage_totals["output_tokens"] += summary_result.usage.get("output_tokens", 0)
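The parallel block above starts the Summary LLM call and the image prefetch as two tasks and waits for both, so total latency is bounded by the slower of the two. A self-contained sketch of the same timing pattern, with `asyncio.sleep` standing in for the real Summary call and prefetch:

```python
# Minimal sketch of the concurrency pattern: create both tasks, wait for both,
# then compare their durations the way the pipeline's timing log does.
import asyncio
import time

async def timed(name: str, seconds: float):
    t0 = time.time()
    await asyncio.sleep(seconds)  # placeholder for the LLM call / image prefetch
    return name, time.time() - t0

async def main() -> None:
    summary_task = asyncio.create_task(timed("summary", 0.2))
    prefetch_task = asyncio.create_task(timed("prefetch", 0.1))
    await asyncio.wait([summary_task, prefetch_task])   # both finished here
    _, summary_time = await summary_task
    _, prefetch_time = await prefetch_task
    waited = abs(summary_time - prefetch_time)
    print(f"summary={summary_time:.2f}s prefetch={prefetch_time:.2f}s waited={waited:.2f}s")

asyncio.run(main())
```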
@@ -139,40 +261,30 @@ class ModularPipeline:
  stats["total_time"] = time.time() - start_time
  structured = self._parse_response(summary_content, context)
 
- # === Image Caching (Prefetch images for UI) ===
- try:
- from .image_cache import get_image_cache
- cache = get_image_cache()
-
- # 1. Collect all image URLs from structured response
- all_image_urls = []
- for ref in structured.get("references", []):
- if ref.get("images"):
- all_image_urls.extend([img for img in ref["images"] if img and img.startswith("http")])
-
- if all_image_urls:
- # 2. Prefetch (wait for them as we are about to render)
- cached_map = await cache.get_all_cached(all_image_urls)
-
- # 3. Update structured response with cached (base64) URLs
+ # === Apply Cached Images ===
+ # Update structured response using the map from parallel prefetch
+ if cached_map:
+ try:
+ total_replaced = 0
  for ref in structured.get("references", []):
  if ref.get("images"):
- # Keep cached images, but preserve original URLs as fallback
  new_images = []
  for img in ref["images"]:
- # 1. Already Base64 (from Search Injection) -> Keep it
+ # 1. Already Base64 -> Keep it
  if img.startswith("data:"):
  new_images.append(img)
  continue
-
- # 2. Cached successfully -> Keep it
+
+ # 2. Check cache
  cached_val = cached_map.get(img)
  if cached_val and cached_val.startswith("data:"):
  new_images.append(cached_val)
- # 3. Else -> DROP IT (User request: "Delete Fallback, must download in advance")
+ total_replaced += 1
+ # 3. Else -> DROP IT (as per policy)
  ref["images"] = new_images
- except Exception as e:
- logger.warning(f"Pipeline: Image caching failed: {e}")
+ logger.debug(f"Pipeline: Replaced {total_replaced} images with cached versions")
+ except Exception as e:
+ logger.warning(f"Pipeline: Applying cached images failed: {e}")
 
  # Debug: Log image counts
  total_ref_images = sum(len(ref.get("images", []) or []) for ref in structured.get("references", []))
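The replacement loop above applies a strict policy: keep inline `data:` URIs, substitute prefetched base64 copies, and drop any image that was not cached. A standalone sketch of that policy as a pure function (the function and variable names are illustrative):

```python
# Sketch of the cached-image substitution policy applied to each reference.
from typing import Dict, List

def apply_cached_images(images: List[str], cached_map: Dict[str, str]) -> List[str]:
    new_images: List[str] = []
    for img in images:
        if img.startswith("data:"):              # 1. already base64 -> keep
            new_images.append(img)
            continue
        cached_val = cached_map.get(img)         # 2. cached -> use base64 copy
        if cached_val and cached_val.startswith("data:"):
            new_images.append(cached_val)
        # 3. otherwise drop it: no network fallback at render time
    return new_images

print(apply_cached_images(
    ["data:image/png;base64,AAA", "http://example.com/a.png", "http://example.com/b.png"],
    {"http://example.com/a.png": "data:image/png;base64,BBB"},
))
# -> ['data:image/png;base64,AAA', 'data:image/png;base64,BBB']
```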
@@ -197,6 +309,9 @@ class ModularPipeline:
  },
  "stages_used": stages_used,
  "web_results": context.web_results,
+ "trace": trace,
+
+ "instruct_traces": trace.get("instruct_rounds", []),
  }
 
  except Exception as e:
@@ -305,6 +420,8 @@ class ModularPipeline:
  # Sort: Fetched first
  search_refs.sort(key=lambda x: x["is_fetched"], reverse=True)
 
+ logger.debug(f"_build_stages_ui: Found {len(search_refs)} search refs from {len(context.web_results)} web_results")
+
  if search_refs:
  stages.append({
  "name": "Search",
@@ -312,9 +429,10 @@ class ModularPipeline:
  "icon_config": "openai",
  "provider": "Web",
  "references": search_refs,
- "description": f"Found {len(search_refs)} results."
+ "description": f"Found {len(search_refs)} results.",
+ "time": getattr(context, 'search_time', 0)
  })
-
+
  # 2. Instruct Rounds
  for i, t in enumerate(trace.get("instruct_rounds", [])):
  stage_name = t.get("stage_name", f"Analysis {i+1}")
@@ -330,8 +448,8 @@ class ModularPipeline:
  # Calculate cost from config prices
  usage = t.get("usage", {})
  instruct_cfg = self.config.get_model_config("instruct")
- input_price = instruct_cfg.get("input_price") or 0
- output_price = instruct_cfg.get("output_price") or 0
+ input_price = instruct_cfg.input_price or 0
+ output_price = instruct_cfg.output_price or 0
  cost = (usage.get("input_tokens", 0) * input_price + usage.get("output_tokens", 0) * output_price) / 1_000_000
 
  stages.append({
@@ -350,8 +468,8 @@ class ModularPipeline:
  s = trace["summary"]
  usage = s.get("usage", {})
  main_cfg = self.config.get_model_config("main")
- input_price = main_cfg.get("input_price") or 0
- output_price = main_cfg.get("output_price") or 0
+ input_price = main_cfg.input_price or 0
+ output_price = main_cfg.output_price or 0
  cost = (usage.get("input_tokens", 0) * input_price + usage.get("output_tokens", 0) * output_price) / 1_000_000
 
  stages.append({
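Both cost calculations above treat `input_price` and `output_price` as USD per million tokens, hence the division by 1_000_000. A worked example with made-up prices and token counts:

```python
# Illustrative cost arithmetic matching the formula used in _build_stages_ui.
usage = {"input_tokens": 12_000, "output_tokens": 800}
input_price = 0.15    # assumed: $ per 1M input tokens
output_price = 0.60   # assumed: $ per 1M output tokens

cost = (usage["input_tokens"] * input_price
        + usage["output_tokens"] * output_price) / 1_000_000
print(f"${cost:.6f}")  # -> $0.002280
```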
{entari_plugin_hyw → hyw_core}/search.py

@@ -5,11 +5,11 @@ import time
  from typing import List, Dict, Any, Optional
  from loguru import logger
 
- from .browser.service import get_screenshot_service
- # New engines
- from .browser.engines.bing import BingEngine
- from .browser.engines.duckduckgo import DuckDuckGoEngine
- from .browser.engines.google import GoogleEngine
+ from .browser_control.service import get_screenshot_service
+ # Search engines from browser_control subpackage
+ from .browser_control.engines.duckduckgo import DuckDuckGoEngine
+ from .browser_control.engines.google import GoogleEngine
+ from .browser_control.engines.default import DefaultEngine
 
  class SearchService:
  def __init__(self, config: Any):
@@ -21,17 +21,19 @@ class SearchService:
  # Domain blocking
  self._blocked_domains = getattr(config, "blocked_domains", []) or []
 
- # Select Engine
- self._engine_name = getattr(config, "search_engine", "bing").lower()
- if self._engine_name == "bing":
- self._engine = BingEngine()
- elif self._engine_name == "google":
+ # Select Engine - DefaultEngine when not specified
+ self._engine_name = getattr(config, "search_engine", None)
+ if self._engine_name:
+ self._engine_name = self._engine_name.lower()
+
+ if self._engine_name == "google":
  self._engine = GoogleEngine()
- elif self._engine_name == "duckduckgo":
- self._engine = DuckDuckGoEngine()
+ elif self._engine_name == "default_address_bar": # Explicitly requested address bar capability if needed
+ self._engine = DefaultEngine()
  else:
- # Default fallback
- self._engine = BingEngine()
+ # Default: use DuckDuckGo
+ self._engine = DuckDuckGoEngine()
+ self._engine_name = "duckduckgo"
 
  logger.info(f"SearchService initialized with engine: {self._engine_name}")
 
@@ -39,7 +41,8 @@ class SearchService:
  return self._engine.build_url(query, self._default_limit)
 
  async def search_batch(self, queries: List[str]) -> List[List[Dict[str, Any]]]:
- """Execute multiple searches concurrently."""
+ """Execute multiple searches concurrently using standard URL navigation."""
+ logger.info(f"SearchService: Batch searching {len(queries)} queries in parallel...")
  tasks = [self.search(q) for q in queries]
  return await asyncio.gather(*tasks)
 
@@ -58,17 +61,36 @@ class SearchService:
  final_query = f"{query} {exclusions}"
 
  url = self._build_search_url(final_query)
- logger.info(f"Search: '{query}' -> {url}")
-
+
  results = []
  try:
- # Fetch - Search parsing doesn't need screenshot, only HTML
- page_data = await self.fetch_page_raw(url, include_screenshot=False)
+ # Check if this is an address bar search (DefaultEngine)
+ if url.startswith("__ADDRESS_BAR_SEARCH__:"):
+ # Extract query from marker
+ search_query = url.replace("__ADDRESS_BAR_SEARCH__:", "")
+ logger.info(f"Search: '{query}' -> [Address Bar Search]")
+
+ # Use address bar input method
+ service = get_screenshot_service(headless=self._headless)
+ page_data = await service.search_via_address_bar(search_query)
+ else:
+ logger.info(f"Search: '{query}' -> {url}")
+ # Standard URL navigation
+ page_data = await self.fetch_page_raw(url, include_screenshot=False)
+
  content = page_data.get("html", "") or page_data.get("content", "")
+
+ # Debug: Log content length
+ logger.debug(f"Search: Raw content length = {len(content)} chars")
+ if len(content) < 500:
+ logger.warning(f"Search: Content too short, may be empty/blocked. First 500 chars: {content[:500]}")
 
  # Parse Results (skip raw page - only return parsed results)
  if content and not content.startswith("Error"):
  parsed = self._engine.parse(content)
+
+ # Debug: Log parse result
+ logger.info(f"Search: Engine {self._engine_name} parsed {len(parsed)} results from {len(content)} chars")
 
  # JAVASCRIPT IMAGE INJECTION
  # Inject base64 images from JS extraction if available
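The `__ADDRESS_BAR_SEARCH__:` prefix acts as a sentinel: the address-bar engine's `build_url()` is assumed to return it with the query appended when `default_address_bar` is selected, and the caller above branches on that prefix instead of navigating to a real URL. A minimal sketch of the handshake (function names below are illustrative):

```python
# Sentinel-URL handshake sketch: the build side produces the marker, the
# dispatch side detects it and routes to the address-bar path instead of
# ordinary URL navigation.
ADDRESS_BAR_PREFIX = "__ADDRESS_BAR_SEARCH__:"

def build_url(query: str) -> str:
    # what the address-bar engine's build_url() is assumed to produce
    return f"{ADDRESS_BAR_PREFIX}{query}"

def dispatch(url: str) -> str:
    if url.startswith(ADDRESS_BAR_PREFIX):
        query = url.removeprefix(ADDRESS_BAR_PREFIX)
        return f"address-bar search for {query!r}"
    return f"navigate to {url}"

print(dispatch(build_url("entari plugin hyw")))      # address-bar path
print(dispatch("https://duckduckgo.com/?q=entari"))  # normal navigation
```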
@@ -84,6 +106,17 @@ class SearchService:
  parsed[i]["images"].insert(0, b64_src)
 
  logger.info(f"Search parsed {len(parsed)} results for '{query}' using {self._engine_name}")
+
+ # ALWAYS add raw search page as hidden item for debug saving
+ # (even when 0 results, so we can debug the parser)
+ results.append({
+ "title": f"[DEBUG] Raw Search: {query}",
+ "url": url,
+ "content": content[:50000], # Limit to 50KB
+ "_type": "search_raw_page",
+ "_hidden": True, # Don't show to LLM
+ })
+
  results.extend(parsed)
  else:
  logger.warning(f"Search failed/empty for '{query}': {content[:100]}")
@@ -120,3 +153,11 @@
  timeout = self._fetch_timeout
  service = get_screenshot_service(headless=self._headless)
  return await service.fetch_page(url, timeout=timeout, include_screenshot=include_screenshot)
+
+ async def screenshot_url(self, url: str, full_page: bool = True) -> Optional[str]:
+ """
+ Capture a screenshot of a URL.
+ Delegates to screenshot service.
+ """
+ service = get_screenshot_service(headless=self._headless)
+ return await service.screenshot_url(url, full_page=full_page)
hyw_core/stages/__init__.py (new file)

@@ -0,0 +1,21 @@
+ """
+ hyw_core.stages - Pipeline Stages
+
+ This subpackage provides the pipeline stage implementations:
+ - BaseStage: Abstract base class for all stages
+ - StageContext: Shared context between stages
+ - StageResult: Stage execution result
+ - InstructStage: Initial task planning and search execution
+ - SummaryStage: Final response generation
+ """
+
+ from .base import BaseStage, StageContext, StageResult
+
+ from .summary import SummaryStage
+
+ __all__ = [
+ "BaseStage",
+ "StageContext",
+ "StageResult",
+ "SummaryStage",
+ ]
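With this subpackage in place, stage primitives are imported from `hyw_core.stages` instead of the old `entari_plugin_hyw.stage_*` modules. A one-line migration sketch, assuming the 4.0.0rc8 wheel is installed; note that the module docstring still mentions `InstructStage`, which this `__init__` does not actually export:

```python
from hyw_core.stages import BaseStage, StageContext, StageResult, SummaryStage
```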
entari_plugin_hyw/stage_base.py → hyw_core/stages/base.py

@@ -39,6 +39,9 @@ class StageContext:
  # Model capabilities
  image_input_supported: bool = True
 
+ # Search timing
+ search_time: float = 0.0
+
  def next_id(self) -> int:
  """Get next global ID."""
  self.global_id_counter += 1
entari_plugin_hyw/stage_summary.py → hyw_core/stages/summary.py

@@ -12,8 +12,8 @@ from typing import Any, Dict, List, Optional
  from loguru import logger
  from openai import AsyncOpenAI
 
- from .stage_base import BaseStage, StageContext, StageResult
- from .definitions import SUMMARY_REPORT_SP
+ from .base import BaseStage, StageContext, StageResult
+ from ..definitions import SUMMARY_REPORT_SP, get_refuse_answer_tool
 
 
  class SummaryStage(BaseStage):
@@ -35,6 +35,9 @@ class SummaryStage(BaseStage):
 
  # Format context from web results
  web_content = self._format_web_content(context)
+
+ # Tools
+ refuse_tool = get_refuse_answer_tool()
  full_context = f"{context.agent_context}\n\n{web_content}"
 
  # Select prompt
@@ -47,10 +50,13 @@
  # Build Context Message
  context_message = f"## Web Search & Page Content\n\n```context\n{full_context}\n```"
 
+
  # Build user content
  user_text = context.user_input or "..."
  if images:
- user_content: List[Dict[str, Any]] = [{"type": "text", "text": user_text}]
+ # Add image context message for multimodal input
+ image_context = f"[System: The user has provided {len(images)} image(s). Please analyze these images together with the text query to provide a comprehensive response.]"
+ user_content: List[Dict[str, Any]] = [{"type": "text", "text": f"{image_context}\n\n{user_text}"}]
  for img_b64 in images:
  url = f"data:image/jpeg;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
  user_content.append({"type": "image_url", "image_url": {"url": url}})
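The branch above builds an OpenAI-style multimodal user message: one text part carrying the image-context note plus the user text, followed by one `image_url` part per base64 image. A self-contained sketch of that shape (the placeholder base64 string is illustrative):

```python
# Sketch of the multimodal content list assembled by SummaryStage when images
# are present.  Non-"data:" strings are wrapped as JPEG data URIs, as above.
from typing import Any, Dict, List

def build_user_content(user_text: str, images_b64: List[str]) -> List[Dict[str, Any]]:
    note = f"[System: The user has provided {len(images_b64)} image(s). ...]"
    content: List[Dict[str, Any]] = [{"type": "text", "text": f"{note}\n\n{user_text}"}]
    for img_b64 in images_b64:
        url = img_b64 if img_b64.startswith("data:") else f"data:image/jpeg;base64,{img_b64}"
        content.append({"type": "image_url", "image_url": {"url": url}})
    return content

print(build_user_content("What is in this picture?", ["<BASE64-PLACEHOLDER>"]))
```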
@@ -67,18 +73,21 @@
  model_cfg = self.config.get_model_config("main")
 
  client = self._client_for(
- api_key=model_cfg.get("api_key"),
- base_url=model_cfg.get("base_url")
+ api_key=model_cfg.api_key,
+ base_url=model_cfg.base_url
  )
 
- model = model_cfg.get("model_name") or self.config.model_name
+ model = model_cfg.model_name or self.config.model_name
 
  try:
  response = await client.chat.completions.create(
  model=model,
  messages=messages,
  temperature=self.config.temperature,
+
  extra_body=getattr(self.config, "summary_extra_body", None),
+ tools=[refuse_tool],
+ tool_choice="auto",
  )
  except Exception as e:
  logger.error(f"SummaryStage LLM error: {e}")
@@ -93,6 +102,25 @@ class SummaryStage(BaseStage):
  usage["input_tokens"] = getattr(response.usage, "prompt_tokens", 0) or 0
  usage["output_tokens"] = getattr(response.usage, "completion_tokens", 0) or 0
 
+ # Handle Tool Calls (Refusal)
+ tool_calls = response.choices[0].message.tool_calls
+ if tool_calls:
+ for tc in tool_calls:
+ if tc.function.name == "refuse_answer":
+ import json
+ try:
+ args = json.loads(tc.function.arguments)
+ reason = args.get("reason", "Refused")
+ context.should_refuse = True
+ context.refuse_reason = reason
+ return StageResult(
+ success=True,
+ data={"content": f"Refused: {reason}"},
+ usage=usage,
+ trace={"skipped": True, "reason": reason}
+ )
+ except: pass
+
  content = (response.choices[0].message.content or "").strip()
 
  return StageResult(
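The handler above only reveals the tool's name (`refuse_answer`) and its `reason` argument; the actual schema returned by `get_refuse_answer_tool()` lives in `hyw_core/definitions.py`, which is not shown in this diff. A hypothetical sketch of what such an OpenAI function-tool definition could look like (the description text and required-fields list are assumptions):

```python
# Hypothetical refuse_answer tool schema; only the tool name and the "reason"
# argument are confirmed by the handler above, everything else is assumed.
def get_refuse_answer_tool() -> dict:
    return {
        "type": "function",
        "function": {
            "name": "refuse_answer",
            "description": "Decline to answer and explain why.",  # assumed wording
            "parameters": {
                "type": "object",
                "properties": {
                    "reason": {
                        "type": "string",
                        "description": "Why the request is being refused.",
                    },
                },
                "required": ["reason"],
            },
        },
    }
```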
@@ -101,9 +129,10 @@
  usage=usage,
  trace={
  "model": model,
- "provider": model_cfg.get("model_provider") or "Unknown",
+ "provider": model_cfg.model_provider or "Unknown",
  "usage": usage,
  "system_prompt": system_prompt,
+ "context_message": context_message, # Includes vision description + search results
  "output": content,
  "time": time.time() - start_time,
  "images_count": len(images) if images else 0,