PyPI - kolega-code - Versions diffs - 0.1.0__py3-none-any.whl - Mend

kolega-code 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (171) hide show

kolega_code/__init__.py +151 -0
kolega_code/agent/__init__.py +42 -0
kolega_code/agent/baseagent.py +998 -0
kolega_code/agent/browseragent.py +123 -0
kolega_code/agent/coder.py +157 -0
kolega_code/agent/common.py +41 -0
kolega_code/agent/compression.py +81 -0
kolega_code/agent/context.py +112 -0
kolega_code/agent/conversation.py +408 -0
kolega_code/agent/generalagent.py +146 -0
kolega_code/agent/investigationagent.py +123 -0
kolega_code/agent/planningagent.py +187 -0
kolega_code/agent/prompt_provider.py +196 -0
kolega_code/agent/prompt_templates/agents/browser.j2 +102 -0
kolega_code/agent/prompt_templates/agents/coder_cli_mode.j2 +127 -0
kolega_code/agent/prompt_templates/agents/general.j2 +68 -0
kolega_code/agent/prompt_templates/agents/investigation.j2 +72 -0
kolega_code/agent/prompt_templates/common/frontend_guidance.md +36 -0
kolega_code/agent/prompt_templates/common/kolega_md_instructions.md +14 -0
kolega_code/agent/prompt_templates/environment_variables/workspace_env_vars.md +11 -0
kolega_code/agent/prompt_templates/template_guidance/expo-template.md +379 -0
kolega_code/agent/prompt_templates/template_guidance/html-website-template.md +3 -0
kolega_code/agent/prompt_templates/template_guidance/mern-stack-template.md +3 -0
kolega_code/agent/prompt_templates/template_guidance/react-vite-shadcdn-template.md +182 -0
kolega_code/agent/prompts.py +192 -0
kolega_code/agent/tests/__init__.py +0 -0
kolega_code/agent/tests/llm/__init__.py +0 -0
kolega_code/agent/tests/llm/test_anthropic_token_counting.py +633 -0
kolega_code/agent/tests/llm/test_billing_openai_cache.py +74 -0
kolega_code/agent/tests/llm/test_client.py +773 -0
kolega_code/agent/tests/llm/test_dashscope_mapping.py +32 -0
kolega_code/agent/tests/llm/test_error_boundary.py +322 -0
kolega_code/agent/tests/llm/test_exceptions.py +249 -0
kolega_code/agent/tests/llm/test_instrumented_client.py +536 -0
kolega_code/agent/tests/llm/test_instrumented_client_integration.py +547 -0
kolega_code/agent/tests/llm/test_langfuse_normalization.py +39 -0
kolega_code/agent/tests/llm/test_model_specs.py +17 -0
kolega_code/agent/tests/llm/test_openai_cached_tokens.py +58 -0
kolega_code/agent/tests/llm/test_openai_cached_tokens_stream.py +74 -0
kolega_code/agent/tests/llm/test_openai_message_conversion.py +30 -0
kolega_code/agent/tests/llm/test_openai_token_counting.py +687 -0
kolega_code/agent/tests/llm/test_tool_execution_ids.py +193 -0
kolega_code/agent/tests/services/__init__.py +1 -0
kolega_code/agent/tests/services/test_browser.py +447 -0
kolega_code/agent/tests/services/test_browser_parity.py +353 -0
kolega_code/agent/tests/services/test_file_system.py +699 -0
kolega_code/agent/tests/services/test_sandbox_terminal_input.py +98 -0
kolega_code/agent/tests/services/test_terminal.py +154 -0
kolega_code/agent/tests/services/test_terminal_command_tracking.py +385 -0
kolega_code/agent/tests/services/test_terminal_state_serializer.py +262 -0
kolega_code/agent/tests/test_agent_tools_inventory.py +267 -0
kolega_code/agent/tests/test_base_agent.py +1942 -0
kolega_code/agent/tests/test_coder_attachments.py +330 -0
kolega_code/agent/tests/test_coder_prompt_extensions.py +61 -0
kolega_code/agent/tests/test_commands.py +179 -0
kolega_code/agent/tests/test_duplicate_tool_results.py +556 -0
kolega_code/agent/tests/test_empty_message_handling.py +48 -0
kolega_code/agent/tests/test_general_agent.py +242 -0
kolega_code/agent/tests/test_html.py +320 -0
kolega_code/agent/tests/test_parallel_tool_calls.py +291 -0
kolega_code/agent/tests/test_planning_agent.py +227 -0
kolega_code/agent/tests/test_prompt_provider.py +271 -0
kolega_code/agent/tests/test_tool_registry.py +102 -0
kolega_code/agent/tests/test_tools.py +549 -0
kolega_code/agent/tests/tool_backend/__init__.py +0 -0
kolega_code/agent/tests/tool_backend/test_agent_tool.py +356 -0
kolega_code/agent/tests/tool_backend/test_base_tool.py +147 -0
kolega_code/agent/tests/tool_backend/test_browser_tool.py +335 -0
kolega_code/agent/tests/tool_backend/test_build_tool.py +93 -0
kolega_code/agent/tests/tool_backend/test_create_file_tool.py +115 -0
kolega_code/agent/tests/tool_backend/test_glob_tool.py +196 -0
kolega_code/agent/tests/tool_backend/test_glob_tool_sandbox_parity.py +230 -0
kolega_code/agent/tests/tool_backend/test_list_directory_tool.py +292 -0
kolega_code/agent/tests/tool_backend/test_read_file_tool.py +173 -0
kolega_code/agent/tests/tool_backend/test_replace_entire_file_tool.py +115 -0
kolega_code/agent/tests/tool_backend/test_replace_lines_tool.py +141 -0
kolega_code/agent/tests/tool_backend/test_search_and_replace_tool.py +174 -0
kolega_code/agent/tests/tool_backend/test_search_codebase_tool.py +228 -0
kolega_code/agent/tests/tool_backend/test_terminal_tool.py +482 -0
kolega_code/agent/tests/tool_backend/test_think_hard_integration.py +189 -0
kolega_code/agent/tests/tool_backend/test_think_hard_streaming.py +445 -0
kolega_code/agent/tests/tool_backend/test_web_fetch_tool.py +194 -0
kolega_code/agent/tool_backend/agent_tool.py +414 -0
kolega_code/agent/tool_backend/apply_edit_tool.py +98 -0
kolega_code/agent/tool_backend/apply_patch_tool.py +514 -0
kolega_code/agent/tool_backend/base_tool.py +217 -0
kolega_code/agent/tool_backend/browser_tool.py +271 -0
kolega_code/agent/tool_backend/build_tool.py +93 -0
kolega_code/agent/tool_backend/create_file_tool.py +52 -0
kolega_code/agent/tool_backend/glob_tool.py +323 -0
kolega_code/agent/tool_backend/list_directory_tool.py +300 -0
kolega_code/agent/tool_backend/memory_tool.py +79 -0
kolega_code/agent/tool_backend/read_file_tool.py +119 -0
kolega_code/agent/tool_backend/replace_entire_file_tool.py +40 -0
kolega_code/agent/tool_backend/replace_lines_tool.py +97 -0
kolega_code/agent/tool_backend/search_and_replace_tool.py +146 -0
kolega_code/agent/tool_backend/search_codebase_tool.py +377 -0
kolega_code/agent/tool_backend/streaming_tool.py +47 -0
kolega_code/agent/tool_backend/terminal_tool.py +643 -0
kolega_code/agent/tool_backend/think_hard_tool.py +211 -0
kolega_code/agent/tool_backend/web_fetch_tool.py +205 -0
kolega_code/agent/tools.py +1704 -0
kolega_code/agent/utils/commands.py +94 -0
kolega_code/cli/__init__.py +1 -0
kolega_code/cli/app.py +2756 -0
kolega_code/cli/config.py +280 -0
kolega_code/cli/connection.py +49 -0
kolega_code/cli/file_index.py +147 -0
kolega_code/cli/main.py +564 -0
kolega_code/cli/mentions.py +155 -0
kolega_code/cli/messages.py +89 -0
kolega_code/cli/provider_registry.py +96 -0
kolega_code/cli/session_store.py +207 -0
kolega_code/cli/settings.py +87 -0
kolega_code/cli/skills.py +409 -0
kolega_code/cli/slash_commands.py +108 -0
kolega_code/cli/tests/__init__.py +1 -0
kolega_code/cli/tests/test_app.py +4251 -0
kolega_code/cli/tests/test_cli_config.py +171 -0
kolega_code/cli/tests/test_connection.py +26 -0
kolega_code/cli/tests/test_file_index.py +103 -0
kolega_code/cli/tests/test_main.py +455 -0
kolega_code/cli/tests/test_mentions.py +108 -0
kolega_code/cli/tests/test_session_store.py +67 -0
kolega_code/cli/tests/test_settings.py +62 -0
kolega_code/cli/tests/test_skills.py +157 -0
kolega_code/cli/tests/test_slash_commands.py +88 -0
kolega_code/cli/theme.py +180 -0
kolega_code/config.py +154 -0
kolega_code/events.py +202 -0
kolega_code/llm/client.py +300 -0
kolega_code/llm/exceptions.py +285 -0
kolega_code/llm/instrumented_client.py +520 -0
kolega_code/llm/models.py +1368 -0
kolega_code/llm/providers/__init__.py +0 -0
kolega_code/llm/providers/anthropic.py +387 -0
kolega_code/llm/providers/base.py +71 -0
kolega_code/llm/providers/google.py +157 -0
kolega_code/llm/providers/models.py +37 -0
kolega_code/llm/providers/openai.py +363 -0
kolega_code/llm/ratelimit.py +40 -0
kolega_code/llm/specs.py +67 -0
kolega_code/llm/tool_execution_ids.py +18 -0
kolega_code/models/__init__.py +9 -0
kolega_code/models/sandbox_terminal_state.py +47 -0
kolega_code/runtime.py +50 -0
kolega_code/sandbox/README.md +200 -0
kolega_code/sandbox/__init__.py +21 -0
kolega_code/sandbox/async_filesystem.py +475 -0
kolega_code/sandbox/base.py +297 -0
kolega_code/sandbox/browser.py +25 -0
kolega_code/sandbox/event_loop.py +43 -0
kolega_code/sandbox/filesystem.py +341 -0
kolega_code/sandbox/local.py +118 -0
kolega_code/sandbox/serializer.py +175 -0
kolega_code/sandbox/terminal.py +868 -0
kolega_code/sandbox/utils.py +216 -0
kolega_code/services/base.py +255 -0
kolega_code/services/browser.py +444 -0
kolega_code/services/file_system.py +749 -0
kolega_code/services/html.py +221 -0
kolega_code/services/terminal.py +903 -0
kolega_code/tools/__init__.py +22 -0
kolega_code/tools/core.py +33 -0
kolega_code/tools/definitions.py +81 -0
kolega_code/tools/registry.py +73 -0
kolega_code-0.1.0.dist-info/METADATA +157 -0
kolega_code-0.1.0.dist-info/RECORD +171 -0
kolega_code-0.1.0.dist-info/WHEEL +4 -0
kolega_code-0.1.0.dist-info/entry_points.txt +2 -0
kolega_code-0.1.0.dist-info/licenses/LICENSE +21 -0

kolega_code/agent/tool_backend/think_hard_tool.py ADDED Viewed

@@ -0,0 +1,211 @@
+from .. import prompts
+from kolega_code.llm.client import LLMClient
+from kolega_code.llm.instrumented_client import InstrumentedLLMClient
+from kolega_code.llm.models import Message, MessageHistory, TextBlock, ThinkingBlock
+from kolega_code.llm.specs import get_model_specs
+from .streaming_tool import StreamingTool
+class ThinkHardTool(StreamingTool):
+    async def think_hard(self, problem_statement: str) -> str:
+        """
+        Uses Claude 3.7 Sonnet in extended thinking mode to analyze a problem deeply.
+        This tool leverages Claude's extended thinking capabilities to perform in-depth
+        analysis on complex problems. It sends the problem statement to the Claude API
+        with specific parameters to enable extended thinking and returns the detailed response.
+        Args:
+            problem_statement: A clear statement of the problem to be analyzed, including ALL relevant details.
+        Returns:
+            The detailed analysis from Claude, including its extended thinking process
+        """
+        await self.log_info(f"Thinking hard about: {problem_statement[:100]}...", sender=self.caller.agent_name)
+        provider = self.config.thinking_config.provider
+        api_key = self.config.get_api_key(provider)
+        rate_limits = self.config.thinking_config.rate_limits
+        # Check if the caller has an instrumented client we can leverage
+        if hasattr(self.caller, "llm") and isinstance(self.caller.llm, InstrumentedLLMClient):
+            # Create a new instrumented client with the same Langfuse instance but for thinking
+            client = InstrumentedLLMClient(
+                provider=provider.value,
+                api_key=api_key,
+                max_retries=rate_limits.max_retries,
+                requests_per_minute=rate_limits.requests_per_minute,
+                tokens_per_minute=rate_limits.tokens_per_minute,
+                langfuse_client=self.caller.llm.langfuse,
+                workspace_id=self.caller.workspace_id,
+                thread_id=self.caller.thread_id,
+                agent_type=f"{self.caller.agent_name}-thinking",
+                environment=self.caller.llm.environment,
+                user_id=self.caller.user_id,
+                user_email=self.caller.user_email,
+            )
+        else:
+            # Fallback to regular client
+            client = LLMClient(
+                provider=provider.value,
+                api_key=api_key,
+                max_retries=rate_limits.max_retries,
+                requests_per_minute=rate_limits.requests_per_minute,
+                tokens_per_minute=rate_limits.tokens_per_minute,
+            )
+        try:
+            model_specs = get_model_specs(self.config.thinking_config.provider, self.config.thinking_config.model)
+            # Call LLM with extended thinking enabled
+            thinking_param = self.config.thinking_config.thinking_tokens
+            system_message = Message(role="system", content=[TextBlock(text=prompts.THINK_HARD_PROMPT)])
+            messages = MessageHistory(
+                [
+                    Message(
+                        role="user",
+                        content=[
+                            TextBlock(
+                                text=f"Think deeply and comprehensively about this problem:\n\n{problem_statement}"
+                            )
+                        ],
+                    )
+                ]
+            )
+            # Get tool_call_id from caller if available for streaming
+            tool_call_id = getattr(self.caller, "current_tool_call_id", None)
+            # Use streaming to avoid timeout issues
+            thinking_content = []
+            text_content = []
+            accumulated_thinking = ""
+            accumulated_text = ""
+            has_sent_thinking_header = False
+            has_sent_analysis_header = False
+            # Ensure max_completion_tokens is greater than thinking_tokens
+            # According to Anthropic docs: max_tokens must be greater than thinking.budget_tokens
+            max_completion = model_specs["max_completion_tokens"]
+            if thinking_param and max_completion <= thinking_param:
+                # Add some buffer to ensure we have room for the actual response
+                max_completion = thinking_param + 2000
+                await self.log_info(
+                    f"Adjusted max_completion_tokens from {model_specs['max_completion_tokens']} to {max_completion} "
+                    f"to accommodate thinking_tokens of {thinking_param}",
+                    sender=self.caller.agent_name,
+                )
+            # Use the stream and process chunks for streaming updates
+            async with await client.stream(
+                model=self.config.thinking_config.model,
+                max_completion_tokens=max_completion,
+                system=system_message,
+                messages=messages,
+                thinking=thinking_param,
+            ) as stream:
+                # Process chunks for streaming if we have a tool_call_id
+                if tool_call_id:
+                    async for chunk in stream:
+                        # Check if this is a thinking chunk
+                        if hasattr(chunk, "thinking") and chunk.thinking:
+                            # Send header if first thinking content
+                            if not has_sent_thinking_header:
+                                await self.send_streaming_update(
+                                    "# Extended Thinking Process\n\n",
+                                    tool_call_id,
+                                    "think_hard",
+                                    is_complete=False,
+                                    stream_mode="append",
+                                )
+                                has_sent_thinking_header = True
+                            accumulated_thinking += chunk.thinking
+                            # Stream thinking content periodically
+                            if len(accumulated_thinking) >= 50:
+                                await self.send_streaming_update(
+                                    accumulated_thinking,
+                                    tool_call_id,
+                                    "think_hard",
+                                    is_complete=False,
+                                    stream_mode="append",
+                                )
+                                accumulated_thinking = ""
+                        # Check if this is a text chunk
+                        elif hasattr(chunk, "text") and chunk.text:
+                            # Send any remaining thinking content and analysis header
+                            if accumulated_thinking:
+                                await self.send_streaming_update(
+                                    accumulated_thinking + "\n\n",
+                                    tool_call_id,
+                                    "think_hard",
+                                    is_complete=False,
+                                    stream_mode="append",
+                                )
+                                accumulated_thinking = ""
+                            if not has_sent_analysis_header:
+                                await self.send_streaming_update(
+                                    "# Final Analysis\n\n",
+                                    tool_call_id,
+                                    "think_hard",
+                                    is_complete=False,
+                                    stream_mode="append",
+                                )
+                                has_sent_analysis_header = True
+                            accumulated_text += chunk.text
+                            # Stream text content periodically
+                            if len(accumulated_text) >= 50:
+                                await self.send_streaming_update(
+                                    accumulated_text,
+                                    tool_call_id,
+                                    "think_hard",
+                                    is_complete=False,
+                                    stream_mode="append",
+                                )
+                                accumulated_text = ""
+                    # Send any remaining accumulated content
+                    remaining_content = accumulated_thinking + accumulated_text
+                    if remaining_content:
+                        await self.send_streaming_update(
+                            remaining_content,
+                            tool_call_id,
+                            "think_hard",
+                            is_complete=False,
+                            stream_mode="append",
+                        )
+                # Get the final message regardless of streaming
+                final_message = await stream.get_final_message()
+            # Extract thinking and text content from the final message
+            for block in final_message.content:
+                if isinstance(block, ThinkingBlock):
+                    thinking_content.append(block.thinking)
+                elif isinstance(block, TextBlock):
+                    text_content.append(block.text)
+            # Build the complete result
+            result = ""
+            if thinking_content:
+                result += "# Extended Thinking Process\n\n"
+                result += "\n".join(thinking_content) + "\n\n"
+            result += "# Final Analysis\n\n"
+            result += "\n".join(text_content)
+            # Send final complete update if streaming
+            if tool_call_id:
+                await self.send_streaming_update(
+                    result, tool_call_id, "think_hard", is_complete=True, stream_mode="replace"
+                )
+            return result
+        except Exception as e:
+            error_message = f"Error during extended thinking: {str(e)}"
+            await self.log_error(error_message, sender=self.caller.agent_name)
+            return error_message

kolega_code/agent/tool_backend/web_fetch_tool.py ADDED Viewed

@@ -0,0 +1,205 @@
+import asyncio
+from pathlib import Path
+from typing import Union
+import trafilatura
+from kolega_code.config import AgentConfig
+from kolega_code.llm.client import LLMClient
+from kolega_code.llm.instrumented_client import InstrumentedLLMClient
+from kolega_code.llm.models import Message, MessageHistory, TextBlock
+from kolega_code.llm.specs import get_model_specs
+from .streaming_tool import StreamingTool
+class WebFetchTool(StreamingTool):
+    """Tool for fetching web page content and delegating lightweight processing to the fast model."""
+    FETCH_TIMEOUT_SECONDS = 20
+    MAX_CONTENT_CHARS = 100_000
+    DEFAULT_RESPONSE_CHAR_LIMIT = 512
+    WEB_FETCH_MAX_COMPLETION_TOKENS = 4096
+    def __init__(
+        self,
+        project_path: Union[str, Path],
+        workspace_id: str,
+        thread_id: str,
+        connection_manager,
+        config: AgentConfig,
+        caller,
+        filesystem=None,
+    ):
+        super().__init__(project_path, workspace_id, thread_id, connection_manager, config, caller, filesystem)
+    async def web_fetch(self, url: str, instruction: str) -> str:
+        """
+        Fetch web content from a URL, process it with the fast model, and return a concise answer.
+        This tool downloads the page, extracts clean text via Trafilatura, and asks the fast LLM
+        to follow the provided instruction. The model is asked to keep the output compact (≈512
+        characters), but the result is only trimmed if it well exceeds that limit.
+        Args:
+            url: Fully qualified URL to fetch (http/https).
+            instruction: Guidance for how the extracted content should be used.
+        Returns:
+            The model's response derived from the fetched content, truncated to the character limit if necessary.
+        """
+        if not url or not url.lower().startswith(("http://", "https://")):
+            return "Error: Provide a valid http(s) URL."
+        tool_call_id = getattr(self.caller, "current_tool_call_id", None)
+        if tool_call_id:
+            await self.send_streaming_update(
+                f"Fetching content from {url}...", tool_call_id, "web_fetch", is_complete=False
+            )
+        try:
+            downloaded_html = await asyncio.wait_for(
+                asyncio.to_thread(trafilatura.fetch_url, url), timeout=self.FETCH_TIMEOUT_SECONDS
+            )
+        except asyncio.TimeoutError:
+            error_message = f"Error: Timed out fetching {url} after {self.FETCH_TIMEOUT_SECONDS} seconds."
+            if tool_call_id:
+                await self.send_streaming_update(error_message, tool_call_id, "web_fetch", is_complete=True)
+            return error_message
+        except Exception as exc:  # pragma: no cover - defensive logging branch
+            error_message = f"Error: Failed to fetch {url}: {exc}"
+            if tool_call_id:
+                await self.send_streaming_update(error_message, tool_call_id, "web_fetch", is_complete=True)
+            return error_message
+        if not downloaded_html:
+            message = f"Error: No content retrieved from {url}."
+            if tool_call_id:
+                await self.send_streaming_update(message, tool_call_id, "web_fetch", is_complete=True)
+            return message
+        try:
+            extracted_text = await asyncio.to_thread(
+                trafilatura.extract,
+                downloaded_html,
+                include_comments=False,
+                include_tables=True,
+            )
+        except Exception as exc:  # pragma: no cover - defensive logging branch
+            error_message = f"Error: Failed to extract content from {url}: {exc}"
+            if tool_call_id:
+                await self.send_streaming_update(error_message, tool_call_id, "web_fetch", is_complete=True)
+            return error_message
+        if not extracted_text or not extracted_text.strip():
+            message = f"Error: Extracted page content for {url} is empty."
+            if tool_call_id:
+                await self.send_streaming_update(message, tool_call_id, "web_fetch", is_complete=True)
+            return message
+        content = extracted_text.strip()
+        truncated_note = ""
+        if len(content) > self.MAX_CONTENT_CHARS:
+            content = content[: self.MAX_CONTENT_CHARS]
+            truncated_note = (
+                f"\n\n[Web content truncated to first {self.MAX_CONTENT_CHARS} characters to fit token limits.]"
+            )
+        if tool_call_id:
+            await self.send_streaming_update(
+                "Processing content with fast model...", tool_call_id, "web_fetch", is_complete=False
+            )
+        provider = self.config.fast_config.provider
+        api_key = self.config.get_api_key(provider)
+        rate_limits = self.config.fast_config.rate_limits
+        client_kwargs = {
+            "provider": provider.value,
+            "api_key": api_key,
+            "max_retries": rate_limits.max_retries,
+            "requests_per_minute": rate_limits.requests_per_minute,
+            "tokens_per_minute": rate_limits.tokens_per_minute,
+        }
+        if hasattr(self.caller, "llm") and isinstance(self.caller.llm, InstrumentedLLMClient):
+            client = InstrumentedLLMClient(
+                langfuse_client=self.caller.llm.langfuse,
+                workspace_id=getattr(self.caller, "workspace_id", None),
+                thread_id=getattr(self.caller, "thread_id", None),
+                agent_type=f"{self.caller.agent_name}-web-fetch",
+                environment=self.config.environment,
+                user_id=getattr(self.caller, "user_id", None),
+                user_email=getattr(self.caller, "user_email", None),
+                **client_kwargs,
+            )
+        else:
+            client = LLMClient(**client_kwargs)
+        try:
+            model_specs = get_model_specs(provider, self.config.fast_config.model)
+            max_completion_tokens = min(
+                int(model_specs["max_completion_tokens"]),
+                self.WEB_FETCH_MAX_COMPLETION_TOKENS,
+            )
+            target_chars = self.DEFAULT_RESPONSE_CHAR_LIMIT
+            system_prompt = Message(
+                role="system",
+                content=[
+                    TextBlock(
+                        text=(
+                            "You see extracted web page content and an instruction. Follow the instruction faithfully"
+                            f" and keep the response around {target_chars} characters when possible—concise but clear."
+                            " If more detail is required, stay well-structured and call out when the content is"
+                            " insufficient."
+                        )
+                    )
+                ],
+            )
+            user_prompt = Message(
+                role="user",
+                content=[
+                    TextBlock(
+                        text=f"Instruction:\n{instruction.strip()}\n\nWeb content:\n{content}{truncated_note}"
+                    )
+                ],
+            )
+            response_message = await client.generate(
+                model=self.config.fast_config.model,
+                max_completion_tokens=max_completion_tokens,
+                system=system_prompt,
+                messages=MessageHistory([user_prompt]),
+                temperature=0.0,
+            )
+            response_text = (response_message.get_text_content() or "").strip()
+            if not response_text:
+                error_message = "Error: Fast model returned an empty response for fetched content."
+                if tool_call_id:
+                    await self.send_streaming_update(error_message, tool_call_id, "web_fetch", is_complete=True)
+                return error_message
+            hard_cut_threshold = target_chars * 2
+            if len(response_text) > hard_cut_threshold:
+                # Prefer trimming on word boundaries to avoid mid-word truncation.
+                trimmed = response_text[:target_chars].rstrip()
+                cut_index = trimmed.rfind(" ")
+                if cut_index > 0:
+                    trimmed = trimmed[:cut_index]
+                if not trimmed:
+                    trimmed = response_text[:target_chars]
+                response_text = trimmed.rstrip(" ,.;:-") + "…"
+            if tool_call_id:
+                await self.send_streaming_update(response_text, tool_call_id, "web_fetch", is_complete=True)
+            return response_text
+        except Exception as exc:
+            error_message = f"Error: Failed to process content with fast model: {exc}"
+            if tool_call_id:
+                await self.send_streaming_update(error_message, tool_call_id, "web_fetch", is_complete=True)
+            return error_message