kollabor 0.4.9__py3-none-any.whl → 0.4.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. agents/__init__.py +2 -0
  2. agents/coder/__init__.py +0 -0
  3. agents/coder/agent.json +4 -0
  4. agents/coder/api-integration.md +2150 -0
  5. agents/coder/cli-pretty.md +765 -0
  6. agents/coder/code-review.md +1092 -0
  7. agents/coder/database-design.md +1525 -0
  8. agents/coder/debugging.md +1102 -0
  9. agents/coder/dependency-management.md +1397 -0
  10. agents/coder/git-workflow.md +1099 -0
  11. agents/coder/refactoring.md +1454 -0
  12. agents/coder/security-hardening.md +1732 -0
  13. agents/coder/system_prompt.md +1448 -0
  14. agents/coder/tdd.md +1367 -0
  15. agents/creative-writer/__init__.py +0 -0
  16. agents/creative-writer/agent.json +4 -0
  17. agents/creative-writer/character-development.md +1852 -0
  18. agents/creative-writer/dialogue-craft.md +1122 -0
  19. agents/creative-writer/plot-structure.md +1073 -0
  20. agents/creative-writer/revision-editing.md +1484 -0
  21. agents/creative-writer/system_prompt.md +690 -0
  22. agents/creative-writer/worldbuilding.md +2049 -0
  23. agents/data-analyst/__init__.py +30 -0
  24. agents/data-analyst/agent.json +4 -0
  25. agents/data-analyst/data-visualization.md +992 -0
  26. agents/data-analyst/exploratory-data-analysis.md +1110 -0
  27. agents/data-analyst/pandas-data-manipulation.md +1081 -0
  28. agents/data-analyst/sql-query-optimization.md +881 -0
  29. agents/data-analyst/statistical-analysis.md +1118 -0
  30. agents/data-analyst/system_prompt.md +928 -0
  31. agents/default/__init__.py +0 -0
  32. agents/default/agent.json +4 -0
  33. agents/default/dead-code.md +794 -0
  34. agents/default/explore-agent-system.md +585 -0
  35. agents/default/system_prompt.md +1448 -0
  36. agents/kollabor/__init__.py +0 -0
  37. agents/kollabor/analyze-plugin-lifecycle.md +175 -0
  38. agents/kollabor/analyze-terminal-rendering.md +388 -0
  39. agents/kollabor/code-review.md +1092 -0
  40. agents/kollabor/debug-mcp-integration.md +521 -0
  41. agents/kollabor/debug-plugin-hooks.md +547 -0
  42. agents/kollabor/debugging.md +1102 -0
  43. agents/kollabor/dependency-management.md +1397 -0
  44. agents/kollabor/git-workflow.md +1099 -0
  45. agents/kollabor/inspect-llm-conversation.md +148 -0
  46. agents/kollabor/monitor-event-bus.md +558 -0
  47. agents/kollabor/profile-performance.md +576 -0
  48. agents/kollabor/refactoring.md +1454 -0
  49. agents/kollabor/system_prompt copy.md +1448 -0
  50. agents/kollabor/system_prompt.md +757 -0
  51. agents/kollabor/trace-command-execution.md +178 -0
  52. agents/kollabor/validate-config.md +879 -0
  53. agents/research/__init__.py +0 -0
  54. agents/research/agent.json +4 -0
  55. agents/research/architecture-mapping.md +1099 -0
  56. agents/research/codebase-analysis.md +1077 -0
  57. agents/research/dependency-audit.md +1027 -0
  58. agents/research/performance-profiling.md +1047 -0
  59. agents/research/security-review.md +1359 -0
  60. agents/research/system_prompt.md +492 -0
  61. agents/technical-writer/__init__.py +0 -0
  62. agents/technical-writer/agent.json +4 -0
  63. agents/technical-writer/api-documentation.md +2328 -0
  64. agents/technical-writer/changelog-management.md +1181 -0
  65. agents/technical-writer/readme-writing.md +1360 -0
  66. agents/technical-writer/style-guide.md +1410 -0
  67. agents/technical-writer/system_prompt.md +653 -0
  68. agents/technical-writer/tutorial-creation.md +1448 -0
  69. core/__init__.py +0 -2
  70. core/application.py +343 -88
  71. core/cli.py +229 -10
  72. core/commands/menu_renderer.py +463 -59
  73. core/commands/registry.py +14 -9
  74. core/commands/system_commands.py +2461 -14
  75. core/config/loader.py +151 -37
  76. core/config/service.py +18 -6
  77. core/events/bus.py +29 -9
  78. core/events/executor.py +205 -75
  79. core/events/models.py +27 -8
  80. core/fullscreen/command_integration.py +20 -24
  81. core/fullscreen/components/__init__.py +10 -1
  82. core/fullscreen/components/matrix_components.py +1 -2
  83. core/fullscreen/components/space_shooter_components.py +654 -0
  84. core/fullscreen/plugin.py +5 -0
  85. core/fullscreen/renderer.py +52 -13
  86. core/fullscreen/session.py +52 -15
  87. core/io/__init__.py +29 -5
  88. core/io/buffer_manager.py +6 -1
  89. core/io/config_status_view.py +7 -29
  90. core/io/core_status_views.py +267 -347
  91. core/io/input/__init__.py +25 -0
  92. core/io/input/command_mode_handler.py +711 -0
  93. core/io/input/display_controller.py +128 -0
  94. core/io/input/hook_registrar.py +286 -0
  95. core/io/input/input_loop_manager.py +421 -0
  96. core/io/input/key_press_handler.py +502 -0
  97. core/io/input/modal_controller.py +1011 -0
  98. core/io/input/paste_processor.py +339 -0
  99. core/io/input/status_modal_renderer.py +184 -0
  100. core/io/input_errors.py +5 -1
  101. core/io/input_handler.py +211 -2452
  102. core/io/key_parser.py +7 -0
  103. core/io/layout.py +15 -3
  104. core/io/message_coordinator.py +111 -2
  105. core/io/message_renderer.py +129 -4
  106. core/io/status_renderer.py +147 -607
  107. core/io/terminal_renderer.py +97 -51
  108. core/io/terminal_state.py +21 -4
  109. core/io/visual_effects.py +816 -165
  110. core/llm/agent_manager.py +1063 -0
  111. core/llm/api_adapters/__init__.py +44 -0
  112. core/llm/api_adapters/anthropic_adapter.py +432 -0
  113. core/llm/api_adapters/base.py +241 -0
  114. core/llm/api_adapters/openai_adapter.py +326 -0
  115. core/llm/api_communication_service.py +167 -113
  116. core/llm/conversation_logger.py +322 -16
  117. core/llm/conversation_manager.py +556 -30
  118. core/llm/file_operations_executor.py +84 -32
  119. core/llm/llm_service.py +934 -103
  120. core/llm/mcp_integration.py +541 -57
  121. core/llm/message_display_service.py +135 -18
  122. core/llm/plugin_sdk.py +1 -2
  123. core/llm/profile_manager.py +1183 -0
  124. core/llm/response_parser.py +274 -56
  125. core/llm/response_processor.py +16 -3
  126. core/llm/tool_executor.py +6 -1
  127. core/logging/__init__.py +2 -0
  128. core/logging/setup.py +34 -6
  129. core/models/resume.py +54 -0
  130. core/plugins/__init__.py +4 -2
  131. core/plugins/base.py +127 -0
  132. core/plugins/collector.py +23 -161
  133. core/plugins/discovery.py +37 -3
  134. core/plugins/factory.py +6 -12
  135. core/plugins/registry.py +5 -17
  136. core/ui/config_widgets.py +128 -28
  137. core/ui/live_modal_renderer.py +2 -1
  138. core/ui/modal_actions.py +5 -0
  139. core/ui/modal_overlay_renderer.py +0 -60
  140. core/ui/modal_renderer.py +268 -7
  141. core/ui/modal_state_manager.py +29 -4
  142. core/ui/widgets/base_widget.py +7 -0
  143. core/updates/__init__.py +10 -0
  144. core/updates/version_check_service.py +348 -0
  145. core/updates/version_comparator.py +103 -0
  146. core/utils/config_utils.py +685 -526
  147. core/utils/plugin_utils.py +1 -1
  148. core/utils/session_naming.py +111 -0
  149. fonts/LICENSE +21 -0
  150. fonts/README.md +46 -0
  151. fonts/SymbolsNerdFont-Regular.ttf +0 -0
  152. fonts/SymbolsNerdFontMono-Regular.ttf +0 -0
  153. fonts/__init__.py +44 -0
  154. {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/METADATA +54 -4
  155. kollabor-0.4.15.dist-info/RECORD +228 -0
  156. {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/top_level.txt +2 -0
  157. plugins/agent_orchestrator/__init__.py +39 -0
  158. plugins/agent_orchestrator/activity_monitor.py +181 -0
  159. plugins/agent_orchestrator/file_attacher.py +77 -0
  160. plugins/agent_orchestrator/message_injector.py +135 -0
  161. plugins/agent_orchestrator/models.py +48 -0
  162. plugins/agent_orchestrator/orchestrator.py +403 -0
  163. plugins/agent_orchestrator/plugin.py +976 -0
  164. plugins/agent_orchestrator/xml_parser.py +191 -0
  165. plugins/agent_orchestrator_plugin.py +9 -0
  166. plugins/enhanced_input/box_styles.py +1 -0
  167. plugins/enhanced_input/color_engine.py +19 -4
  168. plugins/enhanced_input/config.py +2 -2
  169. plugins/enhanced_input_plugin.py +61 -11
  170. plugins/fullscreen/__init__.py +6 -2
  171. plugins/fullscreen/example_plugin.py +1035 -222
  172. plugins/fullscreen/setup_wizard_plugin.py +592 -0
  173. plugins/fullscreen/space_shooter_plugin.py +131 -0
  174. plugins/hook_monitoring_plugin.py +436 -78
  175. plugins/query_enhancer_plugin.py +66 -30
  176. plugins/resume_conversation_plugin.py +1494 -0
  177. plugins/save_conversation_plugin.py +98 -32
  178. plugins/system_commands_plugin.py +70 -56
  179. plugins/tmux_plugin.py +154 -78
  180. plugins/workflow_enforcement_plugin.py +94 -92
  181. system_prompt/default.md +952 -886
  182. core/io/input_mode_manager.py +0 -402
  183. core/io/modal_interaction_handler.py +0 -315
  184. core/io/raw_input_processor.py +0 -946
  185. core/storage/__init__.py +0 -5
  186. core/storage/state_manager.py +0 -84
  187. core/ui/widget_integration.py +0 -222
  188. core/utils/key_reader.py +0 -171
  189. kollabor-0.4.9.dist-info/RECORD +0 -128
  190. {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/WHEEL +0 -0
  191. {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/entry_points.txt +0 -0
  192. {kollabor-0.4.9.dist-info → kollabor-0.4.15.dist-info}/licenses/LICENSE +0 -0
core/llm/llm_service.py CHANGED
@@ -17,6 +17,7 @@ from ..events import EventType, Hook, HookPriority
 from ..config.llm_task_config import LLMTaskConfig
 from .api_communication_service import APICommunicationService
 from .conversation_logger import KollaborConversationLogger
+from .conversation_manager import ConversationManager
 from .hook_system import LLMHookSystem
 from .mcp_integration import MCPIntegration
 from .message_display_service import MessageDisplayService
@@ -73,27 +74,40 @@ class LLMService:
         import uuid
         message_uuid = str(uuid.uuid4())
 
-        # Add to legacy history for compatibility
+        # conversation_history: primary list used by API calls
+        # conversation_manager: adds persistence, UUID tracking, metadata
+        # both systems stay synchronized
         self.conversation_history.append(message)
 
         return message_uuid
 
 
-    def __init__(self, config, state_manager, event_bus, renderer, default_timeout: Optional[float] = None, enable_metrics: bool = False):
+    def __init__(
+        self,
+        config,
+        event_bus,
+        renderer,
+        profile_manager=None,
+        agent_manager=None,
+        default_timeout: Optional[float] = None,
+        enable_metrics: bool = False,
+    ):
         """Initialize the core LLM service.
 
         Args:
             config: Configuration manager instance
-            state_manager: State management system
            event_bus: Event bus for hook registration
            renderer: Terminal renderer for output
+            profile_manager: Profile manager for LLM endpoint profiles
+            agent_manager: Agent manager for agent/skill system
            default_timeout: Default timeout for background tasks in seconds
            enable_metrics: Whether to enable detailed task metrics tracking
        """
        self.config = config
-        self.state_manager = state_manager
        self.event_bus = event_bus
        self.renderer = renderer
+        self.profile_manager = profile_manager
+        self.agent_manager = agent_manager
 
        # Timeout and metrics configuration
        self.default_timeout = default_timeout
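
The constructor change above drops the state_manager dependency and threads in the new profile and agent managers. A minimal sketch of how a caller might wire the 0.4.15 signature follows; the ProfileManager and AgentManager class names and constructor arguments are assumptions read off the new module names, since this diff only shows the LLMService side.

# Sketch only: wiring the new constructor signature (assumptions marked).
from core.llm.llm_service import LLMService
from core.llm.profile_manager import ProfileManager  # class name assumed from module
from core.llm.agent_manager import AgentManager      # class name assumed from module

def build_llm_service(config, event_bus, renderer):
    profile_manager = ProfileManager(config)  # hypothetical constructor args
    agent_manager = AgentManager(config)      # hypothetical constructor args
    return LLMService(
        config,
        event_bus,
        renderer,
        profile_manager=profile_manager,
        agent_manager=agent_manager,
        default_timeout=None,    # no per-task timeout by default
        enable_metrics=False,
    )
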
@@ -118,16 +132,21 @@ class LLMService:
         self.cancellation_message_shown = False
 
         # Initialize conversation logger with intelligence features
-        from ..utils.config_utils import get_config_directory
-        config_dir = get_config_directory()
-        conversations_dir = config_dir / "conversations"
+        from ..utils.config_utils import get_conversations_dir
+        conversations_dir = get_conversations_dir()
         conversations_dir.mkdir(parents=True, exist_ok=True)
 
-        # Initialize raw conversation logging directory
-        self.raw_conversations_dir = config_dir / "conversations_raw"
+        # Initialize raw conversation logging directory (inside conversations/)
+        self.raw_conversations_dir = conversations_dir / "raw"
         self.raw_conversations_dir.mkdir(parents=True, exist_ok=True)
         self.conversation_logger = KollaborConversationLogger(conversations_dir)
-
+
+        # Initialize conversation manager for advanced features
+        self.conversation_manager = ConversationManager(
+            config=self.config,
+            conversation_logger=self.conversation_logger
+        )
+
         # Initialize hook system
         self.hook_system = LLMHookSystem(event_bus)
 
@@ -137,18 +156,49 @@ class LLMService:
         self.tool_executor = ToolExecutor(
             mcp_integration=self.mcp_integration,
             event_bus=event_bus,
-            terminal_timeout=config.get("core.llm.terminal_timeout", 30),
-            mcp_timeout=config.get("core.llm.mcp_timeout", 60)
+            terminal_timeout=config.get("core.llm.terminal_timeout", 120),
+            mcp_timeout=config.get("core.llm.mcp_timeout", 120)
         )
+
+        # Native tool calling support (tools passed to API for native function calling)
+        # Both native API tool calls AND XML <tool_call> tags are supported
+        self.native_tools: Optional[List[Dict[str, Any]]] = None
+        self.native_tool_calling_enabled = config.get("core.llm.native_tool_calling", True)
 
         # Initialize message display service (KISS/DRY: eliminates duplicated display code)
         self.message_display = MessageDisplayService(renderer)
-
+
+        # Get active profile for API service (fallback to minimal default if no profile manager)
+        if self.profile_manager:
+            api_profile = self.profile_manager.get_active_profile()
+        else:
+            # Fallback: create minimal default profile (profile_manager should always exist)
+            from .profile_manager import LLMProfile
+            api_profile = LLMProfile(
+                name="default",
+                api_url="http://localhost:1234",
+                model="default",
+                temperature=0.7,
+            )
+
         # Initialize API communication service (KISS: pure API communication separation)
-        self.api_service = APICommunicationService(config, self.raw_conversations_dir)
-
+        self.api_service = APICommunicationService(config, self.raw_conversations_dir, api_profile)
+
+        # Link session ID for raw log correlation
+        self.api_service.set_session_id(self.conversation_logger.session_id)
+
         # Track current message threading
         self.current_parent_uuid = None
+
+        # Plugin instances reference (set after plugins are loaded)
+        self._plugin_instances: Optional[Dict[str, Any]] = None
+
+        # Question gate: pending tools queue
+        # When agent uses <question> tag, tool calls are suspended here
+        # and injected when user responds
+        self.pending_tools: List[Dict[str, Any]] = []
+        self.question_gate_active = False
+        self.question_gate_enabled = config.get("core.llm.question_gate_enabled", True)
 
         # Create hooks for LLM service
         self.hooks = [
@@ -165,6 +215,13 @@ class LLMService:
                 event_type=EventType.CANCEL_REQUEST,
                 priority=HookPriority.SYSTEM.value,
                 callback=self._handle_cancel_request
+            ),
+            Hook(
+                name="add_message_handler",
+                plugin_name="llm_core",
+                event_type=EventType.ADD_MESSAGE,
+                priority=HookPriority.LLM.value,
+                callback=self._handle_add_message
             )
         ]
 
@@ -179,8 +236,10 @@ class LLMService:
         }
 
         self.session_stats = {
-            "input_tokens": 0,
-            "output_tokens": 0,
+            "input_tokens": 0,         # Last request input tokens (context size)
+            "output_tokens": 0,        # Last request output tokens
+            "total_input_tokens": 0,   # Cumulative session input
+            "total_output_tokens": 0,  # Cumulative session output
            "messages": 0
        }
 
@@ -217,7 +276,7 @@ class LLMService:
 
         logger.info("Core LLM Service initialized")
 
-    async def initialize(self):
+    async def initialize(self) -> bool:
         """Initialize the LLM service components."""
         # Initialize API communication service (KISS refactoring)
         await self.api_service.initialize()
@@ -225,16 +284,16 @@ class LLMService:
         # Register hooks
         await self.hook_system.register_hooks()
 
-        # Discover and register MCP servers and tools
-        try:
-            discovered_servers = await self.mcp_integration.discover_mcp_servers()
-            logger.info(f"Discovered {len(discovered_servers)} MCP servers")
-        except Exception as e:
-            logger.warning(f"MCP discovery failed: {e}")
+        # Discover MCP servers in background (non-blocking startup)
+        # This allows the UI to start immediately while MCP servers connect
+        self.create_background_task(self._background_mcp_discovery(), name="mcp_discovery")
 
         # Initialize conversation with context
         await self._initialize_conversation()
 
+        # Set conversation context before logging start
+        self._set_conversation_context()
+
         # Log conversation start
         await self.conversation_logger.log_conversation_start()
 
@@ -243,14 +302,14 @@ class LLMService:
         await self.start_task_monitor()
 
         logger.info("Core LLM Service initialized and ready")
+        return True
 
     async def _initialize_conversation(self):
         """Initialize conversation with project context."""
         try:
             # Clear any existing history
             self.conversation_history = []
-            self.state_manager.set("llm.conversation_history", [])
-
+
             # Build system prompt from configuration
             initial_message = self._build_system_prompt()
 
@@ -273,6 +332,45 @@ class LLMService:
         except Exception as e:
             logger.error(f"Failed to initialize conversation: {e}")
 
+    def _set_conversation_context(self):
+        """Set dynamic context on conversation logger before logging start."""
+        # Get version
+        try:
+            from importlib.metadata import version
+            app_version = version("kollabor")
+        except Exception:
+            try:
+                from pathlib import Path
+                pyproject = Path(__file__).parent.parent.parent / "pyproject.toml"
+                if pyproject.exists():
+                    for line in pyproject.read_text().split("\n"):
+                        if line.startswith("version ="):
+                            app_version = line.split('"')[1]
+                            break
+                    else:
+                        app_version = "unknown"
+                else:
+                    app_version = "unknown"
+            except Exception:
+                app_version = "unknown"
+
+        # Get active plugins from event bus
+        active_plugins = []
+        if self.event_bus and hasattr(self.event_bus, 'registry'):
+            try:
+                hooks = self.event_bus.registry.get_all_hooks()
+                plugin_names = set()
+                for hook_list in hooks.values():
+                    for hook in hook_list:
+                        if hasattr(hook, '__self__'):
+                            plugin_names.add(type(hook.__self__).__name__)
+                active_plugins = sorted(plugin_names)
+            except Exception:
+                pass
+
+        self.conversation_logger.app_version = app_version
+        self.conversation_logger.active_plugins = active_plugins
+
     async def _enqueue_with_overflow_strategy(self, message: str) -> None:
         """Enqueue message with configurable overflow strategy.
 
@@ -778,40 +876,22 @@ class LLMService:
         except Exception as e:
             logger.warning(f"Failed to get tree output: {e}")
             return "Could not get directory listing"
-
-    def _build_system_prompt(self) -> str:
-        """Build system prompt from file (not config.json).
 
-        Priority:
-        1. KOLLABOR_SYSTEM_PROMPT environment variable (direct string)
-        2. KOLLABOR_SYSTEM_PROMPT_FILE environment variable (custom file path)
-        3. Local .kollabor-cli/system_prompt/default.md (project override)
-        4. Global ~/.kollabor-cli/system_prompt/default.md
-        5. Fallback to minimal default
+    def _finalize_system_prompt(self, prompt_parts: List[str]) -> str:
+        """Finalize system prompt by adding common sections.
+
+        Args:
+            prompt_parts: List of prompt parts (base prompt should be first)
 
         Returns:
-            Fully rendered system prompt with all <trender> tags executed.
+            Complete system prompt string
         """
-        from ..utils.config_utils import get_system_prompt_content, initialize_system_prompt
-        from ..utils.prompt_renderer import render_system_prompt
-
-        # Ensure system prompts are initialized (copies global to local if needed)
-        initialize_system_prompt()
-
-        # Load base prompt (checks env vars and files in priority order)
-        base_prompt = get_system_prompt_content()
-
-        # Render <trender> tags BEFORE building the full prompt
-        base_prompt = render_system_prompt(base_prompt, timeout=5)
-
-        prompt_parts = [base_prompt]
-
         # Add project structure if enabled
         include_structure = self.config.get("core.llm.system_prompt.include_project_structure", True)
         if include_structure:
             tree_output = self._get_tree_output()
             prompt_parts.append(f"## Project Structure\n```\n{tree_output}\n```")
-
+
         # Add attachment files
         attachment_files = self.config.get("core.llm.system_prompt.attachment_files", [])
         for filename in attachment_files:
@@ -823,7 +903,7 @@ class LLMService:
                 logger.debug(f"Attached file: {filename}")
             except Exception as e:
                 logger.warning(f"Failed to read {filename}: {e}")
-
+
         # Add custom prompt files
         custom_files = self.config.get("core.llm.system_prompt.custom_prompt_files", [])
         for filename in custom_files:
@@ -835,47 +915,435 @@ class LLMService:
                 logger.debug(f"Added custom prompt: {filename}")
             except Exception as e:
                 logger.warning(f"Failed to read custom prompt {filename}: {e}")
-
+
+        # Add plugin system prompt additions
+        plugin_additions = self._get_plugin_system_prompt_additions()
+        for addition in plugin_additions:
+            prompt_parts.append(addition)
+
         # Add closing statement
         prompt_parts.append("This is the codebase and context for our session. You now have full project awareness.")
-
+
         return "\n\n".join(prompt_parts)
-
+
+    def set_plugin_instances(self, plugin_instances: Dict[str, Any]) -> None:
+        """Set plugin instances reference for system prompt additions.
+
+        Called by the application after plugins are loaded.
+
+        Args:
+            plugin_instances: Dictionary of plugin name to plugin instance
+        """
+        self._plugin_instances = plugin_instances
+        logger.debug(f"Plugin instances set: {len(plugin_instances)} plugins")
+
+    def _get_plugin_system_prompt_additions(self) -> List[str]:
+        """Get system prompt additions from all plugins.
+
+        Queries each plugin that implements get_system_prompt_addition()
+        and collects their additions.
+
+        Returns:
+            List of system prompt addition strings
+        """
+        additions = []
+
+        if not self._plugin_instances:
+            return additions
+
+        for plugin_name, plugin_instance in self._plugin_instances.items():
+            if hasattr(plugin_instance, 'get_system_prompt_addition'):
+                try:
+                    addition = plugin_instance.get_system_prompt_addition()
+                    if addition:
+                        additions.append(addition)
+                        logger.debug(f"Plugin '{plugin_name}' added system prompt content")
+                except Exception as e:
+                    logger.warning(f"Failed to get system prompt addition from '{plugin_name}': {e}")
+
+        return additions
+
+    def _build_system_prompt(self) -> str:
+        """Build system prompt from file or agent.
+
+        Priority:
+        0. Active agent's system prompt (if agent is active)
+        1. KOLLABOR_SYSTEM_PROMPT environment variable (direct string)
+        2. KOLLABOR_SYSTEM_PROMPT_FILE environment variable (custom file path)
+        3. Local .kollabor-cli/system_prompt/default.md (project override)
+        4. Global ~/.kollabor-cli/system_prompt/default.md
+        5. Fallback to minimal default
+
+        Returns:
+            Fully rendered system prompt with all <trender> tags executed.
+        """
+        from ..utils.config_utils import get_system_prompt_content, initialize_system_prompt
+        from ..utils.prompt_renderer import render_system_prompt
+
+        # Check if we have an active agent with a system prompt
+        if self.agent_manager:
+            agent_prompt = self.agent_manager.get_system_prompt()
+            if agent_prompt:
+                # Render <trender> tags in agent prompt
+                base_prompt = render_system_prompt(agent_prompt, timeout=5)
+                logger.info(f"Using agent system prompt from: {self.agent_manager.active_agent_name}")
+                # Continue with the rest of the build process using agent prompt
+                prompt_parts = [base_prompt]
+                return self._finalize_system_prompt(prompt_parts)
+
+        # Ensure system prompts are initialized (copies global to local if needed)
+        initialize_system_prompt()
+
+        # Load base prompt (checks env vars and files in priority order)
+        base_prompt = get_system_prompt_content()
+
+        # Render <trender> tags BEFORE building the full prompt
+        base_prompt = render_system_prompt(base_prompt, timeout=5)
+
+        prompt_parts = [base_prompt]
+        return self._finalize_system_prompt(prompt_parts)
+
+    def rebuild_system_prompt(self) -> bool:
+        """Rebuild the system prompt and update conversation history.
+
+        Call this after skills are loaded/unloaded to update the system message
+        with the new prompt content including active skills.
+
+        Returns:
+            True if system prompt was rebuilt successfully.
+        """
+        try:
+            new_prompt = self._build_system_prompt()
+
+            # Update the first message in conversation history (system message)
+            if self.conversation_history:
+                first_msg = self.conversation_history[0]
+                if first_msg.role == "system":
+                    # Create new message with updated content
+                    self.conversation_history[0] = ConversationMessage(
+                        role="system",
+                        content=new_prompt
+                    )
+                    logger.info("System prompt rebuilt with updated skills")
+                    return True
+
+            logger.warning("No system message found to update")
+            return False
+
+        except Exception as e:
+            logger.error(f"Failed to rebuild system prompt: {e}")
+            return False
+
+    async def _background_mcp_discovery(self) -> None:
+        """Discover MCP servers in background (non-blocking).
+
+        Runs MCP server discovery asynchronously so the UI can start
+        immediately. Updates native_tools when discovery completes.
+        """
+        try:
+            discovered_servers = await self.mcp_integration.discover_mcp_servers()
+            logger.info(f"Background MCP discovery: found {len(discovered_servers)} servers")
+
+            # Load native tools now that MCP is ready
+            await self._load_native_tools()
+        except Exception as e:
+            logger.warning(f"Background MCP discovery failed: {e}")
+
+    async def _load_native_tools(self) -> None:
+        """Load MCP tools for native API function calling.
+
+        Populates self.native_tools with tool definitions from MCP integration
+        for passing to API calls. This enables native tool calling where the
+        LLM returns structured tool_calls instead of XML tags.
+
+        Respects both:
+        - Global config: core.llm.native_tool_calling (default: True)
+        - Profile setting: profile.native_tool_calling (default: True)
+
+        Both must be True for native tools to be loaded. When disabled,
+        the LLM uses XML tags (<terminal>, <tool>, etc.) instead.
+        """
+        # Check global config setting
+        if not self.native_tool_calling_enabled:
+            logger.info("Native tool calling disabled in global config")
+            self.native_tools = None
+            return
+
+        # Check profile-specific setting
+        profile = self.profile_manager.get_active_profile()
+        profile_native = profile.get_native_tool_calling()
+        if not profile_native:
+            logger.info(f"Native tool calling disabled for profile '{profile.name}' (using XML mode)")
+            self.native_tools = None
+            return
+
+        try:
+            tools = self.mcp_integration.get_tool_definitions_for_api()
+            if tools:
+                self.native_tools = tools
+                logger.info(f"Loaded {len(tools)} tools for native API calling")
+            else:
+                self.native_tools = None
+                logger.debug("No MCP tools available for native calling")
+        except Exception as e:
+            logger.warning(f"Failed to load native tools: {e}")
+            self.native_tools = None
+
+    async def _execute_native_tool_calls(self) -> List[Any]:
+        """Execute tool calls from native API response.
+
+        Processes tool calls returned by the API's native function calling
+        and executes them through the tool executor.
+
+        Handles edge case where LLM outputs malformed tool names containing XML.
+
+        Returns:
+            List of ToolExecutionResult objects
+        """
+        import re
+        from .tool_executor import ToolExecutionResult
+
+        results = []
+        tool_calls = self.api_service.get_last_tool_calls()
+
+        if not tool_calls:
+            return results
+
+        logger.info(f"Executing {len(tool_calls)} native tool calls")
+
+        for tc in tool_calls:
+            tool_name = tc.tool_name
+
+            # Handle malformed tool names that contain XML (LLM confusion)
+            # Example: "read><file>path</file></read><tool_call>search_nodes"
+            if '<' in tool_name or '>' in tool_name:
+                logger.warning(f"Malformed tool name detected: {tool_name[:100]}")
+                # Try to extract actual tool name from <tool_call>...</tool_call>
+                match = re.search(r'<tool_call>([^<]+)', tool_name)
+                if match:
+                    tool_name = match.group(1).strip()
+                    logger.info(f"Extracted tool name from malformed call: {tool_name}")
+                else:
+                    # Try to find any known MCP tool name in the string
+                    available_tools = list(self.mcp_integration.tool_registry.keys())
+                    for known_tool in available_tools:
+                        if known_tool in tool_name:
+                            tool_name = known_tool
+                            logger.info(f"Found known tool in malformed name: {tool_name}")
+                            break
+                    else:
+                        logger.error(f"Could not parse malformed tool name: {tool_name[:100]}")
+                        continue
+
+            # Convert ToolCallResult to tool_executor format
+            # File operations use their name as type (file_create, file_edit, etc.)
+            # Terminal commands use "terminal" as type
+            # MCP tools use "mcp_tool" as type
+            if tool_name.startswith("file_"):
+                tool_type = tool_name
+                # Map file operation arguments to expected format
+                tool_data = {
+                    "type": tool_type,
+                    "id": tc.tool_id,
+                    **tc.arguments  # Spread arguments directly (file, content, etc.)
+                }
+            elif tool_name == "terminal":
+                tool_type = "terminal"
+                tool_data = {
+                    "type": tool_type,
+                    "id": tc.tool_id,
+                    "command": tc.arguments.get("command", "")
+                }
+            else:
+                tool_type = "mcp_tool"
+                tool_data = {
+                    "type": tool_type,
+                    "id": tc.tool_id,
+                    "name": tool_name,
+                    "arguments": tc.arguments
+                }
+
+            result = await self.tool_executor.execute_tool(tool_data)
+            results.append(result)
+
+            logger.debug(f"Native tool {tool_name}: {'success' if result.success else 'failed'}")
+
+        return results
+
     async def process_user_input(self, message: str) -> Dict[str, Any]:
         """Process user input through the LLM.
-
+
         This is the main entry point for user messages.
-
+
         Args:
             message: User's input message
-
+
         Returns:
             Status information about processing
         """
         # Display user message using MessageDisplayService (DRY refactoring)
         logger.debug(f"DISPLAY DEBUG: About to display user message: '{message[:100]}...' ({len(message)} chars)")
         self.message_display.display_user_message(message)
-
+
+        # Question gate: if enabled and there are pending tools, execute them now
+        # and inject results into conversation before processing user message
+        tool_injection_results = None
+        if self.question_gate_enabled and self.question_gate_active and self.pending_tools:
+            logger.info(f"Question gate: executing {len(self.pending_tools)} suspended tool(s)")
+
+            # Show tool execution indicator (prevents UI freeze appearance)
+            tool_count = len(self.pending_tools)
+            tool_desc = self.pending_tools[0].get("type", "tool") if tool_count == 1 else f"{tool_count} tools"
+            self.renderer.update_thinking(True, f"Executing {tool_desc}...")
+
+            tool_injection_results = await self.tool_executor.execute_all_tools(self.pending_tools)
+
+            # Stop tool execution indicator
+            self.renderer.update_thinking(False)
+
+            # Display and log tool results
+            if tool_injection_results:
+                # Display tool results
+                self.message_display.display_complete_response(
+                    thinking_duration=0,
+                    response="",
+                    tool_results=tool_injection_results,
+                    original_tools=self.pending_tools
+                )
+
+                # Add tool results to conversation history
+                batched_tool_results = []
+                for result in tool_injection_results:
+                    await self.conversation_logger.log_system_message(
+                        f"Executed {result.tool_type} ({result.tool_id}): {result.output if result.success else result.error}",
+                        parent_uuid=self.current_parent_uuid,
+                        subtype="tool_call"
+                    )
+                    tool_context = self.tool_executor.format_result_for_conversation(result)
+                    batched_tool_results.append(f"Tool result: {tool_context}")
+
+                if batched_tool_results:
+                    self._add_conversation_message(ConversationMessage(
+                        role="user",
+                        content="\n".join(batched_tool_results)
+                    ))
+
+            # Clear question gate state
+            self.pending_tools = []
+            self.question_gate_active = False
+            logger.info("Question gate: cleared after tool execution")
+
         # Reset turn_completed flag
         self.turn_completed = False
         self.cancel_processing = False
         self.cancellation_message_shown = False
-
+
         # Log user message
         self.current_parent_uuid = await self.conversation_logger.log_user_message(
             message,
             parent_uuid=self.current_parent_uuid
         )
-
+
         # Add to processing queue with overflow handling
         await self._enqueue_with_overflow_strategy(message)
-
+
         # Start processing if not already running
         if not self.is_processing:
             self.create_background_task(self._process_queue(), name="process_queue")
 
-        return {"status": "queued"}
-
+        return {"status": "queued", "tools_injected": len(tool_injection_results) if tool_injection_results else 0}
+
+    def process_user_input_background(
+        self,
+        message: str,
+        task_name: str = None,
+        custom_system_prompt: str = None,
+        silent: bool = True
+    ) -> asyncio.Task:
+        """Process user input in background without blocking.
+
+        This creates a background task that processes the message independently.
+        The task will show up in the status line with elapsed time.
+
+        Args:
+            message: User's input message
+            task_name: Optional custom name for the task (appears in status line)
+            custom_system_prompt: Optional custom system prompt to use instead of default
+            silent: If True, don't display user message or thinking animation (default: True)
+
+        Returns:
+            The background task object (can be used to check status or cancel)
+        """
+        task_name = task_name or f"background_input_{len(self._background_tasks)}"
+
+        async def _background_process():
+            """Inner coroutine that processes the message."""
+            try:
+                # Temporarily override system prompt if provided
+                original_system_message = None
+                if custom_system_prompt and self.conversation_history:
+                    # Save original system message (first message in history)
+                    original_system_message = self.conversation_history[0]
+
+                    # Replace with custom system prompt
+                    self.conversation_history[0] = ConversationMessage(
+                        role="system",
+                        content=custom_system_prompt
+                    )
+                    logger.info(f"Background task '{task_name}': using custom system prompt ({len(custom_system_prompt)} chars)")
+
+                # Process silently - skip user message display
+                if silent:
+                    # Log the message but don't display it
+                    logger.info(f"Background task '{task_name}': silently queuing message ({len(message)} chars)")
+
+                    # Reset flags
+                    self.turn_completed = False
+                    self.cancel_processing = False
+                    self.cancellation_message_shown = False
+
+                    # Log user message (but don't display)
+                    self.current_parent_uuid = await self.conversation_logger.log_user_message(
+                        message,
+                        parent_uuid=self.current_parent_uuid
+                    )
+
+                    # Add to processing queue
+                    await self._enqueue_with_overflow_strategy(message)
+
+                    # Start processing if not already running
+                    if not self.is_processing:
+                        self.create_background_task(self._process_queue(), name=f"process_queue")
+
+                    logger.info(f"Background task '{task_name}': message queued, processing in background")
+                    # Don't wait - return immediately and let it process in background
+
+                else:
+                    # Use normal processing (shows user message and thinking)
+                    await self.process_user_input(message)
+
+                # Restore original system prompt
+                if original_system_message is not None:
+                    self.conversation_history[0] = original_system_message
+                    logger.debug(f"Background task '{task_name}': restored original system prompt")
+
+                logger.info(f"Background task '{task_name}' completed successfully")
+                return {"status": "completed"}
+
+            except Exception as e:
+                logger.error(f"Background task '{task_name}' failed: {e}")
+
+                # Restore system prompt on error
+                if original_system_message is not None:
+                    self.conversation_history[0] = original_system_message
+
+                raise
+
+        # Create and track the background task
+        task = self.create_background_task(_background_process(), name=task_name)
+        logger.info(f"Started background task: {task_name}")
+        return task
+
     async def _handle_user_input(self, data: Dict[str, Any], event) -> Dict[str, Any]:
         """Handle user input hook callback.
 
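process_user_input_background() is new in this release: it queues a message on the same conversation without blocking the UI, optionally swapping the system prompt for the duration of the task. A usage sketch, assuming an already-initialized service and a running event loop; only the method and its parameters come from the diff above.

# Sketch only: driving the new background entry point.
import asyncio

async def summarize_in_background(llm_service):
    task = llm_service.process_user_input_background(
        "Summarize the last test run.",
        task_name="test_summary",                      # shows up in the status line
        custom_system_prompt="You are a terse CI summarizer.",
        silent=True,                                   # no user-message display
    )
    # The returned asyncio.Task can be awaited, polled, or cancelled.
    result = await task
    assert result == {"status": "completed"}
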
@@ -922,7 +1390,120 @@ class LLMService:
 
         logger.info(f"LLM SERVICE: Cancellation flag set: {self.cancel_processing}")
         return {"status": "cancelled", "reason": reason}
-
+
+    async def _handle_add_message(self, data: Dict[str, Any], event) -> Dict[str, Any]:
+        """Handle ADD_MESSAGE event - inject messages into conversation.
+
+        This allows plugins to inject messages into the conversation that:
+        - Get added to AI-visible history
+        - Get logged to conversation logger
+        - Get displayed to user with loading indicator
+        - Optionally trigger LLM response
+
+        Args:
+            data: Event data with messages array and options
+            event: The event object
+
+        Returns:
+            Result dict with status and message count
+        """
+        messages = data.get("messages", [])
+        options = data.get("options", {})
+
+        if not messages:
+            return {"success": False, "error": "No messages provided"}
+
+        show_loading = options.get("show_loading", True)
+        loading_message = options.get("loading_message", "Loading...")
+        log_messages = options.get("log_messages", True)
+        add_to_history = options.get("add_to_history", True)
+        display_messages = options.get("display_messages", True)
+        trigger_llm = options.get("trigger_llm", False)
+        parent_uuid = options.get("parent_uuid", self.current_parent_uuid)
+
+        try:
+            # Show loading indicator
+            if show_loading:
+                self.message_display.show_loading(loading_message)
+
+            display_sequence = []
+
+            for msg in messages:
+                role = msg.get("role", "user")
+                content = msg.get("content", "")
+
+                # Add to conversation history
+                if add_to_history:
+                    from ..models import ConversationMessage
+                    self._add_conversation_message(
+                        ConversationMessage(role=role, content=content),
+                        parent_uuid=parent_uuid
+                    )
+
+                # Log message
+                if log_messages:
+                    if role == "user":
+                        parent_uuid = await self.conversation_logger.log_user_message(
+                            content, parent_uuid=parent_uuid
+                        )
+                    elif role == "assistant":
+                        parent_uuid = await self.conversation_logger.log_assistant_message(
+                            content, parent_uuid=parent_uuid
+                        )
+                    elif role == "system":
+                        await self.conversation_logger.log_system_message(
+                            content, parent_uuid=parent_uuid
+                        )
+
+                # Build display sequence
+                if display_messages and role in ("user", "assistant", "system"):
+                    display_sequence.append((role, content, {}))
+
+            # Display messages atomically
+            if display_messages and display_sequence:
+                self.message_display.message_coordinator.display_message_sequence(
+                    display_sequence
+                )
+
+            # Hide loading before display
+            if show_loading:
+                await asyncio.sleep(0.5)
+                self.message_display.hide_loading()
+
+            # Update session stats
+            if hasattr(self, 'session_stats'):
+                self.session_stats["messages"] += len(messages)
+
+            # Optionally trigger LLM response
+            if trigger_llm:
+                # Find the last user message to process
+                last_user_msg = None
+                for msg in reversed(messages):
+                    if msg.get("role") == "user":
+                        last_user_msg = msg.get("content", "")
+                        break
+
+                if last_user_msg:
+                    await self._enqueue_with_overflow_strategy(last_user_msg)
+                    if not self.is_processing:
+                        self.create_background_task(self._process_queue(), name="process_queue")
+
+            logger.info(f"ADD_MESSAGE: Processed {len(messages)} messages, trigger_llm={trigger_llm}")
+            return {
+                "success": True,
+                "message_count": len(messages),
+                "parent_uuid": parent_uuid,
+                "llm_triggered": trigger_llm
+            }
+
+        except Exception as e:
+            # Ensure loading is hidden on error
+            if show_loading:
+                self.message_display.hide_loading()
+            logger.error(f"Error in ADD_MESSAGE handler: {e}")
+            return {"success": False, "error": str(e)}
+
     async def register_hooks(self) -> None:
         """Register LLM service hooks with the event bus."""
         for hook in self.hooks:
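
The new add_message_handler hook means any plugin can inject conversation turns by emitting ADD_MESSAGE. A sketch of a hypothetical plugin call, using the emit_with_hooks() signature seen elsewhere in this diff; the payload keys match _handle_add_message() above, while the plugin name and message content are illustrative only.

# Sketch only: a plugin injecting context via the ADD_MESSAGE event.
from core.events import EventType

async def inject_ci_context(event_bus):
    await event_bus.emit_with_hooks(
        EventType.ADD_MESSAGE,
        {
            "messages": [
                {"role": "system", "content": "CI: the latest build failed on lint."},
                {"role": "user", "content": "Please propose a fix."},
            ],
            "options": {
                "show_loading": True,
                "loading_message": "Injecting CI context...",
                "trigger_llm": True,  # queue the last user message for the LLM
            },
        },
        "ci_plugin",  # hypothetical emitter name
    )
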
@@ -957,8 +1538,12 @@ class LLMService:
 
         except Exception as e:
             logger.error(f"Queue processing error: {e}")
-            # Display error using MessageDisplayService (DRY refactoring)
-            self.message_display.display_error_message(str(e))
+            # Provide user-friendly error messages
+            error_msg = str(e)
+            if "'str' object has no attribute 'get'" in error_msg:
+                error_msg = ("API format mismatch. Your profile's tool_format setting may be wrong.\n"
+                             "Run /profile, press 'e' to edit, and check Tool Format matches your API.")
+            self.message_display.display_error_message(error_msg)
             break
 
         # Continue conversation until completed (unlimited agentic turns)
@@ -1016,18 +1601,84 @@ class LLMService:
             if token_usage:
                 prompt_tokens = token_usage.get("prompt_tokens", 0)
                 completion_tokens = token_usage.get("completion_tokens", 0)
-                self.session_stats["input_tokens"] += prompt_tokens
-                self.session_stats["output_tokens"] += completion_tokens
+                # Store last request tokens (for context window display)
+                self.session_stats["input_tokens"] = prompt_tokens
+                self.session_stats["output_tokens"] = completion_tokens
+                # Accumulate totals
+                self.session_stats["total_input_tokens"] += prompt_tokens
+                self.session_stats["total_output_tokens"] += completion_tokens
                 logger.debug(f"Token usage: {prompt_tokens} input, {completion_tokens} output")
-
+
+            # Check for native tool calls (API function calling)
+            # Native calls are optional; XML-based tools are the Kollabor standard
+            if self.native_tool_calling_enabled and self.api_service.has_pending_tool_calls():
+                thinking_duration = time.time() - thinking_start
+                self.renderer.update_thinking(False)
+
+                logger.info("Processing native tool calls from API response")
+
+                # Build original_tools list for display (before execution may modify names)
+                raw_tool_calls = self.api_service.get_last_tool_calls()
+                original_tools = [
+                    {"name": tc.tool_name, "arguments": tc.arguments}
+                    for tc in raw_tool_calls
+                ]
+
+                # Show tool execution indicator (prevents UI freeze appearance)
+                tool_count = len(raw_tool_calls)
+                tool_desc = raw_tool_calls[0].tool_name if tool_count == 1 else f"{tool_count} tools"
+                self.renderer.update_thinking(True, f"Executing {tool_desc}...")
+
+                native_results = await self._execute_native_tool_calls()
+
+                # Stop tool execution indicator
+                self.renderer.update_thinking(False)
+
+                # Display response and native tool results
+                self.message_display.display_complete_response(
+                    thinking_duration=thinking_duration,
+                    response=response,
+                    tool_results=native_results,
+                    original_tools=original_tools
+                )
+
+                # Add assistant response to history
+                self._add_conversation_message(ConversationMessage(
+                    role="assistant",
+                    content=response
+                ))
+
+                # Add tool results to conversation using native format
+                for result in native_results:
+                    tool_calls = self.api_service.get_last_tool_calls()
+                    for tc in tool_calls:
+                        if tc.tool_id == result.tool_id:
+                            msg = self.api_service.format_tool_result(
+                                tc.tool_id,
+                                result.output if result.success else result.error,
+                                is_error=not result.success
+                            )
+                            # Add formatted tool result to conversation
+                            self._add_conversation_message(ConversationMessage(
+                                role=msg.get("role", "tool"),
+                                content=str(msg.get("content", result.output))
+                            ))
+                            break
+
+                # Continue conversation to get LLM response with tool results
+                self.turn_completed = False
+                self.stats["total_thinking_time"] += thinking_duration
+                self.session_stats["messages"] += 1
+                return  # Native tools handled, continue conversation loop
+
             # Stop thinking animation and show completion message
             thinking_duration = time.time() - thinking_start
             self.renderer.update_thinking(False)
-
+
             # Brief pause to ensure clean transition from thinking to completion message
             await asyncio.sleep(self.config.get("core.llm.processing_delay", 0.1))
-
-            # Parse response using new ResponseParser
+
+            # Parse response using ResponseParser for XML-based tools (Kollabor standard)
             parsed_response = self.response_parser.parse_response(response)
             clean_response = parsed_response["content"]
             all_tools = self.response_parser.get_all_tools(parsed_response)
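
The session_stats change above is easy to misread: input_tokens and output_tokens switch from cumulative counters to last-request snapshots, while the new total_* keys take over accumulation. A self-contained sketch of the bookkeeping over two requests, with illustrative token counts:

# Sketch only: the token bookkeeping both request paths now share.
# "input_tokens"/"output_tokens" are overwritten per request (last-request
# view, useful for context-window display); the "total_*" keys accumulate.
session_stats = {"input_tokens": 0, "output_tokens": 0,
                 "total_input_tokens": 0, "total_output_tokens": 0}

for prompt_tokens, completion_tokens in [(1200, 300), (1500, 250)]:
    session_stats["input_tokens"] = prompt_tokens         # replace, not +=
    session_stats["output_tokens"] = completion_tokens
    session_stats["total_input_tokens"] += prompt_tokens
    session_stats["total_output_tokens"] += completion_tokens

assert session_stats["input_tokens"] == 1500         # last request only
assert session_stats["total_input_tokens"] == 2700   # whole session
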
@@ -1051,18 +1702,60 @@ class LLMService:
 
             # Stop generating animation before message display
             self.renderer.update_thinking(False)
-
-            # Execute all tools (terminal commands and MCP tools) if any
+
+            # Question gate: if enabled and question tag present, suspend tool execution
             tool_results = None
             if all_tools:
-                tool_results = await self.tool_executor.execute_all_tools(all_tools)
+                if self.question_gate_enabled and parsed_response.get("question_gate_active"):
+                    # Store tools for later execution when user responds
+                    self.pending_tools = all_tools
+                    self.question_gate_active = True
+                    logger.info(f"Question gate: suspended {len(all_tools)} tool(s) pending user response")
+                else:
+                    # Show tool execution indicator (prevents UI freeze appearance)
+                    tool_count = len(all_tools)
+                    tool_desc = all_tools[0].get("type", "tool") if tool_count == 1 else f"{tool_count} tools"
+                    self.renderer.update_thinking(True, f"Executing {tool_desc}...")
+
+                    # Execute tools normally
+                    tool_results = await self.tool_executor.execute_all_tools(all_tools)
+
+                    # Stop tool execution indicator
+                    self.renderer.update_thinking(False)
+
+            # Emit LLM_RESPONSE event BEFORE display so plugins can show tool indicators first
+            response_context = await self.event_bus.emit_with_hooks(
+                EventType.LLM_RESPONSE,
+                {
+                    "response_text": response,
+                    "clean_response": clean_response,
+                    "thinking_duration": thinking_duration,
+                    "tool_results": tool_results,
+                },
+                "llm_service"
+            )
+
+            # Check if any plugin wants to force continuation (e.g., agent orchestrator)
+            # Plugins can set force_continue in any phase (pre, main, post)
+            force_continue = False
+            if response_context:
+                for phase in ["pre", "main", "post"]:
+                    phase_data = response_context.get(phase, {})
+                    final_data = phase_data.get("final_data", {})
+                    if final_data.get("force_continue"):
+                        force_continue = True
+                        break
+            if force_continue:
+                self.turn_completed = False
+                logger.info("Plugin requested turn continuation")
 
             # Display thinking duration, response, and tool results atomically using unified method
+            # Note: when question gate is active, tool_results is None (tools not executed yet)
             self.message_display.display_complete_response(
                 thinking_duration=thinking_duration,
                 response=clean_response,
                 tool_results=tool_results,
-                original_tools=all_tools
+                original_tools=all_tools if not self.question_gate_active else None
             )
 
             # Log assistant response
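
The LLM_RESPONSE emission above gives plugins a documented way to keep an agentic turn alive. A sketch of a hook callback that requests continuation; the payload keys and the force_continue flag match the diff, but whether a returned dict lands in final_data depends on the event bus's phase-merging, which this diff does not show.

# Sketch only: a plugin hook asking the service to continue the turn.
async def on_llm_response(data, event):
    tool_results = data.get("tool_results") or []
    # If any tool failed, ask for another turn so the LLM can retry.
    if any(not r.success for r in tool_results):
        # Assumption: returned data is merged into the hook context so the
        # service finds it under <phase>["final_data"]["force_continue"].
        return {"force_continue": True}
    return {}
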
@@ -1111,11 +1804,6 @@ class LLMService:
             # Clear any display artifacts
             self.renderer.clear_active_area()
 
-            # Remove the user message that was just added since processing was cancelled
-            if self.conversation_history and self.conversation_history[-1].role == "user":
-                self.conversation_history.pop()
-                logger.info("Removed cancelled user message from conversation history")
-
             # Show cancellation message (only once)
             if not self.cancellation_message_shown:
                 self.cancellation_message_shown = True
@@ -1131,8 +1819,12 @@ class LLMService:
         except Exception as e:
             logger.error(f"Error processing message batch: {e}")
             self.renderer.update_thinking(False)
-            # Display error using MessageDisplayService (DRY refactoring)
-            self.message_display.display_error_message(str(e))
+            # Provide user-friendly error messages
+            error_msg = str(e)
+            if "'str' object has no attribute 'get'" in error_msg:
+                error_msg = ("API format mismatch. Your profile's tool_format setting may be wrong.\n"
+                             "Run /profile, press 'e' to edit, and check Tool Format matches your API.")
+            self.message_display.display_error_message(error_msg)
             # Complete turn on error to prevent infinite loops
             self.turn_completed = True
 
@@ -1155,18 +1847,82 @@ class LLMService:
             if token_usage:
                 prompt_tokens = token_usage.get("prompt_tokens", 0)
                 completion_tokens = token_usage.get("completion_tokens", 0)
-                self.session_stats["input_tokens"] += prompt_tokens
-                self.session_stats["output_tokens"] += completion_tokens
+                # Store last request tokens (for context window display)
+                self.session_stats["input_tokens"] = prompt_tokens
+                self.session_stats["output_tokens"] = completion_tokens
+                # Accumulate totals
+                self.session_stats["total_input_tokens"] += prompt_tokens
+                self.session_stats["total_output_tokens"] += completion_tokens
                 logger.debug(f"Token usage: {prompt_tokens} input, {completion_tokens} output")
-
-            # Parse response using new ResponseParser
+
+            # Check for native tool calls (API function calling)
+            # Native calls are optional; XML-based tools are the Kollabor standard
+            if self.native_tool_calling_enabled and self.api_service.has_pending_tool_calls():
+                thinking_duration = time.time() - thinking_start
+                self.renderer.update_thinking(False)
+
+                logger.info("Processing native tool calls from API response (continue)")
+
+                # Build original_tools list for display
+                raw_tool_calls = self.api_service.get_last_tool_calls()
+                original_tools = [
+                    {"name": tc.tool_name, "arguments": tc.arguments}
+                    for tc in raw_tool_calls
+                ]
+
+                # Show tool execution indicator (prevents UI freeze appearance)
+                tool_count = len(raw_tool_calls)
+                tool_desc = raw_tool_calls[0].tool_name if tool_count == 1 else f"{tool_count} tools"
+                self.renderer.update_thinking(True, f"Executing {tool_desc}...")
+
+                native_results = await self._execute_native_tool_calls()
+
+                # Stop tool execution indicator
+                self.renderer.update_thinking(False)
+
+                # Display response and native tool results
+                self.message_display.display_complete_response(
+                    thinking_duration=thinking_duration,
+                    response=response,
+                    tool_results=native_results,
+                    original_tools=original_tools
+                )
+
+                # Add assistant response to history
+                self._add_conversation_message(ConversationMessage(
+                    role="assistant",
+                    content=response
+                ))
+
+                # Add tool results to conversation using native format
+                for result in native_results:
+                    tool_calls = self.api_service.get_last_tool_calls()
+                    for tc in tool_calls:
+                        if tc.tool_id == result.tool_id:
+                            msg = self.api_service.format_tool_result(
+                                tc.tool_id,
+                                result.output if result.success else result.error,
+                                is_error=not result.success
+                            )
+                            self._add_conversation_message(ConversationMessage(
+                                role=msg.get("role", "tool"),
+                                content=str(msg.get("content", result.output))
+                            ))
+                            break
+
+                # Continue conversation to get LLM response with tool results
+                self.turn_completed = False
+                self.stats["total_thinking_time"] += thinking_duration
+                return  # Native tools handled, continue conversation loop
+
+            # Parse response using ResponseParser for XML-based tools (Kollabor standard)
             parsed_response = self.response_parser.parse_response(response)
             clean_response = parsed_response["content"]
             all_tools = self.response_parser.get_all_tools(parsed_response)
-
+
             # Update turn completion state
             self.turn_completed = parsed_response["turn_completed"]
-
+
             thinking_duration = time.time() - thinking_start
             self.renderer.update_thinking(False)
 
@@ -1185,18 +1941,60 @@ class LLMService:
 
             # Stop generating animation before message display
             self.renderer.update_thinking(False)
-
-            # Execute all tools (terminal commands and MCP tools) if any
+
+            # Question gate: if enabled and question tag present, suspend tool execution
             tool_results = None
             if all_tools:
-                tool_results = await self.tool_executor.execute_all_tools(all_tools)
+                if self.question_gate_enabled and parsed_response.get("question_gate_active"):
+                    # Store tools for later execution when user responds
+                    self.pending_tools = all_tools
+                    self.question_gate_active = True
+                    logger.info(f"Question gate (continue): suspended {len(all_tools)} tool(s) pending user response")
+                else:
+                    # Show tool execution indicator (prevents UI freeze appearance)
+                    tool_count = len(all_tools)
+                    tool_desc = all_tools[0].get("type", "tool") if tool_count == 1 else f"{tool_count} tools"
+                    self.renderer.update_thinking(True, f"Executing {tool_desc}...")
+
+                    # Execute tools normally
+                    tool_results = await self.tool_executor.execute_all_tools(all_tools)
+
+                    # Stop tool execution indicator
+                    self.renderer.update_thinking(False)
+
+            # Emit LLM_RESPONSE event BEFORE display so plugins can show tool indicators first
+            response_context = await self.event_bus.emit_with_hooks(
+                EventType.LLM_RESPONSE,
+                {
+                    "response_text": response,
+                    "clean_response": clean_response,
+                    "thinking_duration": thinking_duration,
+                    "tool_results": tool_results,
+                },
+                "llm_service"
+            )
+
+            # Check if any plugin wants to force continuation (e.g., agent orchestrator)
+            # Plugins can set force_continue in any phase (pre, main, post)
+            force_continue = False
+            if response_context:
+                for phase in ["pre", "main", "post"]:
+                    phase_data = response_context.get(phase, {})
+                    final_data = phase_data.get("final_data", {})
+                    if final_data.get("force_continue"):
+                        force_continue = True
+                        break
+            if force_continue:
+                self.turn_completed = False
+                logger.info("Plugin requested turn continuation (continue path)")
 
             # Display thinking duration, response, and tool results atomically using unified method
+            # Note: when question gate is active, tool_results is None (tools not executed yet)
             self.message_display.display_complete_response(
                 thinking_duration=thinking_duration,
                 response=clean_response,
                 tool_results=tool_results,
-                original_tools=all_tools
+                original_tools=all_tools if not self.question_gate_active else None
             )
 
             # Log continuation
@@ -1444,13 +2242,14 @@ class LLMService:
         if self.cancel_processing:
             logger.info("API call cancelled before starting")
             raise asyncio.CancelledError("Request cancelled by user")
-
+
         # Delegate to API communication service (eliminates ~160 lines of duplicated API code)
         try:
             return await self.api_service.call_llm(
                 conversation_history=self.conversation_history,
                 max_history=self.max_history,
-                streaming_callback=self._handle_streaming_chunk
+                streaming_callback=self._handle_streaming_chunk,
+                tools=self.native_tools  # Native function calling
             )
         except asyncio.CancelledError:
             logger.info("LLM API call was cancelled")
@@ -1480,6 +2279,30 @@ class LLMService:
 
         logger.debug("Cleaned up streaming state")
 
+    def reload_config(self) -> None:
+        """Reload configuration values from config service (hot reload support).
+
+        Called when configuration changes via /config modal or file watcher.
+        Re-reads all cached config values to apply changes without restart.
+        """
+        logger.info("Hot reloading LLM configuration...")
+
+        # Reload LLM settings
+        self.max_history = self.config.get("core.llm.max_history", 90)
+
+        # Reload tool executor timeouts
+        self.tool_executor.terminal_timeout = self.config.get("core.llm.terminal_timeout", 120)
+        self.tool_executor.mcp_timeout = self.config.get("core.llm.mcp_timeout", 120)
+
+        # Reload streaming setting
+        self.api_service.enable_streaming = self.config.get("core.llm.enable_streaming", False)
+
+        # Note: processing_delay and thinking_delay are already read dynamically each call
+
+        logger.info(f"Config reloaded: max_history={self.max_history}, "
+                    f"terminal_timeout={self.tool_executor.terminal_timeout}, "
+                    f"mcp_timeout={self.tool_executor.mcp_timeout}, "
+                    f"streaming={self.api_service.enable_streaming}")
 
     def get_status_line(self) -> Dict[str, List[str]]:
         """Get status information for display."""
@@ -1510,12 +2333,25 @@ class LLMService:
             dropped_indicator = f" ({self.dropped_messages} dropped)" if self.dropped_messages > 0 else ""
 
             status["C"].append(f"Queue: {queue_size}/{self.max_queue_size} ({queue_utilization:.0f}%){dropped_indicator}")
-
+
             # Add warning if queue utilization is high
             if queue_utilization > 80:
                 status["C"].append(f"⚠️ Queue usage high!")
         status["C"].append(f"History: {len(self.conversation_history)}")
-        status["C"].append(f"Tasks: {len(self._background_tasks)}")
+
+        # Show active background tasks with elapsed time
+        if self._background_tasks:
+            # Show up to 3 most recent tasks
+            for task in list(self._background_tasks)[:3]:
+                task_name = task.get_name()
+                if task_name and task_name in self._task_metadata:
+                    elapsed = time.time() - self._task_metadata[task_name]['start_time']
+                    # Format: "⏳ task_name (45s)"
+                    status["C"].append(f"⏳ {task_name} ({elapsed:.0f}s)")
+                elif task_name:
+                    # Task without metadata - just show name
+                    status["C"].append(f"⏳ {task_name}")
+
         if self._task_error_count > 0:
             status["C"].append(f"Task Errors: {self._task_error_count}")
 
@@ -1621,9 +2457,4 @@ class LLMService:
         except Exception as e:
             logger.warning(f"MCP shutdown error: {e}")
 
-        # Save statistics
-        self.state_manager.set("llm.stats", self.stats)
-
-
-
         logger.info("Core LLM Service shutdown complete")