glaip-sdk 0.6.10__py3-none-any.whl → 0.6.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,151 @@
+ """Local HITL prompt handler with interactive console support.
+
+ Author:
+     Putu Ravindra Wiguna (putu.r.wiguna@gdplabs.id)
+ """
+
+ import os
+ from typing import Any
+
+ try:
+     from aip_agents.agent.hitl.prompt.base import BasePromptHandler
+     from aip_agents.schema.hitl import ApprovalDecision, ApprovalDecisionType, ApprovalRequest
+ except ImportError as e:
+     raise ImportError("aip_agents is required for local HITL. Install with: pip install 'glaip-sdk[local]'") from e
+
+ from rich.console import Console
+ from rich.prompt import Prompt
+
+
+ class LocalPromptHandler(BasePromptHandler):
+     """Local HITL prompt handler with interactive console prompts.
+
+     Experimental local HITL implementation with known limitations:
+     - Timeouts are not enforced (interactive prompts wait indefinitely)
+     - Relies on private renderer methods for pause/resume
+     - Only supports interactive terminal environments
+
+     The key insight from the Rich documentation is that Live must be stopped before
+     using Prompt/input(), otherwise the input won't render properly.
+
+     Environment variables:
+         GLAIP_HITL_AUTO_APPROVE: Set to "true" (case-insensitive) to auto-approve
+             all requests without user interaction. Useful for integration tests and CI.
+     """
+
+     def __init__(self, *, pause_resume_callback: Any | None = None) -> None:
+         """Initialize the prompt handler.
+
+         Args:
+             pause_resume_callback: Optional callable with pause() and resume() methods
+                 to control the live renderer during prompts. This is needed because
+                 Rich Live interferes with Prompt/input().
+         """
+         super().__init__()
+         self._pause_resume = pause_resume_callback
+         self._console = Console()
+
+     async def prompt_for_decision(
+         self,
+         request: ApprovalRequest,
+         timeout_seconds: int,
+         context_keys: list[str] | None = None,
+     ) -> ApprovalDecision:
+         """Prompt for an approval decision with live renderer pause/resume.
+
+         Supports auto-approval via the GLAIP_HITL_AUTO_APPROVE environment variable
+         for integration testing and CI environments. Set to "true" (case-insensitive) to enable.
+         """
+         _ = (timeout_seconds, context_keys)  # Suppress unused parameter warnings.
+
+         # Check for auto-approve mode (for integration tests/CI)
+         auto_approve = os.getenv("GLAIP_HITL_AUTO_APPROVE", "").lower() == "true"
+
+         if auto_approve:
+             # Auto-approve without user interaction
+             return ApprovalDecision(
+                 request_id=request.request_id,
+                 decision=ApprovalDecisionType.APPROVED,
+                 operator_input="auto-approved",
+             )
+
+         # Pause the live renderer if a callback is available
+         if self._pause_resume:
+             self._pause_resume.pause()
+
+         try:
+             # POC/MVP: show the operator what they are approving
+             self._print_request_info(request)
+
+             # Prompt for a real decision (approve / reject / skip)
+             response = Prompt.ask(
+                 "\n[yellow]Approve this tool call?[/yellow] [dim](y/n/s)[/dim]",
+                 console=self._console,
+                 default="y",
+             )
+             response = response.lower().strip()
+
+             if response in ("y", "yes"):
+                 decision = ApprovalDecisionType.APPROVED
+             elif response in ("n", "no"):
+                 decision = ApprovalDecisionType.REJECTED
+             else:
+                 decision = ApprovalDecisionType.SKIPPED
+
+             return ApprovalDecision(
+                 request_id=request.request_id,
+                 decision=decision,
+                 operator_input=response if decision != ApprovalDecisionType.SKIPPED else None,
+             )
+         finally:
+             # Always resume the live renderer
+             if self._pause_resume:
+                 self._pause_resume.resume()
+
+     def _print_request_info(self, request: ApprovalRequest) -> None:
+         """Print the approval request information."""
+         self._console.print()
+         self._console.rule("[yellow]HITL Approval Request[/yellow]", style="yellow")
+
+         tool_name = request.tool_name or "unknown"
+         self._console.print(f"[cyan]Tool:[/cyan] {tool_name}")
+
+         if hasattr(request, "arguments_preview") and request.arguments_preview:
+             self._console.print(f"[cyan]Arguments:[/cyan] {request.arguments_preview}")
+
+         if request.context:
+             self._console.print(f"[dim]Context: {request.context}[/dim]")
+
+
+ class PauseResumeCallback:
+     """Simple callback object for pausing/resuming the live renderer.
+
+     This allows the LocalPromptHandler to control the renderer without
+     directly coupling to the renderer implementation.
+     """
+
+     def __init__(self) -> None:
+         """Initialize the callback."""
+         self._renderer: Any | None = None
+
+     def set_renderer(self, renderer: Any) -> None:
+         """Set the renderer instance.
+
+         Args:
+             renderer: RichStreamRenderer instance exposing the private
+                 _shutdown_live() and _ensure_live() methods used below.
+         """
+         self._renderer = renderer
+
+     def pause(self) -> None:
+         """Pause the live renderer before prompting."""
+         if self._renderer and hasattr(self._renderer, "_shutdown_live"):
+             self._renderer._shutdown_live()
+
+     def resume(self) -> None:
+         """Resume the live renderer after prompting."""
+         if self._renderer and hasattr(self._renderer, "_ensure_live"):
+             self._renderer._ensure_live()
+
+
+ __all__ = ["LocalPromptHandler", "PauseResumeCallback"]
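For orientation, here is a minimal sketch of how this new module is meant to be wired, mirroring what the runner changes below do. The renderer object is hypothetical; the import path comes from the runner diff further down.

```python
# Hedged sketch: wiring the local HITL handler. `renderer` is a hypothetical
# RichStreamRenderer instance; GLAIP_HITL_AUTO_APPROVE is from the module above.
import os

from glaip_sdk.hitl import LocalPromptHandler, PauseResumeCallback

callback = PauseResumeCallback()
handler = LocalPromptHandler(pause_resume_callback=callback)

# Once the Rich renderer exists, hand it to the callback so the handler can
# stop Live before Prompt.ask() and restart it afterwards.
# callback.set_renderer(renderer)

# In CI, skip the interactive prompt entirely:
os.environ["GLAIP_HITL_AUTO_APPROVE"] = "true"
```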
glaip_sdk/runner/deps.py CHANGED
@@ -15,6 +15,8 @@ Example:

  from __future__ import annotations

+ import importlib.util
+
  from gllm_core.utils import LoggerManager

  logger = LoggerManager().get_logger(__name__)
@@ -24,17 +26,12 @@ _local_runtime_available: bool | None = None


  def _probe_aip_agents_import() -> bool:
-     """Attempt to import aip_agents and return success status.
+     """Check if aip_agents is available without importing it.

      Returns:
-         True if aip_agents can be imported successfully, False otherwise.
+         True if aip_agents appears importable, False otherwise.
      """
-     try:
-         import aip_agents  # noqa: F401, PLC0415
-
-         return True
-     except ImportError:
-         return False
+     return importlib.util.find_spec("aip_agents") is not None


  def check_local_runtime_available() -> bool:
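Switching from try/import to `importlib.util.find_spec` means the probe no longer executes the module-level code of `aip_agents` just to test for its presence. A standalone illustration of the same stdlib pattern:

```python
import importlib.util


def is_installed(package: str) -> bool:
    """Probe availability without importing (no module-level side effects run)."""
    return importlib.util.find_spec(package) is not None


print(is_installed("json"))         # True: a stdlib module always has a spec
print(is_installed("no_such_pkg"))  # False: find_spec returns None
```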
@@ -19,26 +19,56 @@ from __future__ import annotations

  import asyncio
  import inspect
+ import logging
  from dataclasses import dataclass
  from typing import TYPE_CHECKING, Any

+ from aip_agents.agent.hitl.manager import ApprovalManager  # noqa: PLC0415
+ from gllm_core.utils import LoggerManager
+
+ from glaip_sdk.client.run_rendering import AgentRunRenderingManager
+ from glaip_sdk.hitl import LocalPromptHandler, PauseResumeCallback
  from glaip_sdk.runner.base import BaseRunner
  from glaip_sdk.runner.deps import (
      check_local_runtime_available,
      get_local_runtime_missing_message,
  )
- from glaip_sdk.utils.a2a import A2AEventStreamProcessor
- from gllm_core.utils import LoggerManager
+ from glaip_sdk.utils.tool_storage_provider import build_tool_output_manager

  if TYPE_CHECKING:
      from langchain_core.messages import BaseMessage

      from glaip_sdk.agents.base import Agent

- logger = LoggerManager().get_logger(__name__)

- # Default A2A event processor
- _event_processor = A2AEventStreamProcessor()
+ _AIP_LOGS_SWALLOWED = False
+
+
+ def _swallow_aip_logs(level: int = logging.ERROR) -> None:
+     """Consume noisy AIPAgents logs once (opt-in via runner flag)."""
+     global _AIP_LOGS_SWALLOWED
+     if _AIP_LOGS_SWALLOWED:
+         return
+     prefixes = ("aip_agents.",)
+
+     def _silence(name: str) -> None:
+         lg = logging.getLogger(name)
+         lg.handlers = [logging.NullHandler()]
+         lg.propagate = False
+         lg.setLevel(level)
+
+     # Silence any already-registered loggers under the given prefixes
+     for logger_name in logging.root.manager.loggerDict:
+         if any(logger_name.startswith(prefix) for prefix in prefixes):
+             _silence(logger_name)
+
+     # Also set the base prefix loggers so future children inherit silence
+     for prefix in prefixes:
+         _silence(prefix.rstrip("."))
+     _AIP_LOGS_SWALLOWED = True
+
+
+ logger = LoggerManager().get_logger(__name__)


  def _convert_chat_history_to_messages(
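The silencing recipe in `_swallow_aip_logs` — a `NullHandler`, `propagate = False`, and a raised level — is plain stdlib `logging`. A self-contained demo with a hypothetical logger name:

```python
import logging

logging.basicConfig(level=logging.INFO)
noisy = logging.getLogger("aip_agents.some.module")

noisy.info("visible: propagates to the root handler")

# The same steps _swallow_aip_logs applies per logger:
noisy.handlers = [logging.NullHandler()]  # swallow records handled here
noisy.propagate = False                   # stop records reaching the root
noisy.setLevel(logging.ERROR)             # drop INFO/WARNING at the source

noisy.info("silenced: never printed")
```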
@@ -81,9 +111,10 @@ def _convert_chat_history_to_messages(
  class LangGraphRunner(BaseRunner):
      """Runner implementation using aip-agents LangGraphReactAgent.

-     MVP scope:
-     - Execute via `LangGraphReactAgent.arun_a2a_stream()`
-     - Extract and return final text from the emitted `final_response` event
+     Current behavior:
+     - Execute via `LangGraphReactAgent.arun_sse_stream()` (normalized SSE-compatible stream)
+     - Route all events through `AgentRunRenderingManager.async_process_stream_events`
+       for unified rendering between local and remote agents

      Attributes:
          default_model: Model name to use when agent.model is not set.
@@ -99,6 +130,8 @@ class LangGraphRunner(BaseRunner):
          verbose: bool = False,
          runtime_config: dict[str, Any] | None = None,
          chat_history: list[dict[str, str]] | None = None,
+         *,
+         swallow_aip_logs: bool = True,
          **kwargs: Any,
      ) -> str:
          """Execute agent synchronously and return final response text.
@@ -113,6 +146,9 @@
              chat_history: Optional list of prior conversation messages.
                  Each message is a dict with "role" and "content" keys.
                  Defaults to None.
+             swallow_aip_logs: When True (default), silence noisy logs from aip-agents,
+                 gllm_inference, OpenAILMInvoker, and httpx. Set to False to honor the
+                 user's logging configuration.
              **kwargs: Additional keyword arguments passed to the backend.

          Returns:
@@ -141,6 +177,7 @@
              verbose=verbose,
              runtime_config=runtime_config,
              chat_history=chat_history,
+             swallow_aip_logs=swallow_aip_logs,
              **kwargs,
          )

@@ -153,6 +190,8 @@
          verbose: bool = False,
          runtime_config: dict[str, Any] | None = None,
          chat_history: list[dict[str, str]] | None = None,
+         *,
+         swallow_aip_logs: bool = True,
          **kwargs: Any,
      ) -> str:
          """Execute agent asynchronously and return final response text.
@@ -167,6 +206,7 @@
              chat_history: Optional list of prior conversation messages.
                  Each message is a dict with "role" and "content" keys.
                  Defaults to None.
+             swallow_aip_logs: When True (default), silence noisy AIPAgents logs.
              **kwargs: Additional keyword arguments passed to the backend.

          Returns:
@@ -181,6 +221,7 @@
              verbose=verbose,
              runtime_config=runtime_config,
              chat_history=chat_history,
+             swallow_aip_logs=swallow_aip_logs,
              **kwargs,
          )

@@ -191,6 +232,8 @@
          verbose: bool = False,
          runtime_config: dict[str, Any] | None = None,
          chat_history: list[dict[str, str]] | None = None,
+         *,
+         swallow_aip_logs: bool = True,
          **kwargs: Any,
      ) -> str:
          """Internal async implementation of agent execution.
@@ -201,13 +244,23 @@
              verbose: If True, emit debug trace output during execution.
              runtime_config: Optional runtime configuration for tools, MCPs, etc.
              chat_history: Optional list of prior conversation messages.
+             swallow_aip_logs: When True (default), silence noisy AIPAgents logs.
              **kwargs: Additional keyword arguments passed to the backend.

          Returns:
              The final response text from the agent.
          """
+         # Optionally swallow noisy AIPAgents logs
+         if swallow_aip_logs:
+             _swallow_aip_logs()
+
+         # POC/MVP: Create pause/resume callback for interactive HITL input
+         pause_resume_callback = PauseResumeCallback()
+
          # Build the local LangGraphReactAgent from the glaip_sdk Agent
-         local_agent = self.build_langgraph_agent(agent, runtime_config=runtime_config)
+         local_agent = self.build_langgraph_agent(
+             agent, runtime_config=runtime_config, pause_resume_callback=pause_resume_callback
+         )

          # Convert chat history to LangChain messages for the agent
          langchain_messages = _convert_chat_history_to_messages(chat_history)
@@ -219,20 +272,54 @@
              agent.name,
          )

-         # Collect A2AEvents from the stream and extract final response
-         events: list[dict[str, Any]] = []
+         # Use shared render manager for unified processing
+         render_manager = AgentRunRenderingManager(logger)
+         renderer = render_manager.create_renderer(kwargs.get("renderer"), verbose=verbose)

-         async for event in local_agent.arun_a2a_stream(message, **kwargs):
-             if verbose:
-                 self._log_event(event)
-             events.append(event)
+         # POC/MVP: Set renderer on callback so LocalPromptHandler can pause/resume Live
+         pause_resume_callback.set_renderer(renderer)

-         return _event_processor.extract_final_response(events)
+         meta = render_manager.build_initial_metadata(agent.name, message, kwargs)
+         render_manager.start_renderer(renderer, meta)
+
+         try:
+             # Use shared async stream processor for unified event handling
+             (
+                 final_text,
+                 stats_usage,
+                 started_monotonic,
+                 finished_monotonic,
+             ) = await render_manager.async_process_stream_events(
+                 local_agent.arun_sse_stream(message, **kwargs),
+                 renderer,
+                 meta,
+                 skip_final_render=True,
+             )
+         except KeyboardInterrupt:
+             try:
+                 renderer.close()
+             finally:
+                 raise
+         except Exception:
+             try:
+                 renderer.close()
+             finally:
+                 raise
+
+         # Use shared finalizer to avoid code duplication
+         from glaip_sdk.client.run_rendering import finalize_render_manager  # noqa: PLC0415
+
+         return finalize_render_manager(
+             render_manager, renderer, final_text, stats_usage, started_monotonic, finished_monotonic
+         )

      def build_langgraph_agent(
          self,
          agent: Agent,
          runtime_config: dict[str, Any] | None = None,
+         shared_tool_output_manager: Any | None = None,
+         *,
+         pause_resume_callback: Any | None = None,
      ) -> Any:
          """Build a LangGraphReactAgent from a glaip_sdk Agent definition.
@@ -240,6 +327,10 @@
              agent: The glaip_sdk Agent to convert.
              runtime_config: Optional runtime configuration with tool_configs,
                  mcp_configs, agent_config, and agent-specific overrides.
+             shared_tool_output_manager: Optional ToolOutputManager to reuse across
+                 agents with tool_output_sharing enabled.
+             pause_resume_callback: Optional callback used to pause/resume the renderer
+                 during interactive HITL prompts.

          Returns:
              A configured LangGraphReactAgent instance.
@@ -249,6 +340,7 @@
              ValueError: If agent has unsupported tools, MCPs, or sub-agents for local mode.
          """
          from aip_agents.agent import LangGraphReactAgent  # noqa: PLC0415
+
          from glaip_sdk.runner.tool_adapter import LangChainToolAdapter  # noqa: PLC0415

          # Adapt tools for local execution
@@ -260,9 +352,6 @@
          adapter = LangChainToolAdapter()
          langchain_tools = adapter.adapt_tools(agent.tools)

-         # Build sub-agents recursively
-         sub_agent_instances = self._build_sub_agents(agent.agents, runtime_config)
-
          # Normalize runtime config: merge global and agent-specific configs
          normalized_config = self._normalize_runtime_config(runtime_config, agent)

@@ -276,6 +365,19 @@
          merged_agent_config = self._merge_agent_config(agent, normalized_config)
          agent_config_params, agent_config_kwargs = self._apply_agent_config(merged_agent_config)

+         tool_output_manager = self._resolve_tool_output_manager(
+             agent,
+             merged_agent_config,
+             shared_tool_output_manager,
+         )
+
+         # Build sub-agents recursively, sharing tool output manager when enabled.
+         sub_agent_instances = self._build_sub_agents(
+             agent.agents,
+             runtime_config,
+             shared_tool_output_manager=tool_output_manager,
+         )
+
          # Build the LangGraphReactAgent with tools, sub-agents, and configs
          local_agent = LangGraphReactAgent(
              name=agent.name,
@@ -285,6 +387,7 @@
              tools=langchain_tools,
              agents=sub_agent_instances if sub_agent_instances else None,
              tool_configs=tool_configs if tool_configs else None,
+             tool_output_manager=tool_output_manager,
              **agent_config_params,
              **agent_config_kwargs,
          )
@@ -292,6 +395,11 @@
          # Add MCP servers if configured
          self._add_mcp_servers(local_agent, agent, mcp_configs)

+         # Inject local HITL manager only if hitl_enabled is True (master switch).
+         # This matches remote behavior: hitl_enabled gates the HITL plumbing.
+         # Tool-level HITL configs are only enforced when hitl_enabled=True.
+         self._inject_hitl_manager(local_agent, merged_agent_config, agent.name, pause_resume_callback)
+
          logger.debug(
              "Built local LangGraphReactAgent for agent '%s' with %d tools, %d sub-agents, and %d MCPs",
              agent.name,
@@ -301,16 +409,60 @@
          )
          return local_agent

+     def _resolve_tool_output_manager(
+         self,
+         agent: Agent,
+         merged_agent_config: dict[str, Any],
+         shared_tool_output_manager: Any | None,
+     ) -> Any | None:
+         """Resolve tool output manager for local agent execution."""
+         tool_output_sharing_enabled = merged_agent_config.get("tool_output_sharing", False)
+         if not tool_output_sharing_enabled:
+             return None
+         if shared_tool_output_manager is not None:
+             return shared_tool_output_manager
+         return build_tool_output_manager(agent.name, merged_agent_config)
+
+     def _inject_hitl_manager(
+         self,
+         local_agent: Any,
+         merged_agent_config: dict[str, Any],
+         agent_name: str,
+         pause_resume_callback: Any | None,
+     ) -> None:
+         """Inject HITL manager when enabled, mirroring remote gating behavior."""
+         hitl_enabled = merged_agent_config.get("hitl_enabled", False)
+         if hitl_enabled:
+             try:
+                 local_agent.hitl_manager = ApprovalManager(
+                     prompt_handler=LocalPromptHandler(pause_resume_callback=pause_resume_callback)
+                 )
+                 # Store callback reference for setting renderer later
+                 if pause_resume_callback:
+                     local_agent._pause_resume_callback = pause_resume_callback
+                 logger.debug("HITL manager injected for agent '%s' (hitl_enabled=True)", agent_name)
+             except ImportError as e:
+                 # Missing dependencies - fail fast
+                 raise ImportError("Local HITL requires aip_agents. Install with: pip install 'glaip-sdk[local]'") from e
+             except Exception as e:
+                 # Other errors during HITL setup - fail fast
+                 raise RuntimeError(f"Failed to initialize HITL manager for agent '{agent_name}'") from e
+         else:
+             logger.debug("HITL manager not injected for agent '%s' (hitl_enabled=False)", agent_name)
+
      def _build_sub_agents(
          self,
          sub_agents: list[Any] | None,
          runtime_config: dict[str, Any] | None,
+         shared_tool_output_manager: Any | None = None,
      ) -> list[Any]:
          """Build sub-agent instances recursively.

          Args:
              sub_agents: List of sub-agent definitions.
              runtime_config: Runtime config to pass to sub-agents.
+             shared_tool_output_manager: Optional ToolOutputManager to reuse across
+                 agents with tool_output_sharing enabled.

          Returns:
              List of built sub-agent instances.
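Both helpers gate on plain keys in the merged agent config: `hitl_enabled` decides whether an `ApprovalManager` is attached, and `tool_output_sharing` decides whether a `ToolOutputManager` is created and reused. A hedged sketch of the relevant config shape — the nesting under `agent_config` follows the runtime_config docstring above, and the values are illustrative:

```python
# Hedged sketch: key names come from the code above; nesting and values assumed.
runtime_config = {
    "agent_config": {
        "hitl_enabled": True,         # master switch: inject the local ApprovalManager
        "tool_output_sharing": True,  # reuse one ToolOutputManager across sub-agents
    },
}
```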
@@ -324,7 +476,13 @@
          sub_agent_instances = []
          for sub_agent in sub_agents:
              self._validate_sub_agent_for_local_mode(sub_agent)
-             sub_agent_instances.append(self.build_langgraph_agent(sub_agent, runtime_config))
+             sub_agent_instances.append(
+                 self.build_langgraph_agent(
+                     sub_agent,
+                     runtime_config,
+                     shared_tool_output_manager=shared_tool_output_manager,
+                 )
+             )
          return sub_agent_instances

      def _add_mcp_servers(
@@ -571,7 +729,13 @@
          if "planning" in agent_config:
              direct_params["planning"] = agent_config["planning"]

+         if "enable_a2a_token_streaming" in agent_config:
+             direct_params["enable_a2a_token_streaming"] = agent_config["enable_a2a_token_streaming"]
+
          # Kwargs parameters (passed through **kwargs to BaseAgent)
+         if "enable_pii" in agent_config:
+             kwargs_params["enable_pii"] = agent_config["enable_pii"]
+
          if "memory" in agent_config:
              # Map "memory" to "memory_backend" for aip-agents compatibility
              kwargs_params["memory_backend"] = agent_config["memory"]
@@ -8,6 +8,7 @@ from __future__ import annotations

  import json
  import logging
+ import sys
  from datetime import datetime, timezone
  from time import monotonic
  from typing import Any
@@ -349,6 +350,9 @@ class RichStreamRenderer(TranscriptModeMixin):
              self._handle_status_event(ev)
          elif kind == "content":
              self._handle_content_event(content)
+         elif kind == "token":
+             # Token events should stream content incrementally with immediate console output
+             self._handle_token_event(content)
          elif kind == "final_response":
              self._handle_final_response_event(content, metadata)
          elif kind in {"agent_step", "agent_thinking_step"}:
@@ -368,6 +372,31 @@
          self.state.append_transcript_text(content)
          self._ensure_live()

+     def _handle_token_event(self, content: str) -> None:
+         """Handle token streaming events - print immediately for real-time streaming."""
+         if content:
+             self.state.append_transcript_text(content)
+             # Print token content directly to stdout for immediate visibility when not verbose.
+             # This bypasses Rich's Live display, which has refresh rate limitations.
+             if not self.verbose:
+                 try:
+                     # Mark that we're streaming tokens directly to prevent Live display from starting
+                     self._streaming_tokens_directly = True
+                     # Stop Live display if active to prevent it from intercepting stdout
+                     # and causing each token to appear on a new line
+                     if self.live is not None:
+                         self._stop_live_display()
+                     # Write directly to stdout - tokens will stream on the same line
+                     # since we're bypassing Rich's console, which adds newlines
+                     sys.stdout.write(content)
+                     sys.stdout.flush()
+                 except Exception:
+                     # Fall back to the live display if the direct write fails
+                     self._ensure_live()
+             else:
+                 # In verbose mode, use the normal live display (debug panels handle the output)
+                 self._ensure_live()
+
      def _handle_final_response_event(self, content: str, metadata: dict[str, Any]) -> None:
          """Handle final response events."""
          if content:
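The constraint driving this handler is general to Rich: while a `Live` display is running it owns the terminal, so raw `sys.stdout` writes render badly. A minimal standalone sketch of the stop/write/resume pattern, independent of this renderer:

```python
# Minimal sketch: stop Rich Live before raw stdout writes, then hand it back.
import sys
import time

from rich.live import Live
from rich.text import Text

with Live(Text("streaming..."), refresh_per_second=4) as live:
    live.stop()  # release the terminal before writing tokens ourselves
    for token in ["Hello", ", ", "world"]:
        sys.stdout.write(token)  # tokens stay on one line
        sys.stdout.flush()
        time.sleep(0.1)
    sys.stdout.write("\n")
    live.start()  # hand the terminal back to Live
```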
@@ -521,6 +550,18 @@
          if getattr(self, "_transcript_mode_enabled", False):
              return

+         # When verbose=False and tokens were streamed directly, skip the final panel.
+         # The user's script will print the final result, avoiding duplication.
+         if not self.verbose and getattr(self, "_streaming_tokens_directly", False):
+             # Add a newline after streaming tokens for clean separation
+             try:
+                 sys.stdout.write("\n")
+                 sys.stdout.flush()
+             except Exception:
+                 pass
+             self.state.printed_final_output = True
+             return
+
          if self.verbose:
              panel = build_final_panel(
                  self.state,
@@ -597,6 +638,19 @@

      def _finalize_display(self) -> None:
          """Finalize live display and render final output."""
+         # When verbose=False and tokens were streamed directly, skip live display updates
+         # to avoid showing a duplicate final result.
+         if not self.verbose and getattr(self, "_streaming_tokens_directly", False):
+             # Just add a newline after streaming tokens for clean separation
+             try:
+                 sys.stdout.write("\n")
+                 sys.stdout.flush()
+             except Exception:
+                 pass
+             self._stop_live_display()
+             self.state.printed_final_output = True
+             return
+
          # Final refresh
          self._ensure_live()

@@ -629,6 +683,10 @@
          """Ensure live display is updated."""
          if getattr(self, "_transcript_mode_enabled", False):
              return
+         # When verbose=False, don't start the Live display if we're streaming tokens directly.
+         # This prevents Live from intercepting stdout and putting tokens on separate lines.
+         if not self.verbose and getattr(self, "_streaming_tokens_directly", False):
+             return
          if not self._ensure_live_stack():
              return