glaip-sdk 0.6.10__py3-none-any.whl → 0.6.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,151 @@
+ """Local HITL prompt handler with interactive console support.
+
+ Author:
+     Putu Ravindra Wiguna (putu.r.wiguna@gdplabs.id)
+ """
+
+ import os
+ from typing import Any
+
+ try:
+     from aip_agents.agent.hitl.prompt.base import BasePromptHandler
+     from aip_agents.schema.hitl import ApprovalDecision, ApprovalDecisionType, ApprovalRequest
+ except ImportError as e:
+     raise ImportError("aip_agents is required for local HITL. Install with: pip install 'glaip-sdk[local]'") from e
+
+ from rich.console import Console
+ from rich.prompt import Prompt
+
+
+ class LocalPromptHandler(BasePromptHandler):
+     """Local HITL prompt handler with interactive console prompts.
+
+     Experimental local HITL implementation with known limitations:
+     - Timeouts are not enforced (interactive prompts wait indefinitely)
+     - Relies on private renderer methods for pause/resume
+     - Only supports interactive terminal environments
+
+     The key insight from the Rich documentation is that Live must be stopped before
+     using Prompt/input(), otherwise the input won't render properly.
+
+     Environment variables:
+         GLAIP_HITL_AUTO_APPROVE: Set to "true" (case-insensitive) to auto-approve
+             all requests without user interaction. Useful for integration tests and CI.
+     """
+
+     def __init__(self, *, pause_resume_callback: Any | None = None) -> None:
+         """Initialize the prompt handler.
+
+         Args:
+             pause_resume_callback: Optional callable with pause() and resume() methods
+                 to control the live renderer during prompts. This is needed because
+                 Rich Live interferes with Prompt/input().
+         """
+         super().__init__()
+         self._pause_resume = pause_resume_callback
+         self._console = Console()
+
+     async def prompt_for_decision(
+         self,
+         request: ApprovalRequest,
+         timeout_seconds: int,
+         context_keys: list[str] | None = None,
+     ) -> ApprovalDecision:
+         """Prompt for an approval decision with live renderer pause/resume.
+
+         Supports auto-approval via the GLAIP_HITL_AUTO_APPROVE environment variable
+         for integration testing and CI environments. Set to "true" (case-insensitive) to enable.
+         """
+         _ = (timeout_seconds, context_keys)  # Suppress unused parameter warnings.
+
+         # Check for auto-approve mode (for integration tests/CI)
+         auto_approve = os.getenv("GLAIP_HITL_AUTO_APPROVE", "").lower() == "true"
+
+         if auto_approve:
+             # Auto-approve without user interaction
+             return ApprovalDecision(
+                 request_id=request.request_id,
+                 decision=ApprovalDecisionType.APPROVED,
+                 operator_input="auto-approved",
+             )
+
+         # Pause the live renderer if a callback is available
+         if self._pause_resume:
+             self._pause_resume.pause()
+
+         try:
+             # POC/MVP: show the operator what they are approving
+             self._print_request_info(request)
+
+             # Prompt for a real decision (approve / reject / skip)
+             response = Prompt.ask(
+                 "\n[yellow]Approve this tool call?[/yellow] [dim](y/n/s)[/dim]",
+                 console=self._console,
+                 default="y",
+             )
+             response = response.lower().strip()
+
+             if response in ("y", "yes"):
+                 decision = ApprovalDecisionType.APPROVED
+             elif response in ("n", "no"):
+                 decision = ApprovalDecisionType.REJECTED
+             else:
+                 decision = ApprovalDecisionType.SKIPPED
+
+             return ApprovalDecision(
+                 request_id=request.request_id,
+                 decision=decision,
+                 operator_input=response if decision != ApprovalDecisionType.SKIPPED else None,
+             )
+         finally:
+             # Always resume the live renderer
+             if self._pause_resume:
+                 self._pause_resume.resume()
+
+     def _print_request_info(self, request: ApprovalRequest) -> None:
+         """Print the approval request information."""
+         self._console.print()
+         self._console.rule("[yellow]HITL Approval Request[/yellow]", style="yellow")
+
+         tool_name = request.tool_name or "unknown"
+         self._console.print(f"[cyan]Tool:[/cyan] {tool_name}")
+
+         if hasattr(request, "arguments_preview") and request.arguments_preview:
+             self._console.print(f"[cyan]Arguments:[/cyan] {request.arguments_preview}")
+
+         if request.context:
+             self._console.print(f"[dim]Context: {request.context}[/dim]")
+
+
+ class PauseResumeCallback:
+     """Simple callback object for pausing/resuming the live renderer.
+
+     This allows the LocalPromptHandler to control the renderer without
+     directly coupling to the renderer implementation.
+     """
+
+     def __init__(self) -> None:
+         """Initialize the callback."""
+         self._renderer: Any | None = None
+
+     def set_renderer(self, renderer: Any) -> None:
+         """Set the renderer instance.
+
+         Args:
+             renderer: RichStreamRenderer instance exposing the private
+                 _shutdown_live() and _ensure_live() methods used below.
+         """
+         self._renderer = renderer
+
+     def pause(self) -> None:
+         """Pause the live renderer before prompting."""
+         if self._renderer and hasattr(self._renderer, "_shutdown_live"):
+             self._renderer._shutdown_live()
+
+     def resume(self) -> None:
+         """Resume the live renderer after prompting."""
+         if self._renderer and hasattr(self._renderer, "_ensure_live"):
+             self._renderer._ensure_live()
+
+
+ __all__ = ["LocalPromptHandler", "PauseResumeCallback"]
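For orientation, here is a minimal sketch of how this new module is meant to be wired, mirroring what the runner changes below do. The renderer object is hypothetical; the import path comes from the runner diff further down.

```python
# Hedged sketch: wiring the local HITL handler. `renderer` is a hypothetical
# RichStreamRenderer instance; GLAIP_HITL_AUTO_APPROVE is from the module above.
import os

from glaip_sdk.hitl import LocalPromptHandler, PauseResumeCallback

callback = PauseResumeCallback()
handler = LocalPromptHandler(pause_resume_callback=callback)

# Once the Rich renderer exists, hand it to the callback so the handler can
# stop Live before Prompt.ask() and restart it afterwards.
# callback.set_renderer(renderer)

# In CI, skip the interactive prompt entirely:
os.environ["GLAIP_HITL_AUTO_APPROVE"] = "true"
```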
glaip_sdk/runner/deps.py CHANGED
@@ -15,6 +15,8 @@ Example:

  from __future__ import annotations

+ import importlib.util
+
  from gllm_core.utils import LoggerManager

  logger = LoggerManager().get_logger(__name__)
@@ -24,17 +26,12 @@ _local_runtime_available: bool | None = None


  def _probe_aip_agents_import() -> bool:
-     """Attempt to import aip_agents and return success status.
+     """Check if aip_agents is available without importing it.

      Returns:
-         True if aip_agents can be imported successfully, False otherwise.
+         True if aip_agents appears importable, False otherwise.
      """
-     try:
-         import aip_agents  # noqa: F401, PLC0415
-
-         return True
-     except ImportError:
-         return False
+     return importlib.util.find_spec("aip_agents") is not None


  def check_local_runtime_available() -> bool:
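Switching from try/import to `importlib.util.find_spec` means the probe no longer executes the module-level code of `aip_agents` just to test for its presence. A standalone illustration of the same stdlib pattern:

```python
import importlib.util


def is_installed(package: str) -> bool:
    """Probe availability without importing (no module-level side effects run)."""
    return importlib.util.find_spec(package) is not None


print(is_installed("json"))         # True: a stdlib module always has a spec
print(is_installed("no_such_pkg"))  # False: find_spec returns None
```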
@@ -19,26 +19,56 @@ from __future__ import annotations

  import asyncio
  import inspect
+ import logging
  from dataclasses import dataclass
  from typing import TYPE_CHECKING, Any

+ from aip_agents.agent.hitl.manager import ApprovalManager  # noqa: PLC0415
+ from gllm_core.utils import LoggerManager
+
+ from glaip_sdk.client.run_rendering import AgentRunRenderingManager
+ from glaip_sdk.hitl import LocalPromptHandler, PauseResumeCallback
  from glaip_sdk.runner.base import BaseRunner
  from glaip_sdk.runner.deps import (
      check_local_runtime_available,
      get_local_runtime_missing_message,
  )
- from glaip_sdk.utils.a2a import A2AEventStreamProcessor
- from gllm_core.utils import LoggerManager
+ from glaip_sdk.utils.tool_storage_provider import build_tool_output_manager

  if TYPE_CHECKING:
      from langchain_core.messages import BaseMessage

      from glaip_sdk.agents.base import Agent

- logger = LoggerManager().get_logger(__name__)

- # Default A2A event processor
- _event_processor = A2AEventStreamProcessor()
+ _AIP_LOGS_SWALLOWED = False
+
+
+ def _swallow_aip_logs(level: int = logging.ERROR) -> None:
+     """Consume noisy AIPAgents logs once (opt-in via runner flag)."""
+     global _AIP_LOGS_SWALLOWED
+     if _AIP_LOGS_SWALLOWED:
+         return
+     prefixes = ("aip_agents.",)
+
+     def _silence(name: str) -> None:
+         lg = logging.getLogger(name)
+         lg.handlers = [logging.NullHandler()]
+         lg.propagate = False
+         lg.setLevel(level)
+
+     # Silence any already-registered loggers under the given prefixes
+     for logger_name in logging.root.manager.loggerDict:
+         if any(logger_name.startswith(prefix) for prefix in prefixes):
+             _silence(logger_name)
+
+     # Also set the base prefix loggers so future children inherit silence
+     for prefix in prefixes:
+         _silence(prefix.rstrip("."))
+     _AIP_LOGS_SWALLOWED = True
+
+
+ logger = LoggerManager().get_logger(__name__)


  def _convert_chat_history_to_messages(
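The silencing recipe in `_swallow_aip_logs` — a `NullHandler`, `propagate = False`, and a raised level — is plain stdlib `logging`. A self-contained demo with a hypothetical logger name:

```python
import logging

logging.basicConfig(level=logging.INFO)
noisy = logging.getLogger("aip_agents.some.module")

noisy.info("visible: propagates to the root handler")

# The same steps _swallow_aip_logs applies per logger:
noisy.handlers = [logging.NullHandler()]  # swallow records handled here
noisy.propagate = False                   # stop records reaching the root
noisy.setLevel(logging.ERROR)             # drop INFO/WARNING at the source

noisy.info("silenced: never printed")
```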
@@ -81,9 +111,10 @@ def _convert_chat_history_to_messages(
  class LangGraphRunner(BaseRunner):
      """Runner implementation using aip-agents LangGraphReactAgent.

-     MVP scope:
-     - Execute via `LangGraphReactAgent.arun_a2a_stream()`
-     - Extract and return final text from the emitted `final_response` event
+     Current behavior:
+     - Execute via `LangGraphReactAgent.arun_sse_stream()` (normalized SSE-compatible stream)
+     - Route all events through `AgentRunRenderingManager.async_process_stream_events`
+       for unified rendering between local and remote agents

      Attributes:
          default_model: Model name to use when agent.model is not set.
@@ -99,6 +130,8 @@ class LangGraphRunner(BaseRunner):
          verbose: bool = False,
          runtime_config: dict[str, Any] | None = None,
          chat_history: list[dict[str, str]] | None = None,
+         *,
+         swallow_aip_logs: bool = True,
          **kwargs: Any,
      ) -> str:
          """Execute agent synchronously and return final response text.
@@ -113,6 +146,9 @@
              chat_history: Optional list of prior conversation messages.
                  Each message is a dict with "role" and "content" keys.
                  Defaults to None.
+             swallow_aip_logs: When True (default), silence noisy logs from aip-agents,
+                 gllm_inference, OpenAILMInvoker, and httpx. Set to False to honor the
+                 user's logging configuration.
              **kwargs: Additional keyword arguments passed to the backend.

          Returns:
@@ -141,6 +177,7 @@
              verbose=verbose,
              runtime_config=runtime_config,
              chat_history=chat_history,
+             swallow_aip_logs=swallow_aip_logs,
              **kwargs,
          )

@@ -153,6 +190,8 @@
          verbose: bool = False,
          runtime_config: dict[str, Any] | None = None,
          chat_history: list[dict[str, str]] | None = None,
+         *,
+         swallow_aip_logs: bool = True,
          **kwargs: Any,
      ) -> str:
          """Execute agent asynchronously and return final response text.
@@ -167,6 +206,7 @@
              chat_history: Optional list of prior conversation messages.
                  Each message is a dict with "role" and "content" keys.
                  Defaults to None.
+             swallow_aip_logs: When True (default), silence noisy AIPAgents logs.
              **kwargs: Additional keyword arguments passed to the backend.

          Returns:
@@ -181,6 +221,7 @@
              verbose=verbose,
              runtime_config=runtime_config,
              chat_history=chat_history,
+             swallow_aip_logs=swallow_aip_logs,
              **kwargs,
          )

@@ -191,6 +232,8 @@
          verbose: bool = False,
          runtime_config: dict[str, Any] | None = None,
          chat_history: list[dict[str, str]] | None = None,
+         *,
+         swallow_aip_logs: bool = True,
          **kwargs: Any,
      ) -> str:
          """Internal async implementation of agent execution.
@@ -201,13 +244,23 @@
              verbose: If True, emit debug trace output during execution.
              runtime_config: Optional runtime configuration for tools, MCPs, etc.
              chat_history: Optional list of prior conversation messages.
+             swallow_aip_logs: When True (default), silence noisy AIPAgents logs.
              **kwargs: Additional keyword arguments passed to the backend.

          Returns:
              The final response text from the agent.
          """
+         # Optionally swallow noisy AIPAgents logs
+         if swallow_aip_logs:
+             _swallow_aip_logs()
+
+         # POC/MVP: Create pause/resume callback for interactive HITL input
+         pause_resume_callback = PauseResumeCallback()
+
          # Build the local LangGraphReactAgent from the glaip_sdk Agent
-         local_agent = self.build_langgraph_agent(agent, runtime_config=runtime_config)
+         local_agent = self.build_langgraph_agent(
+             agent, runtime_config=runtime_config, pause_resume_callback=pause_resume_callback
+         )

          # Convert chat history to LangChain messages for the agent
          langchain_messages = _convert_chat_history_to_messages(chat_history)
@@ -219,20 +272,54 @@
              agent.name,
          )

-         # Collect A2AEvents from the stream and extract final response
-         events: list[dict[str, Any]] = []
+         # Use shared render manager for unified processing
+         render_manager = AgentRunRenderingManager(logger)
+         renderer = render_manager.create_renderer(kwargs.get("renderer"), verbose=verbose)

-         async for event in local_agent.arun_a2a_stream(message, **kwargs):
-             if verbose:
-                 self._log_event(event)
-             events.append(event)
+         # POC/MVP: Set renderer on callback so LocalPromptHandler can pause/resume Live
+         pause_resume_callback.set_renderer(renderer)

-         return _event_processor.extract_final_response(events)
+         meta = render_manager.build_initial_metadata(agent.name, message, kwargs)
+         render_manager.start_renderer(renderer, meta)
+
+         try:
+             # Use shared async stream processor for unified event handling
+             (
+                 final_text,
+                 stats_usage,
+                 started_monotonic,
+                 finished_monotonic,
+             ) = await render_manager.async_process_stream_events(
+                 local_agent.arun_sse_stream(message, **kwargs),
+                 renderer,
+                 meta,
+                 skip_final_render=True,
+             )
+         except KeyboardInterrupt:
+             try:
+                 renderer.close()
+             finally:
+                 raise
+         except Exception:
+             try:
+                 renderer.close()
+             finally:
+                 raise
+
+         # Use shared finalizer to avoid code duplication
+         from glaip_sdk.client.run_rendering import finalize_render_manager  # noqa: PLC0415
+
+         return finalize_render_manager(
+             render_manager, renderer, final_text, stats_usage, started_monotonic, finished_monotonic
+         )

      def build_langgraph_agent(
          self,
          agent: Agent,
          runtime_config: dict[str, Any] | None = None,
+         shared_tool_output_manager: Any | None = None,
+         *,
+         pause_resume_callback: Any | None = None,
      ) -> Any:
          """Build a LangGraphReactAgent from a glaip_sdk Agent definition.
@@ -240,6 +327,10 @@
              agent: The glaip_sdk Agent to convert.
              runtime_config: Optional runtime configuration with tool_configs,
                  mcp_configs, agent_config, and agent-specific overrides.
+             shared_tool_output_manager: Optional ToolOutputManager to reuse across
+                 agents with tool_output_sharing enabled.
+             pause_resume_callback: Optional callback used to pause/resume the renderer
+                 during interactive HITL prompts.

          Returns:
              A configured LangGraphReactAgent instance.
@@ -249,6 +340,7 @@
              ValueError: If agent has unsupported tools, MCPs, or sub-agents for local mode.
          """
          from aip_agents.agent import LangGraphReactAgent  # noqa: PLC0415
+
          from glaip_sdk.runner.tool_adapter import LangChainToolAdapter  # noqa: PLC0415

          # Adapt tools for local execution
@@ -260,9 +352,6 @@
          adapter = LangChainToolAdapter()
          langchain_tools = adapter.adapt_tools(agent.tools)

-         # Build sub-agents recursively
-         sub_agent_instances = self._build_sub_agents(agent.agents, runtime_config)
-
          # Normalize runtime config: merge global and agent-specific configs
          normalized_config = self._normalize_runtime_config(runtime_config, agent)

@@ -276,6 +365,19 @@
          merged_agent_config = self._merge_agent_config(agent, normalized_config)
          agent_config_params, agent_config_kwargs = self._apply_agent_config(merged_agent_config)

+         tool_output_manager = self._resolve_tool_output_manager(
+             agent,
+             merged_agent_config,
+             shared_tool_output_manager,
+         )
+
+         # Build sub-agents recursively, sharing tool output manager when enabled.
+         sub_agent_instances = self._build_sub_agents(
+             agent.agents,
+             runtime_config,
+             shared_tool_output_manager=tool_output_manager,
+         )
+
          # Build the LangGraphReactAgent with tools, sub-agents, and configs
          local_agent = LangGraphReactAgent(
              name=agent.name,
@@ -285,6 +387,7 @@
              tools=langchain_tools,
              agents=sub_agent_instances if sub_agent_instances else None,
              tool_configs=tool_configs if tool_configs else None,
+             tool_output_manager=tool_output_manager,
              **agent_config_params,
              **agent_config_kwargs,
          )
@@ -292,6 +395,11 @@
          # Add MCP servers if configured
          self._add_mcp_servers(local_agent, agent, mcp_configs)

+         # Inject local HITL manager only if hitl_enabled is True (master switch).
+         # This matches remote behavior: hitl_enabled gates the HITL plumbing.
+         # Tool-level HITL configs are only enforced when hitl_enabled=True.
+         self._inject_hitl_manager(local_agent, merged_agent_config, agent.name, pause_resume_callback)
+
          logger.debug(
              "Built local LangGraphReactAgent for agent '%s' with %d tools, %d sub-agents, and %d MCPs",
              agent.name,
@@ -301,16 +409,60 @@
          )
          return local_agent

+     def _resolve_tool_output_manager(
+         self,
+         agent: Agent,
+         merged_agent_config: dict[str, Any],
+         shared_tool_output_manager: Any | None,
+     ) -> Any | None:
+         """Resolve tool output manager for local agent execution."""
+         tool_output_sharing_enabled = merged_agent_config.get("tool_output_sharing", False)
+         if not tool_output_sharing_enabled:
+             return None
+         if shared_tool_output_manager is not None:
+             return shared_tool_output_manager
+         return build_tool_output_manager(agent.name, merged_agent_config)
+
+     def _inject_hitl_manager(
+         self,
+         local_agent: Any,
+         merged_agent_config: dict[str, Any],
+         agent_name: str,
+         pause_resume_callback: Any | None,
+     ) -> None:
+         """Inject HITL manager when enabled, mirroring remote gating behavior."""
+         hitl_enabled = merged_agent_config.get("hitl_enabled", False)
+         if hitl_enabled:
+             try:
+                 local_agent.hitl_manager = ApprovalManager(
+                     prompt_handler=LocalPromptHandler(pause_resume_callback=pause_resume_callback)
+                 )
+                 # Store callback reference for setting renderer later
+                 if pause_resume_callback:
+                     local_agent._pause_resume_callback = pause_resume_callback
+                 logger.debug("HITL manager injected for agent '%s' (hitl_enabled=True)", agent_name)
+             except ImportError as e:
+                 # Missing dependencies - fail fast
+                 raise ImportError("Local HITL requires aip_agents. Install with: pip install 'glaip-sdk[local]'") from e
+             except Exception as e:
+                 # Other errors during HITL setup - fail fast
+                 raise RuntimeError(f"Failed to initialize HITL manager for agent '{agent_name}'") from e
+         else:
+             logger.debug("HITL manager not injected for agent '%s' (hitl_enabled=False)", agent_name)
+
      def _build_sub_agents(
          self,
          sub_agents: list[Any] | None,
          runtime_config: dict[str, Any] | None,
+         shared_tool_output_manager: Any | None = None,
      ) -> list[Any]:
          """Build sub-agent instances recursively.

          Args:
              sub_agents: List of sub-agent definitions.
              runtime_config: Runtime config to pass to sub-agents.
+             shared_tool_output_manager: Optional ToolOutputManager to reuse across
+                 agents with tool_output_sharing enabled.

          Returns:
              List of built sub-agent instances.
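Both helpers gate on plain keys in the merged agent config: `hitl_enabled` decides whether an `ApprovalManager` is attached, and `tool_output_sharing` decides whether a `ToolOutputManager` is created and reused. A hedged sketch of the relevant config shape — the nesting under `agent_config` follows the runtime_config docstring above, and the values are illustrative:

```python
# Hedged sketch: key names come from the code above; nesting and values assumed.
runtime_config = {
    "agent_config": {
        "hitl_enabled": True,         # master switch: inject the local ApprovalManager
        "tool_output_sharing": True,  # reuse one ToolOutputManager across sub-agents
    },
}
```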
@@ -324,7 +476,13 @@
          sub_agent_instances = []
          for sub_agent in sub_agents:
              self._validate_sub_agent_for_local_mode(sub_agent)
-             sub_agent_instances.append(self.build_langgraph_agent(sub_agent, runtime_config))
+             sub_agent_instances.append(
+                 self.build_langgraph_agent(
+                     sub_agent,
+                     runtime_config,
+                     shared_tool_output_manager=shared_tool_output_manager,
+                 )
+             )
          return sub_agent_instances

      def _add_mcp_servers(
@@ -571,7 +729,13 @@
          if "planning" in agent_config:
              direct_params["planning"] = agent_config["planning"]

+         if "enable_a2a_token_streaming" in agent_config:
+             direct_params["enable_a2a_token_streaming"] = agent_config["enable_a2a_token_streaming"]
+
          # Kwargs parameters (passed through **kwargs to BaseAgent)
+         if "enable_pii" in agent_config:
+             kwargs_params["enable_pii"] = agent_config["enable_pii"]
+
          if "memory" in agent_config:
              # Map "memory" to "memory_backend" for aip-agents compatibility
              kwargs_params["memory_backend"] = agent_config["memory"]
@@ -8,6 +8,7 @@ from __future__ import annotations

  import json
  import logging
+ import sys
  from datetime import datetime, timezone
  from time import monotonic
  from typing import Any
@@ -349,6 +350,9 @@ class RichStreamRenderer(TranscriptModeMixin):
              self._handle_status_event(ev)
          elif kind == "content":
              self._handle_content_event(content)
+         elif kind == "token":
+             # Token events should stream content incrementally with immediate console output
+             self._handle_token_event(content)
          elif kind == "final_response":
              self._handle_final_response_event(content, metadata)
          elif kind in {"agent_step", "agent_thinking_step"}:
@@ -368,6 +372,31 @@
          self.state.append_transcript_text(content)
          self._ensure_live()

+     def _handle_token_event(self, content: str) -> None:
+         """Handle token streaming events - print immediately for real-time streaming."""
+         if content:
+             self.state.append_transcript_text(content)
+             # Print token content directly to stdout for immediate visibility when not verbose.
+             # This bypasses Rich's Live display, which has refresh rate limitations.
+             if not self.verbose:
+                 try:
+                     # Mark that we're streaming tokens directly to prevent Live display from starting
+                     self._streaming_tokens_directly = True
+                     # Stop Live display if active to prevent it from intercepting stdout
+                     # and causing each token to appear on a new line
+                     if self.live is not None:
+                         self._stop_live_display()
+                     # Write directly to stdout - tokens will stream on the same line
+                     # since we're bypassing Rich's console, which adds newlines
+                     sys.stdout.write(content)
+                     sys.stdout.flush()
+                 except Exception:
+                     # Fall back to the live display if the direct write fails
+                     self._ensure_live()
+             else:
+                 # In verbose mode, use the normal live display (debug panels handle the output)
+                 self._ensure_live()
+
      def _handle_final_response_event(self, content: str, metadata: dict[str, Any]) -> None:
          """Handle final response events."""
          if content:
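The constraint driving this handler is general to Rich: while a `Live` display is running it owns the terminal, so raw `sys.stdout` writes render badly. A minimal standalone sketch of the stop/write/resume pattern, independent of this renderer:

```python
# Minimal sketch: stop Rich Live before raw stdout writes, then hand it back.
import sys
import time

from rich.live import Live
from rich.text import Text

with Live(Text("streaming..."), refresh_per_second=4) as live:
    live.stop()  # release the terminal before writing tokens ourselves
    for token in ["Hello", ", ", "world"]:
        sys.stdout.write(token)  # tokens stay on one line
        sys.stdout.flush()
        time.sleep(0.1)
    sys.stdout.write("\n")
    live.start()  # hand the terminal back to Live
```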
@@ -521,6 +550,18 @@
          if getattr(self, "_transcript_mode_enabled", False):
              return

+         # When verbose=False and tokens were streamed directly, skip the final panel.
+         # The user's script will print the final result, avoiding duplication.
+         if not self.verbose and getattr(self, "_streaming_tokens_directly", False):
+             # Add a newline after streaming tokens for clean separation
+             try:
+                 sys.stdout.write("\n")
+                 sys.stdout.flush()
+             except Exception:
+                 pass
+             self.state.printed_final_output = True
+             return
+
          if self.verbose:
              panel = build_final_panel(
                  self.state,
@@ -597,6 +638,19 @@

      def _finalize_display(self) -> None:
          """Finalize live display and render final output."""
+         # When verbose=False and tokens were streamed directly, skip live display updates
+         # to avoid showing a duplicate final result.
+         if not self.verbose and getattr(self, "_streaming_tokens_directly", False):
+             # Just add a newline after streaming tokens for clean separation
+             try:
+                 sys.stdout.write("\n")
+                 sys.stdout.flush()
+             except Exception:
+                 pass
+             self._stop_live_display()
+             self.state.printed_final_output = True
+             return
+
          # Final refresh
          self._ensure_live()

@@ -629,6 +683,10 @@
          """Ensure live display is updated."""
          if getattr(self, "_transcript_mode_enabled", False):
              return
+         # When verbose=False, don't start the Live display if we're streaming tokens directly.
+         # This prevents Live from intercepting stdout and putting tokens on separate lines.
+         if not self.verbose and getattr(self, "_streaming_tokens_directly", False):
+             return
          if not self._ensure_live_stack():
              return