agent-starter-pack 0.13.1__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {agent_starter_pack-0.13.1.dist-info → agent_starter_pack-0.14.1.dist-info}/METADATA +13 -4
  2. {agent_starter_pack-0.13.1.dist-info → agent_starter_pack-0.14.1.dist-info}/RECORD +23 -25
  3. agents/adk_base/notebooks/evaluating_adk_agent.ipynb +78 -71
  4. agents/agentic_rag/notebooks/evaluating_adk_agent.ipynb +78 -71
  5. llm.txt +87 -39
  6. src/base_template/Makefile +16 -1
  7. src/base_template/README.md +1 -1
  8. src/cli/commands/create.py +27 -5
  9. src/cli/commands/enhance.py +132 -6
  10. src/cli/commands/setup_cicd.py +91 -69
  11. src/cli/utils/cicd.py +105 -0
  12. src/cli/utils/gcp.py +19 -13
  13. src/cli/utils/logging.py +13 -1
  14. src/cli/utils/template.py +3 -0
  15. src/frontends/live_api_react/frontend/package-lock.json +9 -9
  16. src/frontends/live_api_react/frontend/src/App.tsx +12 -153
  17. src/frontends/live_api_react/frontend/src/components/side-panel/SidePanel.tsx +352 -3
  18. src/frontends/live_api_react/frontend/src/components/side-panel/side-panel.scss +249 -2
  19. src/frontends/live_api_react/frontend/src/utils/multimodal-live-client.ts +4 -1
  20. src/resources/docs/adk-cheatsheet.md +285 -38
  21. src/frontends/live_api_react/frontend/src/components/control-tray/ControlTray.tsx +0 -217
  22. src/frontends/live_api_react/frontend/src/components/control-tray/control-tray.scss +0 -201
  23. {agent_starter_pack-0.13.1.dist-info → agent_starter_pack-0.14.1.dist-info}/WHEEL +0 -0
  24. {agent_starter_pack-0.13.1.dist-info → agent_starter_pack-0.14.1.dist-info}/entry_points.txt +0 -0
  25. {agent_starter_pack-0.13.1.dist-info → agent_starter_pack-0.14.1.dist-info}/licenses/LICENSE +0 -0
@@ -8,6 +8,7 @@ This document serves as a long-form, comprehensive reference for building, orche
8
8
  * 1.1 ADK's Foundational Principles
9
9
  * 1.2 Essential Primitives
10
10
  * 1.3 Standard Project Layout
11
+ * 1.A Build Agents without Code (Agent Config)
11
12
  2. [Agent Definitions (`LlmAgent`)](#2-agent-definitions-llmagent)
12
13
  * 2.1 Basic `LlmAgent` Setup
13
14
  * 2.2 Advanced `LlmAgent` Configuration
@@ -20,6 +21,7 @@ This document serves as a long-form, comprehensive reference for building, orche
20
21
  * 4.1 Agent Hierarchy
21
22
  * 4.2 Inter-Agent Communication Mechanisms
22
23
  * 4.3 Common Multi-Agent Patterns
24
+ * 4.A Distributed Communication (A2A Protocol)
23
25
  5. [Building Custom Agents (`BaseAgent`)](#5-building-custom-agents-baseagent)
24
26
  * 5.1 When to Use Custom Agents
25
27
  * 5.2 Implementing `_run_async_impl`
@@ -46,6 +48,7 @@ This document serves as a long-form, comprehensive reference for building, orche
46
48
  * 10.1 Callback Mechanism: Interception & Control
47
49
  * 10.2 Types of Callbacks
48
50
  * 10.3 Callback Best Practices
51
+ * 10.A Global Control with Plugins
49
52
  11. [Authentication for Tools](#11-authentication-for-tools)
50
53
  * 11.1 Core Concepts: `AuthScheme` & `AuthCredential`
51
54
  * 11.2 Interactive OAuth/OIDC Flows
@@ -60,9 +63,10 @@ This document serves as a long-form, comprehensive reference for building, orche
60
63
  * 13.1 Agent Evaluation (`adk eval`)
61
64
  * 13.2 Safety & Guardrails
62
65
  14. [Debugging, Logging & Observability](#14-debugging-logging--observability)
63
- 15. [Advanced I/O Modalities](#15-advanced-io-modalities)
66
+ 15. [Streaming & Advanced I/O](#15-streaming--advanced-io)
64
67
  16. [Performance Optimization](#16-performance-optimization)
65
68
  17. [General Best Practices & Common Pitfalls](#17-general-best-practices--common-pitfalls)
69
+ 18. [Official API & CLI References](#18-official-api--cli-references)
66
70
 
67
71
  ---
68
72
 
@@ -115,6 +119,121 @@ your_project_root/
115
119
  * `adk web` and `adk run` automatically discover agents in subdirectories with `__init__.py` and `agent.py`.
116
120
  * `.env` files are automatically loaded by `adk` tools when run from the root or agent directory.
117
121
 
122
+ ### 1.A Build Agents without Code (Agent Config)
123
+
124
+ ADK allows you to define agents, tools, and even multi-agent workflows using a simple YAML format, eliminating the need to write Python code for orchestration. This is ideal for rapid prototyping and for non-programmers to configure agents.
125
+
126
+ #### **Getting Started with Agent Config**
127
+
128
+ * **Create a Config-based Agent**:
129
+ ```bash
130
+ adk create --type=config my_yaml_agent
131
+ ```
132
+ This generates a `my_yaml_agent/` folder with `root_agent.yaml` and `.env` files.
133
+
134
+ * **Environment Setup** (in `.env` file):
135
+ ```bash
136
+ # For Google AI Studio (simpler setup)
137
+ GOOGLE_GENAI_USE_VERTEXAI=0
138
+ GOOGLE_API_KEY=<your-Google-Gemini-API-key>
139
+
140
+ # For Google Cloud Vertex AI (production)
141
+ GOOGLE_GENAI_USE_VERTEXAI=1
142
+ GOOGLE_CLOUD_PROJECT=<your_gcp_project>
143
+ GOOGLE_CLOUD_LOCATION=us-central1
144
+ ```
145
+
146
+ #### **Core Agent Config Structure**
147
+
148
+ * **Basic Agent (`root_agent.yaml`)**:
149
+ ```yaml
150
+ # yaml-language-server: $schema=https://raw.githubusercontent.com/google/adk-python/refs/heads/main/src/google/adk/agents/config_schemas/AgentConfig.json
151
+ name: assistant_agent
152
+ model: gemini-2.5-flash
153
+ description: A helper agent that can answer users' various questions.
154
+ instruction: You are an agent to help answer users' various questions.
155
+ ```
156
+
157
+ * **Agent with Built-in Tools**:
158
+ ```yaml
159
+ name: search_agent
160
+ model: gemini-2.0-flash
161
+ description: 'An agent whose job is to perform Google search queries and answer questions about the results.'
162
+ instruction: You are an agent whose job is to perform Google search queries and answer questions about the results.
163
+ tools:
164
+ - name: google_search # Built-in ADK tool
165
+ ```
166
+
167
+ * **Agent with Custom Tools**:
168
+ ```yaml
169
+ agent_class: LlmAgent
170
+ model: gemini-2.5-flash
171
+ name: prime_agent
172
+ description: Handles checking if numbers are prime.
173
+ instruction: |
174
+ You are responsible for checking whether numbers are prime.
175
+ When asked to check primes, you must call the check_prime tool with a list of integers.
176
+ Never attempt to determine prime numbers manually.
177
+ tools:
178
+ - name: ma_llm.check_prime # Reference to Python function
179
+ ```
180
+
181
+ * **Multi-Agent System with Sub-Agents**:
182
+ ```yaml
183
+ agent_class: LlmAgent
184
+ model: gemini-2.5-flash
185
+ name: root_agent
186
+ description: Learning assistant that provides tutoring in code and math.
187
+ instruction: |
188
+ You are a learning assistant that helps students with coding and math questions.
189
+
190
+ You delegate coding questions to the code_tutor_agent and math questions to the math_tutor_agent.
191
+
192
+ Follow these steps:
193
+ 1. If the user asks about programming or coding, delegate to the code_tutor_agent.
194
+ 2. If the user asks about math concepts or problems, delegate to the math_tutor_agent.
195
+ 3. Always provide clear explanations and encourage learning.
196
+ sub_agents:
197
+ - config_path: code_tutor_agent.yaml
198
+ - config_path: math_tutor_agent.yaml
199
+ ```
200
+
201
+ #### **Loading Agent Config in Python**
202
+
203
+ ```python
204
+ from google.adk.agents import config_agent_utils
205
+ root_agent = config_agent_utils.from_config("{agent_folder}/root_agent.yaml")
206
+ ```
207
+
208
+ #### **Running Agent Config Agents**
209
+
210
+ From the agent directory, use any of these commands:
211
+ * `adk web` - Launch web UI interface
212
+ * `adk run` - Run in terminal without UI
213
+ * `adk api_server` - Run as a service for other applications
214
+
215
+ #### **Deployment Support**
216
+
217
+ Agent Config agents can be deployed using:
218
+ * `adk deploy cloud_run` - Deploy to Google Cloud Run
219
+ * `adk deploy agent_engine` - Deploy to Vertex AI Agent Engine
220
+
221
+ #### **Key Features & Capabilities**
222
+
223
+ * **Supported Built-in Tools**: `google_search`, `load_artifacts`, `url_context`, `exit_loop`, `preload_memory`, `get_user_choice`, `enterprise_web_search`, `load_web_page`
224
+ * **Custom Tool Integration**: Reference Python functions using fully qualified module paths
225
+ * **Multi-Agent Orchestration**: Link agents via `config_path` references
226
+ * **Schema Validation**: Built-in YAML schema for IDE support and validation
227
+
228
+ #### **Current Limitations** (Experimental Feature)
229
+
230
+ * **Model Support**: Only Gemini models currently supported
231
+ * **Language Support**: Custom tools must be written in Python
232
+ * **Unsupported Agent Types**: `LangGraphAgent`, `A2aAgent`
233
+ * **Unsupported Tools**: `AgentTool`, `LongRunningFunctionTool`, `VertexAiSearchTool`, `MCPToolset`, `CrewaiTool`, `LangchainTool`, `ExampleTool`
234
+
235
+ For complete examples and reference, see the [ADK samples repository](https://github.com/search?q=repo%3Agoogle%2Fadk-python+path%3A%2F%5Econtributing%5C%2Fsamples%5C%2F%2F+.yaml&type=code).
236
+
118
237
  ---
119
238
 
120
239
  ## 2. Agent Definitions (`LlmAgent`)
@@ -228,18 +347,33 @@ This is the most reliable way to make an LLM produce predictable, parseable JSON
228
347
  agent = Agent(..., include_contents='none')
229
348
  ```
230
349
 
231
- * **`planner`**: Assign a `BasePlanner` instance (e.g., `ReActPlanner`) to enable multi-step reasoning and planning. (Advanced, covered in Multi-Agents).
350
+ * **`planner`**: Assign a `BasePlanner` instance to enable multi-step reasoning.
351
+ * **`BuiltInPlanner`**: Leverages a model's native "thinking" or planning capabilities (e.g., Gemini).
352
+ ```python
353
+ from google.adk.planners import BuiltInPlanner
354
+ from google.genai.types import ThinkingConfig
355
+
356
+ agent = Agent(
357
+ model="gemini-2.5-flash",
358
+ planner=BuiltInPlanner(
359
+ thinking_config=ThinkingConfig(include_thoughts=True)
360
+ ),
361
+ # ... tools ...
362
+ )
363
+ ```
364
+ * **`PlanReActPlanner`**: Instructs the model to follow a structured Plan-Reason-Act output format, useful for models without built-in planning.
232
365
 
233
- * **`executor`**: Assign a `BaseCodeExecutor` (e.g., `BuiltInCodeExecutor`) to allow the agent to execute code blocks.
234
- ```python
235
- from google.adk.code_executors import BuiltInCodeExecutor
236
- agent = Agent(
237
- name="code_agent",
238
- model="gemini-2.5-flash",
239
- instruction="Write and execute Python code to solve math problems.",
240
- executor=[BuiltInCodeExecutor] # Allows agent to run Python code
241
- )
242
- ```
366
+ * **`code_executor`**: Assign a `BaseCodeExecutor` to allow the agent to execute code blocks.
367
+ * **`BuiltInCodeExecutor`**: The standard, sandboxed code executor provided by ADK for safe execution.
368
+ ```python
369
+ from google.adk.code_executors import BuiltInCodeExecutor
370
+ agent = Agent(
371
+ name="code_agent",
372
+ model="gemini-2.5-flash",
373
+ instruction="Write and execute Python code to solve math problems.",
374
+ code_executor=BuiltInCodeExecutor() # Pass an executor instance (not a list) so the agent can run Python code
375
+ )
376
+ ```
243
377
 
244
378
  * **Callbacks**: Hooks for observing and modifying agent behavior at key lifecycle points (`before_model_callback`, `after_tool_callback`, etc.). (Covered in Callbacks).
245
379
 
@@ -541,6 +675,33 @@ interactive_planner_agent = LlmAgent(
541
675
  root_agent = interactive_planner_agent
542
676
  ```
543
677
 
678
+ ### 4.A Distributed Communication (A2A Protocol)
679
+
680
+ The Agent-to-Agent (A2A) Protocol enables agents to communicate over a network, even if they are written in different languages or run as separate services. Use A2A for integrating with third-party agents, building microservice-based agent architectures, or when a strong, formal API contract is needed. For internal code organization, prefer local sub-agents.
681
+
682
+ * **Exposing an Agent**: Make an existing ADK agent available to others over A2A.
683
+ * **`to_a2a()` Utility**: The simplest method. Wraps your `root_agent` and creates a runnable FastAPI app, auto-generating the required `agent.json` card.
684
+ ```python
685
+ from google.adk.a2a.utils.agent_to_a2a import to_a2a
686
+ # root_agent is your existing ADK Agent instance
687
+ a2a_app = to_a2a(root_agent, port=8001)
688
+ # Run with: uvicorn your_module:a2a_app --host localhost --port 8001
689
+ ```
690
+ * **`adk api_server --a2a`**: A CLI command that serves agents from a directory. Requires you to manually create an `agent.json` card for each agent you want to expose.
691
+
692
+ * **Consuming a Remote Agent**: Use a remote A2A agent as if it were a local agent.
693
+ * **`RemoteA2aAgent`**: This agent acts as a client proxy. You initialize it with the URL to the remote agent's card.
694
+ ```python
695
+ from google.adk.a2a.remote_a2a_agent import RemoteA2aAgent
696
+
697
+ # This agent can now be used as a sub-agent or tool
698
+ prime_checker_agent = RemoteA2aAgent(
699
+ name="prime_agent",
700
+ description="A remote agent that checks if numbers are prime.",
701
+ agent_card="http://localhost:8001/a2a/check_prime_agent/.well-known/agent.json"
702
+ )
703
+ ```
704
+
544
705
  ---
545
706
 
546
707
  ## 5. Building Custom Agents (`BaseAgent`)
@@ -728,17 +889,30 @@ Tools extend an agent's abilities beyond text generation.
728
889
 
729
890
  ### 7.3 All Tool Types & Their Usage
730
891
 
731
- ADK supports a diverse ecosystem of tools.
892
+ 1. **Custom Function Tools**:
893
+ * **`FunctionTool`**: The most common type, wrapping a standard Python function.
894
+ * **`LongRunningFunctionTool`**: Wraps an `async` function that `yields` intermediate results, for tasks that provide progress updates.
895
+ * **`AgentTool`**: Wraps another `BaseAgent` instance, allowing it to be invoked as a tool by a parent agent.
896
+
897
+ 2. **Built-in Tools**: Ready-to-use tools provided by ADK.
898
+ * `google_search`: Provides Google Search grounding.
899
+ * `BuiltInCodeExecutor`: Enables sandboxed code execution.
900
+ * `VertexAiSearchTool`: Provides grounding from your private Vertex AI Search data stores.
901
+ * `BigQueryToolset`: A collection of tools for interacting with BigQuery (e.g., `list_datasets`, `execute_sql`).
902
+ > **Warning**: An agent can use only one type of built-in tool at a time, and built-in tools cannot be used in sub-agents.
903
+
904
+ 3. **Third-Party Tool Wrappers**: For seamless integration with other frameworks.
905
+ * `LangchainTool`: Wraps a tool from the LangChain ecosystem.
906
+ * `CrewaiTool`: Wraps a tool from the CrewAI library.
907
+
908
+ 4. **OpenAPI & Protocol Tools**: For interacting with APIs and services.
909
+ * **`OpenAPIToolset`**: Automatically generates a set of `RestApiTool`s from an OpenAPI (Swagger) v3 specification.
910
+ * **`MCPToolset`**: Connects to an external Model Context Protocol (MCP) server to dynamically load its tools.
732
911
 
733
- 1. **`FunctionTool`**: Wraps any Python callable. The most common tool type.
734
- 2. **`LongRunningFunctionTool`**: For `async` functions that `yield` intermediate results.
735
- 3. **`AgentTool`**: Wraps another `BaseAgent` instance, allowing it to be called as a tool.
736
- 4. **`OpenAPIToolset`**: Automatically generates tools from an OpenAPI (Swagger) specification.
737
- 5. **`MCPToolset`**: Connects to an external Model Context Protocol (MCP) server.
738
- 6. **Built-in Tools**: `google_search`, `BuiltInCodeExecutor`, `VertexAiSearchTool`. e.g `from google.adk.tools import google_search`
739
- Note: google_search is a special tool automatically invoked by the model. It can be passed directly to the agent without wrapping in a custom function.
740
- 7. **Third-Party Tool Wrappers**: `LangchainTool`, `CrewaiTool`.
741
- 8. **Google Cloud Tools**: `ApiHubToolset`, `ApplicationIntegrationToolset`.
912
+ 5. **Google Cloud Tools**: For deep integration with Google Cloud services.
913
+ * **`ApiHubToolset`**: Turns any documented API from Apigee API Hub into a tool.
914
+ * **`ApplicationIntegrationToolset`**: Turns Application Integration workflows and Integration Connectors (e.g., Salesforce, SAP) into callable tools.
915
+ * **Toolbox for Databases**: An open-source MCP server that ADK can connect to for database interactions.
742
916
 
743
917
  ---
744
918
 
@@ -903,6 +1077,35 @@ def citation_replacement_callback(callback_context: CallbackContext) -> genai_ty
903
1077
  # Used in an agent like this:
904
1078
  # report_composer = LlmAgent(..., after_agent_callback=citation_replacement_callback)
905
1079
  ```
1080
+
1081
+ ### 10.A Global Control with Plugins
1082
+
1083
+ Plugins are stateful, reusable modules for implementing cross-cutting concerns that apply globally to all agents, tools, and model calls managed by a `Runner`. Unlike Callbacks which are configured per-agent, Plugins are registered once on the `Runner`.
1084
+
1085
+ * **Use Cases**: Ideal for universal logging, application-wide policy enforcement, global caching, and collecting metrics.
1086
+ * **Execution Order**: Plugin callbacks run **before** their corresponding agent-level callbacks. If a plugin callback returns a value, the agent-level callback is skipped.
1087
+ * **Defining a Plugin**: Inherit from `BasePlugin` and implement callback methods.
1088
+ ```python
1089
+ from google.adk.plugins.base_plugin import BasePlugin
1090
+ from google.adk.agents.callback_context import CallbackContext
1091
+
1092
+ class InvocationCounterPlugin(BasePlugin):
1093
+ def __init__(self):
1094
+ super().__init__(name="invocation_counter")
1095
+ self.agent_runs = 0
1096
+
1097
+ async def before_agent_callback(self, callback_context: CallbackContext, **kwargs):
1098
+ self.agent_runs += 1
1099
+ print(f"[Plugin] Total agent runs: {self.agent_runs}")
1100
+ ```
1101
+ * **Registering a Plugin**:
1102
+ ```python
1103
+ from google.adk.runners import Runner
1104
+ # runner = Runner(agent=root_agent, ..., plugins=[InvocationCounterPlugin()])
1105
+ ```
1106
+ * **Error Handling Callbacks**: Plugins support unique error hooks like `on_model_error_callback` and `on_tool_error_callback` for centralized error management.
1107
+ * **Limitation**: Plugins are not supported by the `adk web` interface.
1108
+
906
1109
  ---
907
1110
 
908
1111
  ## 11. Authentication for Tools
@@ -960,6 +1163,7 @@ From local dev to production.
960
1163
  Fully managed, scalable service for ADK agents on Google Cloud.
961
1164
 
962
1165
  * **Features**: Auto-scaling, session management, observability integration.
1166
+ * **ADK CLI**: `adk deploy agent_engine --project <id> --region <loc> ... /path/to/agent`
963
1167
  * **Deployment**: Use `vertexai.agent_engines.create()`.
964
1168
  ```python
965
1169
  from vertexai.preview import reasoning_engines # or agent_engines directly in later versions
@@ -981,6 +1185,7 @@ Fully managed, scalable service for ADK agents on Google Cloud.
981
1185
 
982
1186
  Serverless container platform for custom web applications.
983
1187
 
1188
+ * **ADK CLI**: `adk deploy cloud_run --project <id> --region <loc> ... /path/to/agent`
984
1189
  * **Deployment**:
985
1190
  1. Create a `Dockerfile` for your FastAPI app (using `google.adk.cli.fast_api.get_fast_api_app`).
986
1191
  2. Use `gcloud run deploy --source .`.
@@ -1006,6 +1211,7 @@ Serverless container platform for custom web applications.
1006
1211
 
1007
1212
  For maximum control, run your containerized agent in a Kubernetes cluster.
1008
1213
 
1214
+ * **ADK CLI**: `adk deploy gke --project <id> --cluster_name <name> ... /path/to/agent`
1009
1215
  * **Deployment**:
1010
1216
  1. Build Docker image (`gcloud builds submit`).
1011
1217
  2. Create Kubernetes Deployment and Service YAMLs.
@@ -1096,6 +1302,11 @@ Multi-layered defense against harmful content, misalignment, and unsafe actions.
1096
1302
  6. **Network Controls & VPC-SC**: Confine agent activity within secure perimeters (VPC Service Controls) to prevent data exfiltration.
1097
1303
  7. **Output Escaping in UIs**: Always properly escape LLM-generated content in web UIs to prevent XSS attacks and indirect prompt injections.
1098
1304
 
1305
+ **Grounding**: A key safety and reliability feature that connects agent responses to verifiable information.
1306
+ * **Mechanism**: Uses tools like `google_search` or `VertexAiSearchTool` to fetch real-time or private data.
1307
+ * **Benefit**: Reduces model hallucination by basing responses on retrieved facts.
1308
+ * **Requirement**: When using `google_search`, your application UI **must** display the provided search suggestions and citations to comply with terms of service.
1309
+
1099
1310
  ---
1100
1311
 
1101
1312
  ## 14. Debugging, Logging & Observability
@@ -1121,8 +1332,13 @@ Multi-layered defense against harmful content, misalignment, and unsafe actions.
1121
1332
  print(f" ERROR: {event.error_message}")
1122
1333
  ```
1123
1334
  * **Tool/Callback `print` statements**: Simple logging directly within your functions.
1124
- * **Python `logging` module**: Integrate with standard logging frameworks.
1125
- * **Tracing Integrations**: ADK supports OpenTelemetry (e.g., via Comet Opik) for distributed tracing.
1335
+ * **Logging**: Use Python's standard `logging` module. Control verbosity with `adk web --log_level DEBUG` or `adk web -v`.
1336
+ * **Observability Integrations**: ADK supports OpenTelemetry, enabling integration with platforms like:
1337
+ * Google Cloud Trace
1338
+ * AgentOps
1339
+ * Arize AX
1340
+ * Phoenix
1341
+ * Weave by WandB
1126
1342
  ```python
1127
1343
  # Example using Comet Opik integration (conceptual)
1128
1344
  # pip install comet_opik_adk
@@ -1134,22 +1350,41 @@ Multi-layered defense against harmful content, misalignment, and unsafe actions.
1134
1350
 
1135
1351
  ---
1136
1352
 
1137
- ## 15. Advanced I/O Modalities
1353
+ ## 15. Streaming & Advanced I/O
1138
1354
 
1139
- ADK (especially with Gemini Live API models) supports richer interactions.
1355
+ ADK supports real-time, bidirectional communication for interactive experiences like live voice conversations.
1140
1356
 
1141
- * **Audio**: Input via `Blob(mime_type="audio/pcm", data=bytes)`, Output via `genai_types.SpeechConfig` in `RunConfig`.
1142
- * **Vision (Images/Video)**: Input via `Blob(mime_type="image/jpeg", data=bytes)` or `Blob(mime_type="video/mp4", data=bytes)`. Models like `gemini-2.5-flash-exp` can process these.
1143
- * **Multimodal Input in `Content`**:
1144
- ```python
1145
- multimodal_content = genai_types.Content(
1146
- parts=[
1147
- genai_types.Part(text="Describe this image:"),
1148
- genai_types.Part(inline_data=genai_types.Blob(mime_type="image/jpeg", data=image_bytes))
1149
- ]
1150
- )
1151
- ```
1152
- * **Streaming Modalities**: `RunConfig.response_modalities=['TEXT', 'AUDIO']`.
1357
+ * **Bidirectional Streaming**: Enables low-latency, two-way data flow (text, audio, video) between the client and agent, allowing for interruptions.
1358
+ * **Core Components**:
1359
+ * **`Runner.run_live()`**: The entry point for starting a streaming session.
1360
+ * **`LiveRequestQueue`**: A queue for sending data (e.g., audio chunks) from the client to the agent during a live session.
1361
+ * **`RunConfig`**: A configuration object passed to `run_live()` to specify modalities (`['TEXT', 'AUDIO']`), speech synthesis options, etc.
1362
+ * **Streaming Tools**: A special type of `FunctionTool` that can stream intermediate results back to the agent.
1363
+ * **Definition**: Must be an `async` function with a return type of `AsyncGenerator`.
1364
+ ```python
1365
+ from typing import AsyncGenerator
1366
+
1367
+ async def monitor_stock_price(symbol: str) -> AsyncGenerator[str, None]:
1368
+ """Yields stock price updates as they occur."""
1369
+ while True:
1370
+ price = await get_live_price(symbol)
1371
+ yield f"Update for {symbol}: ${price}"
1372
+ await asyncio.sleep(5)
1373
+ ```
1374
+
1375
+ * **Advanced I/O Modalities**: ADK (especially with Gemini Live API models) supports richer interactions.
1376
+ * **Audio**: Input via `Blob(mime_type="audio/pcm", data=bytes)`, Output via `genai_types.SpeechConfig` in `RunConfig`.
1377
+ * **Vision (Images/Video)**: Input via `Blob(mime_type="image/jpeg", data=bytes)` or `Blob(mime_type="video/mp4", data=bytes)`. Models like `gemini-2.5-flash-exp` can process these.
1378
+ * **Multimodal Input in `Content`**:
1379
+ ```python
1380
+ multimodal_content = genai_types.Content(
1381
+ parts=[
1382
+ genai_types.Part(text="Describe this image:"),
1383
+ genai_types.Part(inline_data=genai_types.Blob(mime_type="image/jpeg", data=image_bytes))
1384
+ ]
1385
+ )
1386
+ ```
1387
+ * **Streaming Modalities**: `RunConfig.response_modalities=['TEXT', 'AUDIO']`.
1153
1388
 
1154
1389
  ---
1155
1390
 
@@ -1222,3 +1457,15 @@ async def main():
1222
1457
  if __name__ == "__main__":
1223
1458
  asyncio.run(main())
1224
1459
  ```
1460
+
1461
+ ---
1462
+
1463
+ ## 18. Official API & CLI References
1464
+
1465
+ For detailed specifications of all classes, methods, and commands, refer to the official reference documentation.
1466
+
1467
+ * [Python API Reference](./api-reference/python/index.html)
1468
+ * [Java API Reference](./api-reference/java/index.html)
1469
+ * [CLI Reference](./api-reference/cli/index.html)
1470
+ * [REST API Reference](./api-reference/rest/index.md)
1471
+ * [Agent Config YAML Reference](./api-reference/agentconfig/index.html)
@@ -1,217 +0,0 @@
1
- /**
2
- * Copyright 2024 Google LLC
3
- *
4
- * Licensed under the Apache License, Version 2.0 (the "License");
5
- * you may not use this file except in compliance with the License.
6
- * You may obtain a copy of the License at
7
- *
8
- * http://www.apache.org/licenses/LICENSE-2.0
9
- *
10
- * Unless required by applicable law or agreed to in writing, software
11
- * distributed under the License is distributed on an "AS IS" BASIS,
12
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- * See the License for the specific language governing permissions and
14
- * limitations under the License.
15
- */
16
-
17
- import cn from "classnames";
18
-
19
- import { memo, ReactNode, RefObject, useEffect, useRef, useState } from "react";
20
- import { useLiveAPIContext } from "../../contexts/LiveAPIContext";
21
- import { UseMediaStreamResult } from "../../hooks/use-media-stream-mux";
22
- import { useScreenCapture } from "../../hooks/use-screen-capture";
23
- import { useWebcam } from "../../hooks/use-webcam";
24
- import { AudioRecorder } from "../../utils/audio-recorder";
25
- import AudioPulse from "../audio-pulse/AudioPulse";
26
- import "./control-tray.scss";
27
-
28
- export type ControlTrayProps = {
29
- videoRef: RefObject<HTMLVideoElement>;
30
- children?: ReactNode;
31
- supportsVideo: boolean;
32
- onVideoStreamChange?: (stream: MediaStream | null) => void;
33
- };
34
-
35
- type MediaStreamButtonProps = {
36
- isStreaming: boolean;
37
- onIcon: string;
38
- offIcon: string;
39
- start: () => Promise<any>;
40
- stop: () => any;
41
- };
42
-
43
- /**
44
- * button used for triggering webcam or screen-capture
45
- */
46
- const MediaStreamButton = memo(
47
- ({ isStreaming, onIcon, offIcon, start, stop }: MediaStreamButtonProps) =>
48
- isStreaming ? (
49
- <button className="action-button" onClick={stop}>
50
- <span className="material-symbols-outlined">{onIcon}</span>
51
- </button>
52
- ) : (
53
- <button className="action-button" onClick={start}>
54
- <span className="material-symbols-outlined">{offIcon}</span>
55
- </button>
56
- ),
57
- );
58
-
59
- function ControlTray({
60
- videoRef,
61
- children,
62
- onVideoStreamChange = () => {},
63
- supportsVideo,
64
- }: ControlTrayProps) {
65
- const videoStreams = [useWebcam(), useScreenCapture()];
66
- const [activeVideoStream, setActiveVideoStream] =
67
- useState<MediaStream | null>(null);
68
- const [webcam, screenCapture] = videoStreams;
69
- const [inVolume, setInVolume] = useState(0);
70
- const [audioRecorder] = useState(() => new AudioRecorder());
71
- const [muted, setMuted] = useState(false);
72
- const renderCanvasRef = useRef<HTMLCanvasElement>(null);
73
- const connectButtonRef = useRef<HTMLButtonElement>(null);
74
-
75
- const { client, connected, connect, disconnect, volume } =
76
- useLiveAPIContext();
77
-
78
- useEffect(() => {
79
- if (!connected && connectButtonRef.current) {
80
- connectButtonRef.current.focus();
81
- }
82
- }, [connected]);
83
- useEffect(() => {
84
- document.documentElement.style.setProperty(
85
- "--volume",
86
- `${Math.max(5, Math.min(inVolume * 200, 8))}px`,
87
- );
88
- }, [inVolume]);
89
-
90
- useEffect(() => {
91
- const onData = (base64: string) => {
92
- client.sendRealtimeInput([
93
- {
94
- mimeType: "audio/pcm;rate=16000",
95
- data: base64,
96
- },
97
- ]);
98
- };
99
- if (connected && !muted && audioRecorder) {
100
- audioRecorder.on("data", onData).on("volume", setInVolume).start();
101
- } else {
102
- audioRecorder.stop();
103
- }
104
- return () => {
105
- audioRecorder.off("data", onData).off("volume", setInVolume);
106
- };
107
- }, [connected, client, muted, audioRecorder]);
108
-
109
- useEffect(() => {
110
- if (videoRef.current) {
111
- videoRef.current.srcObject = activeVideoStream;
112
- }
113
-
114
- let timeoutId = -1;
115
-
116
- function sendVideoFrame() {
117
- const video = videoRef.current;
118
- const canvas = renderCanvasRef.current;
119
-
120
- if (!video || !canvas) {
121
- return;
122
- }
123
-
124
- const ctx = canvas.getContext("2d")!;
125
- canvas.width = video.videoWidth * 0.25;
126
- canvas.height = video.videoHeight * 0.25;
127
- if (canvas.width + canvas.height > 0) {
128
- ctx.drawImage(videoRef.current, 0, 0, canvas.width, canvas.height);
129
- const base64 = canvas.toDataURL("image/jpeg", 1.0);
130
- const data = base64.slice(base64.indexOf(",") + 1, Infinity);
131
- client.sendRealtimeInput([{ mimeType: "image/jpeg", data }]);
132
- }
133
- if (connected) {
134
- timeoutId = window.setTimeout(sendVideoFrame, 1000 / 0.5);
135
- }
136
- }
137
- if (connected && activeVideoStream !== null) {
138
- requestAnimationFrame(sendVideoFrame);
139
- }
140
- return () => {
141
- clearTimeout(timeoutId);
142
- };
143
- }, [connected, activeVideoStream, client, videoRef]);
144
-
145
- //handler for swapping from one video-stream to the next
146
- const changeStreams = (next?: UseMediaStreamResult) => async () => {
147
- if (next) {
148
- const mediaStream = await next.start();
149
- setActiveVideoStream(mediaStream);
150
- onVideoStreamChange(mediaStream);
151
- } else {
152
- setActiveVideoStream(null);
153
- onVideoStreamChange(null);
154
- }
155
-
156
- videoStreams.filter((msr) => msr !== next).forEach((msr) => msr.stop());
157
- };
158
-
159
- return (
160
- <section className="control-tray">
161
- <canvas style={{ display: "none" }} ref={renderCanvasRef} />
162
- <nav className={cn("actions-nav", { disabled: !connected })}>
163
- <button
164
- className={cn("action-button mic-button")}
165
- onClick={() => setMuted(!muted)}
166
- >
167
- {!muted ? (
168
- <span className="material-symbols-outlined filled">mic</span>
169
- ) : (
170
- <span className="material-symbols-outlined filled">mic_off</span>
171
- )}
172
- </button>
173
-
174
- <div className="action-button no-action outlined">
175
- <AudioPulse volume={volume} active={connected} hover={false} />
176
- </div>
177
-
178
- {supportsVideo && (
179
- <>
180
- <MediaStreamButton
181
- isStreaming={screenCapture.isStreaming}
182
- start={changeStreams(screenCapture)}
183
- stop={changeStreams()}
184
- onIcon="cancel_presentation"
185
- offIcon="present_to_all"
186
- />
187
- <MediaStreamButton
188
- isStreaming={webcam.isStreaming}
189
- start={changeStreams(webcam)}
190
- stop={changeStreams()}
191
- onIcon="videocam_off"
192
- offIcon="videocam"
193
- />
194
- </>
195
- )}
196
- {children}
197
- </nav>
198
-
199
- <div className={cn("connection-container", { connected })}>
200
- <div className="connection-button-container">
201
- <button
202
- ref={connectButtonRef}
203
- className={cn("action-button connect-toggle", { connected })}
204
- onClick={connected ? disconnect : connect}
205
- >
206
- <span className="material-symbols-outlined filled">
207
- {connected ? "pause" : "play_arrow"}
208
- </span>
209
- </button>
210
- </div>
211
- <span className="text-indicator">Streaming</span>
212
- </div>
213
- </section>
214
- );
215
- }
216
-
217
- export default memo(ControlTray);