google-adk 1.7.0__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- google/adk/a2a/converters/request_converter.py +1 -2
- google/adk/a2a/logs/log_utils.py +1 -2
- google/adk/a2a/utils/__init__.py +0 -0
- google/adk/a2a/utils/agent_card_builder.py +544 -0
- google/adk/a2a/utils/agent_to_a2a.py +118 -0
- google/adk/agents/base_agent.py +6 -1
- google/adk/agents/config_schemas/AgentConfig.json +22 -0
- google/adk/agents/live_request_queue.py +15 -0
- google/adk/agents/llm_agent.py +11 -0
- google/adk/agents/loop_agent.py +6 -1
- google/adk/agents/remote_a2a_agent.py +2 -2
- google/adk/artifacts/gcs_artifact_service.py +86 -18
- google/adk/cli/browser/index.html +2 -2
- google/adk/cli/browser/{main-SRBSE46V.js → main-W7QZBYAR.js} +139 -139
- google/adk/cli/cli_eval.py +87 -12
- google/adk/cli/cli_tools_click.py +143 -82
- google/adk/cli/fast_api.py +136 -95
- google/adk/evaluation/eval_metrics.py +4 -0
- google/adk/evaluation/eval_sets_manager.py +5 -1
- google/adk/evaluation/final_response_match_v2.py +2 -2
- google/adk/evaluation/gcs_eval_sets_manager.py +2 -1
- google/adk/evaluation/local_eval_service.py +2 -2
- google/adk/evaluation/local_eval_set_results_manager.py +2 -2
- google/adk/evaluation/local_eval_sets_manager.py +1 -1
- google/adk/evaluation/metric_evaluator_registry.py +16 -6
- google/adk/evaluation/vertex_ai_eval_facade.py +7 -1
- google/adk/events/event.py +7 -2
- google/adk/flows/llm_flows/base_llm_flow.py +25 -6
- google/adk/flows/llm_flows/functions.py +13 -19
- google/adk/memory/in_memory_memory_service.py +1 -1
- google/adk/memory/vertex_ai_memory_bank_service.py +12 -10
- google/adk/models/anthropic_llm.py +2 -1
- google/adk/models/base_llm_connection.py +2 -0
- google/adk/models/gemini_llm_connection.py +17 -6
- google/adk/models/google_llm.py +35 -5
- google/adk/models/lite_llm.py +31 -18
- google/adk/sessions/database_session_service.py +25 -24
- google/adk/sessions/vertex_ai_session_service.py +13 -5
- google/adk/tools/__init__.py +2 -0
- google/adk/tools/_automatic_function_calling_util.py +20 -2
- google/adk/tools/agent_tool.py +14 -3
- google/adk/tools/base_toolset.py +22 -0
- google/adk/tools/bigquery/metadata_tool.py +2 -0
- google/adk/tools/bigquery/query_tool.py +15 -1
- google/adk/tools/computer_use/__init__.py +13 -0
- google/adk/tools/computer_use/base_computer.py +265 -0
- google/adk/tools/computer_use/computer_use_tool.py +166 -0
- google/adk/tools/computer_use/computer_use_toolset.py +220 -0
- google/adk/tools/exit_loop_tool.py +1 -0
- google/adk/tools/langchain_tool.py +14 -3
- google/adk/tools/openapi_tool/openapi_spec_parser/openapi_spec_parser.py +5 -0
- google/adk/version.py +1 -1
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/METADATA +2 -1
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/RECORD +57 -50
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/WHEEL +0 -0
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/entry_points.txt +0 -0
- {google_adk-1.7.0.dist-info → google_adk-1.8.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,265 @@
|
|
1
|
+
# Copyright 2025 Google LLC
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from __future__ import annotations
|
16
|
+
|
17
|
+
import abc
|
18
|
+
from enum import Enum
|
19
|
+
from typing import Literal
|
20
|
+
from typing import Optional
|
21
|
+
|
22
|
+
import pydantic
|
23
|
+
|
24
|
+
from ...utils.feature_decorator import experimental
|
25
|
+
|
26
|
+
|
27
|
+
@experimental
class ComputerEnvironment(str, Enum):
  """Case insensitive enum for computer environments.

  Members are plain strings (the class mixes in `str`). Lookup by value
  ignores case, so `ComputerEnvironment("environment_browser")` resolves to
  `ENVIRONMENT_BROWSER`.
  """

  # Defaults to browser.
  ENVIRONMENT_UNSPECIFIED = "ENVIRONMENT_UNSPECIFIED"
  # Operates in a web browser.
  ENVIRONMENT_BROWSER = "ENVIRONMENT_BROWSER"

  @classmethod
  def _missing_(cls, value):
    """Resolves values that differ from a member only by letter case.

    Enum calls this hook when the exact-value lookup fails.

    Args:
      value: The value that did not match any member exactly.

    Returns:
      The matching member, or None so that Enum raises its usual ValueError.
    """
    if isinstance(value, str):
      folded = value.casefold()
      for member in cls:
        if member.value.casefold() == folded:
          return member
    return None
|
35
|
+
|
36
|
+
|
37
|
+
@experimental
class ComputerState(pydantic.BaseModel):
  """Represents the current state of the computer environment.

  Both fields default to None, so an empty state can be constructed.

  Attributes:
    screenshot: The screenshot in PNG format as bytes, or None when no
      screenshot is available.
    url: The current URL of the webpage being displayed, or None.
  """

  # Annotated Optional because the default is None; a bare `bytes` annotation
  # would reject an explicitly passed None while still defaulting to it.
  screenshot: Optional[bytes] = pydantic.Field(
      default=None, description="Screenshot in PNG format"
  )
  url: Optional[str] = pydantic.Field(
      default=None, description="Current webpage URL"
  )
|
52
|
+
|
53
|
+
|
54
|
+
@experimental
class BaseComputer(abc.ABC):
  """Defines an interface for computer environments.

  This abstract base class defines the standard interface for controlling
  computer environments, including web browsers and other interactive systems.

  Every method is a coroutine. All of them are abstract except `initialize`
  and `close`, which are no-ops by default and may be overridden.
  """

  @abc.abstractmethod
  async def screen_size(self) -> tuple[int, int]:
    """Returns the screen size of the environment.

    Returns:
      A tuple of (width, height) in pixels.
    """

  @abc.abstractmethod
  async def open_web_browser(self) -> ComputerState:
    """Opens the web browser.

    Returns:
      The current state after opening the browser.
    """

  @abc.abstractmethod
  async def click_at(self, x: int, y: int) -> ComputerState:
    """Clicks at a specific x, y coordinate on the webpage.

    The 'x' and 'y' values are absolute values, scaled to the height and width
    of the screen.

    Args:
      x: The x-coordinate to click at.
      y: The y-coordinate to click at.

    Returns:
      The current state after clicking.
    """

  @abc.abstractmethod
  async def hover_at(self, x: int, y: int) -> ComputerState:
    """Hovers at a specific x, y coordinate on the webpage.

    May be used to explore sub-menus that appear on hover.
    The 'x' and 'y' values are absolute values, scaled to the height and width
    of the screen.

    Args:
      x: The x-coordinate to hover at.
      y: The y-coordinate to hover at.

    Returns:
      The current state after hovering.
    """

  @abc.abstractmethod
  async def type_text_at(
      self,
      x: int,
      y: int,
      text: str,
      press_enter: bool = True,
      clear_before_typing: bool = True,
  ) -> ComputerState:
    """Types text at a specific x, y coordinate.

    The system automatically presses ENTER after typing. To disable this, set
    `press_enter` to False.
    The system automatically clears any existing content before typing the
    specified `text`. To disable this, set `clear_before_typing` to False.
    The 'x' and 'y' values are absolute values, scaled to the height and width
    of the screen.

    Args:
      x: The x-coordinate to type at.
      y: The y-coordinate to type at.
      text: The text to type.
      press_enter: Whether to press ENTER after typing.
      clear_before_typing: Whether to clear existing content before typing.

    Returns:
      The current state after typing.
    """

  @abc.abstractmethod
  async def scroll_document(
      self, direction: Literal["up", "down", "left", "right"]
  ) -> ComputerState:
    """Scrolls the entire webpage "up", "down", "left" or "right".

    Args:
      direction: The direction to scroll.

    Returns:
      The current state after scrolling.
    """

  @abc.abstractmethod
  async def scroll_at(
      self,
      x: int,
      y: int,
      direction: Literal["up", "down", "left", "right"],
      magnitude: int,
  ) -> ComputerState:
    """Scrolls up, down, right, or left at a x, y coordinate by magnitude.

    The 'x' and 'y' values are absolute values, scaled to the height and width
    of the screen.

    Args:
      x: The x-coordinate to scroll at.
      y: The y-coordinate to scroll at.
      direction: The direction to scroll.
      magnitude: The amount to scroll.

    Returns:
      The current state after scrolling.
    """

  @abc.abstractmethod
  async def wait(self, seconds: int) -> ComputerState:
    """Waits for n seconds to allow unfinished webpage processes to complete.

    Args:
      seconds: The number of seconds to wait.

    Returns:
      The current state after waiting.
    """

  @abc.abstractmethod
  async def go_back(self) -> ComputerState:
    """Navigates back to the previous webpage in the browser history.

    Returns:
      The current state after navigating back.
    """

  @abc.abstractmethod
  async def go_forward(self) -> ComputerState:
    """Navigates forward to the next webpage in the browser history.

    Returns:
      The current state after navigating forward.
    """

  @abc.abstractmethod
  async def search(self) -> ComputerState:
    """Directly jumps to a search engine home page.

    Used when you need to start with a search. For example, this is used when
    the current website doesn't have the information needed or because a new
    task is being started.

    Returns:
      The current state after navigating to search.
    """

  @abc.abstractmethod
  async def navigate(self, url: str) -> ComputerState:
    """Navigates directly to a specified URL.

    Args:
      url: The URL to navigate to.

    Returns:
      The current state after navigation.
    """

  @abc.abstractmethod
  async def key_combination(self, keys: list[str]) -> ComputerState:
    """Presses keyboard keys and combinations, such as "control+c" or "enter".

    Args:
      keys: List of keys to press in combination.

    Returns:
      The current state after key press.
    """

  @abc.abstractmethod
  async def drag_and_drop(
      self, x: int, y: int, destination_x: int, destination_y: int
  ) -> ComputerState:
    """Drags an element from (x, y) and drops it at (destination_x, destination_y).

    The 'x', 'y', 'destination_x' and 'destination_y' values are absolute
    values, scaled to the height and width of the screen.

    Args:
      x: The x-coordinate to start dragging from.
      y: The y-coordinate to start dragging from.
      destination_x: The x-coordinate to drop at.
      destination_y: The y-coordinate to drop at.

    Returns:
      The current state after drag and drop.
    """

  @abc.abstractmethod
  async def current_state(self) -> ComputerState:
    """Returns the current state of the current webpage.

    Returns:
      The current environment state.
    """

  async def initialize(self) -> None:
    """Initialize the computer. Does nothing unless overridden."""

  async def close(self) -> None:
    """Clean up resources of the computer. Does nothing unless overridden."""

  @abc.abstractmethod
  async def environment(self) -> ComputerEnvironment:
    """Returns the environment of the computer."""
|
@@ -0,0 +1,166 @@
|
|
1
|
+
# Copyright 2025 Google LLC
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from __future__ import annotations
|
16
|
+
|
17
|
+
import base64
|
18
|
+
import logging
|
19
|
+
from typing import Any
|
20
|
+
from typing import Callable
|
21
|
+
|
22
|
+
from google.genai import types
|
23
|
+
from typing_extensions import override
|
24
|
+
|
25
|
+
from ...models.llm_request import LlmRequest
|
26
|
+
from ...utils.feature_decorator import experimental
|
27
|
+
from ..function_tool import FunctionTool
|
28
|
+
from ..tool_context import ToolContext
|
29
|
+
from .base_computer import ComputerState
|
30
|
+
|
31
|
+
logger = logging.getLogger("google_adk." + __name__)
|
32
|
+
|
33
|
+
|
34
|
+
@experimental
class ComputerUseTool(FunctionTool):
  """A tool that wraps computer control functions for use with LLMs.

  This tool automatically normalizes coordinates from a virtual coordinate space
  (by default 1000x1000) to the actual screen size. This allows LLMs to work
  with a consistent coordinate system regardless of the actual screen dimensions,
  making their output more predictable and easier to handle.
  """

  def __init__(
      self,
      *,
      func: Callable[..., Any],
      screen_size: tuple[int, int],
      virtual_screen_size: tuple[int, int] = (1000, 1000),
  ):
    """Initialize the ComputerUseTool.

    Args:
      func: The computer control function to wrap.
      screen_size: The actual screen size as (width, height) in pixels.
        This represents the real dimensions of the target screen/display.
      virtual_screen_size: The virtual coordinate space dimensions as
        (width, height) that the LLM uses to specify coordinates. Coordinates
        from the LLM are automatically normalized from this virtual space to
        the actual screen_size. Default is (1000, 1000), meaning the LLM
        thinks it's working with a 1000x1000 pixel screen regardless of the
        actual screen dimensions.

    Raises:
      ValueError: If screen_size or virtual_screen_size is not a 2-tuple of
        positive numbers.
    """
    # Validate before storing anything so a rejected argument never leaves a
    # partially initialised tool behind.
    self._validate_size("screen_size", screen_size)
    self._validate_size("virtual_screen_size", virtual_screen_size)

    super().__init__(func=func)
    self._screen_size = screen_size
    self._coordinate_space = virtual_screen_size

  @staticmethod
  def _validate_size(label: str, size: tuple[int, int]) -> None:
    """Raises ValueError unless `size` is a 2-tuple of positive numbers.

    Args:
      label: The argument name, used as the prefix of the error message.
      size: The candidate (width, height) tuple.
    """
    if not isinstance(size, tuple) or len(size) != 2:
      raise ValueError(f"{label} must be a tuple of (width, height)")
    # Checked explicitly so that non-numeric entries raise the documented
    # ValueError instead of a TypeError from the comparison below.
    if not all(isinstance(dim, (int, float)) for dim in size):
      raise ValueError(f"{label} dimensions must be numeric")
    if size[0] <= 0 or size[1] <= 0:
      raise ValueError(f"{label} dimensions must be positive")

  def _normalize(self, value: int, axis: int, label: str) -> int:
    """Scales one virtual coordinate to the real screen and clamps it.

    Args:
      value: The coordinate in virtual screen space.
      axis: 0 for the horizontal axis (width), 1 for the vertical (height).
      label: The axis name used in the error message ("x" or "y").

    Returns:
      The coordinate in real pixels, clamped to [0, dimension - 1].

    Raises:
      ValueError: If `value` is not an int or float.
    """
    if not isinstance(value, (int, float)):
      raise ValueError(
          f"{label} coordinate must be numeric, got {type(value)}"
      )

    extent = self._screen_size[axis]
    scaled = int(value / self._coordinate_space[axis] * extent)
    # Clamp to screen bounds
    return max(0, min(scaled, extent - 1))

  def _normalize_x(self, x: int) -> int:
    """Normalize x coordinate from virtual screen space to actual screen width."""
    return self._normalize(x, 0, "x")

  def _normalize_y(self, y: int) -> int:
    """Normalize y coordinate from virtual screen space to actual screen height."""
    return self._normalize(y, 1, "y")

  @override
  async def run_async(
      self, *, args: dict[str, Any], tool_context: ToolContext
  ) -> Any:
    """Run the computer control function with normalized coordinates.

    The coordinate entries of `args` ("x", "y", "destination_x",
    "destination_y"), when present, are replaced in place with their
    normalized values before the wrapped function is invoked.

    Args:
      args: The arguments for the wrapped function.
      tool_context: The context forwarded to the parent implementation.

    Returns:
      For a ComputerState result, a dict with "url" and, when a screenshot is
      available, "image" holding the base64-encoded PNG. Any other result is
      returned unchanged.

    Raises:
      Exception: Any error is logged and re-raised.
    """
    try:
      # Destination coordinates (drag and drop) use the same per-axis scaling.
      for key, normalize in (
          ("x", self._normalize_x),
          ("y", self._normalize_y),
          ("destination_x", self._normalize_x),
          ("destination_y", self._normalize_y),
      ):
        if key in args:
          original = args[key]
          args[key] = normalize(original)
          logger.debug("Normalized %s: %s -> %s", key, original, args[key])

      # Execute the actual computer control function
      result = await super().run_async(args=args, tool_context=tool_context)

      # Convert a ComputerState result into a plain dict.
      if isinstance(result, ComputerState):
        if result.screenshot is None:
          # ComputerState defaults screenshot to None; b64encode would raise
          # TypeError on it, so the image entry is left out instead.
          return {"url": result.url}
        return {
            "image": {
                "mimetype": "image/png",
                "data": base64.b64encode(result.screenshot).decode("utf-8"),
            },
            "url": result.url,
        }

      return result

    except Exception as e:
      logger.error("Error in ComputerUseTool.run_async: %s", e)
      raise

  @override
  async def process_llm_request(
      self, *, tool_context: ToolContext, llm_request: LlmRequest
  ) -> None:
    """Does nothing at the level of the individual tool.

    ComputerUseToolset will add this tool to the LLM request and add computer
    use configuration to the LLM request.
    """
    pass
|
@@ -0,0 +1,220 @@
|
|
1
|
+
# Copyright 2025 Google LLC
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from __future__ import annotations
|
16
|
+
|
17
|
+
import asyncio
|
18
|
+
import logging
|
19
|
+
from typing import Any
|
20
|
+
from typing import Callable
|
21
|
+
from typing import Optional
|
22
|
+
from typing import Union
|
23
|
+
|
24
|
+
from google.genai import types
|
25
|
+
from typing_extensions import override
|
26
|
+
|
27
|
+
from ...agents.readonly_context import ReadonlyContext
|
28
|
+
from ...models.llm_request import LlmRequest
|
29
|
+
from ...utils.feature_decorator import experimental
|
30
|
+
from ..base_toolset import BaseToolset
|
31
|
+
from ..tool_context import ToolContext
|
32
|
+
from .base_computer import BaseComputer
|
33
|
+
from .computer_use_tool import ComputerUseTool
|
34
|
+
|
35
|
+
# Methods that should be excluded when creating tools from BaseComputer methods
|
36
|
+
EXCLUDED_METHODS = {"screen_size", "environment", "close"}
|
37
|
+
|
38
|
+
logger = logging.getLogger("google_adk." + __name__)
|
39
|
+
|
40
|
+
|
41
|
+
@experimental
class ComputerUseToolset(BaseToolset):
  """Toolset that exposes the methods of a BaseComputer as ComputerUseTools.

  The computer is initialized on first use. One ComputerUseTool is created per
  public callable of BaseComputer that is not listed in EXCLUDED_METHODS, and
  the resulting list is cached.
  """

  def __init__(
      self,
      *,
      computer: BaseComputer,
  ):
    """Initialize the toolset.

    Args:
      computer: The computer implementation whose methods become tools.
    """
    super().__init__()
    self._computer = computer
    self._initialized = False
    self._tools = None

  async def _ensure_initialized(self) -> None:
    """Calls `initialize()` on the computer the first time it is needed."""
    if not self._initialized:
      await self._computer.initialize()
      self._initialized = True

  @staticmethod
  async def adapt_computer_use_tool(
      method_name: str,
      adapter_func: Union[
          Callable[[Callable[..., Any]], Callable[..., Any]],
          Callable[[Callable[..., Any]], Any],
      ],
      llm_request: LlmRequest,
  ) -> None:
    """Adapt a computer use tool by replacing it with a modified version.

    If `method_name` is excluded, is not a callable attribute of BaseComputer,
    or is missing from `llm_request.tools_dict`, a warning is logged and the
    request is left untouched.

    Args:
      method_name: The name of the method (of BaseComputer class) to adapt
        (e.g. 'wait').
      adapter_func: A function that accepts existing computer use async
        function and returns a new computer use async function. Can be either
        sync or async function. The name of the returned function will be
        used as the new tool name.
      llm_request: The LLM request containing the tools dictionary.
    """
    # Validate that the method is a valid BaseComputer method
    if method_name in EXCLUDED_METHODS:
      logger.warning(
          "Method %s is not a valid BaseComputer method", method_name
      )
      return

    # Check if it's a method defined in BaseComputer class
    if not callable(getattr(BaseComputer, method_name, None)):
      logger.warning(
          "Method %s is not a valid BaseComputer method", method_name
      )
      return

    if method_name not in llm_request.tools_dict:
      logger.warning("Method %s not found in tools_dict", method_name)
      return

    original_tool = llm_request.tools_dict[method_name]

    # An async adapter must be awaited to obtain the adapted function; a sync
    # adapter returns it directly.
    if asyncio.iscoroutinefunction(adapter_func):
      adapted_func = await adapter_func(original_tool.func)
    else:
      adapted_func = adapter_func(original_tool.func)

    # The adapted function's own name becomes the new tool name.
    new_method_name = adapted_func.__name__

    # Carry the original tool's screen geometry over to the replacement.
    adapted_tool = ComputerUseTool(
        func=adapted_func,
        screen_size=original_tool._screen_size,
        virtual_screen_size=original_tool._coordinate_space,
    )

    # Remove the original BEFORE registering the replacement: an adapter that
    # preserves __name__ (e.g. via functools.wraps) yields the same key, and
    # deleting afterwards would drop the adapted tool as well.
    del llm_request.tools_dict[method_name]
    llm_request.tools_dict[new_method_name] = adapted_tool

    logger.debug(
        "Adapted tool %s to %s with adapter function",
        method_name,
        new_method_name,
    )

  @override
  async def get_tools(
      self,
      readonly_context: Optional[ReadonlyContext] = None,
  ) -> list[ComputerUseTool]:
    """Returns one ComputerUseTool per exposed BaseComputer method.

    The list is built once and cached on the toolset.

    Args:
      readonly_context: Not used by this implementation.

    Returns:
      The cached list of tools.
    """
    if self._tools:
      return self._tools
    await self._ensure_initialized()
    # Get screen size for tool configuration
    screen_size = await self._computer.screen_size()

    tools = []
    # Names come from the BaseComputer interface, so extra public methods of
    # the concrete computer are not exposed.
    for method_name in dir(BaseComputer):
      # Skip private methods and explicitly excluded ones.
      if method_name.startswith("_") or method_name in EXCLUDED_METHODS:
        continue
      if not callable(getattr(BaseComputer, method_name, None)):
        continue
      # Bind to the concrete instance so the tool calls the real implementation.
      tools.append(
          ComputerUseTool(
              func=getattr(self._computer, method_name),
              screen_size=screen_size,
          )
      )

    self._tools = tools
    return self._tools

  @override
  async def close(self) -> None:
    """Closes the underlying computer."""
    await self._computer.close()

  @override
  async def process_llm_request(
      self, *, tool_context: ToolContext, llm_request: LlmRequest
  ) -> None:
    """Add its tools to the LLM request and add computer use configuration.

    Args:
      tool_context: Not used by this implementation.
      llm_request: The request whose tools_dict and config.tools are updated.

    Raises:
      Exception: Any error is logged and re-raised.
    """
    try:
      # Register every tool under its name in the tools dictionary.
      if not self._tools:
        await self.get_tools()

      for tool in self._tools:
        llm_request.tools_dict[tool.name] = tool

      # Initialize config if needed
      llm_request.config = llm_request.config or types.GenerateContentConfig()
      llm_request.config.tools = llm_request.config.tools or []

      # Check if computer use is already configured. types.ToolDict is a
      # TypedDict and cannot be used with isinstance, so dict-shaped entries
      # are detected as plain dicts instead.
      for configured in llm_request.config.tools:
        if isinstance(configured, types.Tool):
          computer_use = getattr(configured, "computer_use", None)
        elif isinstance(configured, dict):
          computer_use = configured.get("computer_use")
        else:
          computer_use = None
        if computer_use:
          logger.debug("Computer use already configured in LLM request")
          return

      # Map the computer's environment onto the genai enum by member name,
      # falling back to the browser environment when there is no match.
      computer_environment = await self._computer.environment()
      environment = getattr(
          types.Environment,
          computer_environment.name,
          types.Environment.ENVIRONMENT_BROWSER,
      )
      llm_request.config.tools.append(
          types.Tool(
              computer_use=types.ToolComputerUse(environment=environment)
          )
      )
      logger.debug(
          "Added computer use tool with environment: %s",
          environment,
      )

    except Exception as e:
      logger.error("Error in ComputerUseToolset.process_llm_request: %s", e)
      raise
|
@@ -59,15 +59,26 @@ class LangchainTool(FunctionTool):
|
|
59
59
|
name: Optional[str] = None,
|
60
60
|
description: Optional[str] = None,
|
61
61
|
):
|
62
|
-
# Check if the tool has a 'run' method
|
63
62
|
if not hasattr(tool, 'run') and not hasattr(tool, '_run'):
|
64
|
-
raise ValueError(
|
63
|
+
raise ValueError(
|
64
|
+
"Tool must be a Langchain tool, have a 'run' or '_run' method."
|
65
|
+
)
|
65
66
|
|
66
67
|
# Determine which function to use
|
67
68
|
if isinstance(tool, StructuredTool):
|
68
69
|
func = tool.func
|
69
|
-
|
70
|
+
# For async tools, func might be None but coroutine exists
|
71
|
+
if func is None and hasattr(tool, 'coroutine') and tool.coroutine:
|
72
|
+
func = tool.coroutine
|
73
|
+
elif hasattr(tool, '_run') or hasattr(tool, 'run'):
|
70
74
|
func = tool._run if hasattr(tool, '_run') else tool.run
|
75
|
+
else:
|
76
|
+
raise ValueError(
|
77
|
+
"This is not supported. Tool must be a Langchain tool, have a 'run'"
|
78
|
+
" or '_run' method. The tool is: ",
|
79
|
+
type(tool),
|
80
|
+
)
|
81
|
+
|
71
82
|
super().__init__(func)
|
72
83
|
# run_manager is a special parameter for langchain tool
|
73
84
|
self._ignore_params.append('run_manager')
|
@@ -111,6 +111,11 @@ class OpenApiSpecParser:
|
|
111
111
|
if operation_dict is None:
|
112
112
|
continue
|
113
113
|
|
114
|
+
# Append path-level parameters
|
115
|
+
operation_dict["parameters"] = operation_dict.get(
|
116
|
+
"parameters", []
|
117
|
+
) + path_item.get("parameters", [])
|
118
|
+
|
114
119
|
# If operation ID is missing, assign an operation id based on path
|
115
120
|
# and method
|
116
121
|
if "operationId" not in operation_dict:
|