openhands-tools 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openhands_tools-1.2.0/PKG-INFO +13 -0
- openhands_tools-1.2.0/openhands/tools/__init__.py +9 -0
- openhands_tools-1.2.0/openhands/tools/browser_use/__init__.py +55 -0
- openhands_tools-1.2.0/openhands/tools/browser_use/definition.py +574 -0
- openhands_tools-1.2.0/openhands/tools/browser_use/impl.py +354 -0
- openhands_tools-1.2.0/openhands/tools/browser_use/server.py +100 -0
- openhands_tools-1.2.0/openhands/tools/delegate/__init__.py +18 -0
- openhands_tools-1.2.0/openhands/tools/delegate/definition.py +116 -0
- openhands_tools-1.2.0/openhands/tools/delegate/impl.py +271 -0
- openhands_tools-1.2.0/openhands/tools/delegate/visualizer.py +230 -0
- openhands_tools-1.2.0/openhands/tools/file_editor/__init__.py +15 -0
- openhands_tools-1.2.0/openhands/tools/file_editor/definition.py +258 -0
- openhands_tools-1.2.0/openhands/tools/file_editor/editor.py +739 -0
- openhands_tools-1.2.0/openhands/tools/file_editor/exceptions.py +54 -0
- openhands_tools-1.2.0/openhands/tools/file_editor/impl.py +107 -0
- openhands_tools-1.2.0/openhands/tools/file_editor/utils/__init__.py +0 -0
- openhands_tools-1.2.0/openhands/tools/file_editor/utils/config.py +2 -0
- openhands_tools-1.2.0/openhands/tools/file_editor/utils/constants.py +9 -0
- openhands_tools-1.2.0/openhands/tools/file_editor/utils/diff.py +124 -0
- openhands_tools-1.2.0/openhands/tools/file_editor/utils/encoding.py +137 -0
- openhands_tools-1.2.0/openhands/tools/file_editor/utils/file_cache.py +158 -0
- openhands_tools-1.2.0/openhands/tools/file_editor/utils/history.py +123 -0
- openhands_tools-1.2.0/openhands/tools/file_editor/utils/shell.py +71 -0
- openhands_tools-1.2.0/openhands/tools/glob/__init__.py +15 -0
- openhands_tools-1.2.0/openhands/tools/glob/definition.py +115 -0
- openhands_tools-1.2.0/openhands/tools/glob/impl.py +278 -0
- openhands_tools-1.2.0/openhands/tools/grep/__init__.py +16 -0
- openhands_tools-1.2.0/openhands/tools/grep/definition.py +117 -0
- openhands_tools-1.2.0/openhands/tools/grep/impl.py +250 -0
- openhands_tools-1.2.0/openhands/tools/planning_file_editor/__init__.py +6 -0
- openhands_tools-1.2.0/openhands/tools/planning_file_editor/definition.py +120 -0
- openhands_tools-1.2.0/openhands/tools/planning_file_editor/impl.py +66 -0
- openhands_tools-1.2.0/openhands/tools/preset/__init__.py +25 -0
- openhands_tools-1.2.0/openhands/tools/preset/default.py +88 -0
- openhands_tools-1.2.0/openhands/tools/preset/planning.py +171 -0
- openhands_tools-1.2.0/openhands/tools/py.typed +0 -0
- openhands_tools-1.2.0/openhands/tools/task_tracker/__init__.py +14 -0
- openhands_tools-1.2.0/openhands/tools/task_tracker/definition.py +432 -0
- openhands_tools-1.2.0/openhands/tools/terminal/__init__.py +28 -0
- openhands_tools-1.2.0/openhands/tools/terminal/constants.py +31 -0
- openhands_tools-1.2.0/openhands/tools/terminal/definition.py +287 -0
- openhands_tools-1.2.0/openhands/tools/terminal/impl.py +191 -0
- openhands_tools-1.2.0/openhands/tools/terminal/metadata.py +101 -0
- openhands_tools-1.2.0/openhands/tools/terminal/terminal/__init__.py +24 -0
- openhands_tools-1.2.0/openhands/tools/terminal/terminal/factory.py +122 -0
- openhands_tools-1.2.0/openhands/tools/terminal/terminal/interface.py +229 -0
- openhands_tools-1.2.0/openhands/tools/terminal/terminal/subprocess_terminal.py +452 -0
- openhands_tools-1.2.0/openhands/tools/terminal/terminal/terminal_session.py +502 -0
- openhands_tools-1.2.0/openhands/tools/terminal/terminal/tmux_terminal.py +177 -0
- openhands_tools-1.2.0/openhands/tools/terminal/utils/command.py +150 -0
- openhands_tools-1.2.0/openhands/tools/utils/__init__.py +45 -0
- openhands_tools-1.2.0/openhands/tools/utils/timeout.py +14 -0
- openhands_tools-1.2.0/openhands_tools.egg-info/PKG-INFO +13 -0
- openhands_tools-1.2.0/openhands_tools.egg-info/SOURCES.txt +108 -0
- openhands_tools-1.2.0/openhands_tools.egg-info/dependency_links.txt +1 -0
- openhands_tools-1.2.0/openhands_tools.egg-info/requires.txt +8 -0
- openhands_tools-1.2.0/openhands_tools.egg-info/top_level.txt +1 -0
- openhands_tools-1.2.0/pyproject.toml +30 -0
- openhands_tools-1.2.0/setup.cfg +4 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: openhands-tools
|
|
3
|
+
Version: 1.2.0
|
|
4
|
+
Summary: OpenHands Tools - Runtime tools for AI agents
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Requires-Dist: openhands-sdk
|
|
7
|
+
Requires-Dist: bashlex>=0.18
|
|
8
|
+
Requires-Dist: binaryornot>=0.4.4
|
|
9
|
+
Requires-Dist: cachetools
|
|
10
|
+
Requires-Dist: libtmux>=0.46.2
|
|
11
|
+
Requires-Dist: pydantic>=2.11.7
|
|
12
|
+
Requires-Dist: browser-use>=0.8.0
|
|
13
|
+
Requires-Dist: func-timeout>=4.3.5
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Browser tools using browser-use integration."""
|
|
2
|
+
|
|
3
|
+
from openhands.tools.browser_use.definition import (
|
|
4
|
+
BrowserClickAction,
|
|
5
|
+
BrowserClickTool,
|
|
6
|
+
BrowserCloseTabAction,
|
|
7
|
+
BrowserCloseTabTool,
|
|
8
|
+
BrowserGetContentAction,
|
|
9
|
+
BrowserGetContentTool,
|
|
10
|
+
BrowserGetStateAction,
|
|
11
|
+
BrowserGetStateTool,
|
|
12
|
+
BrowserGoBackAction,
|
|
13
|
+
BrowserGoBackTool,
|
|
14
|
+
BrowserListTabsAction,
|
|
15
|
+
BrowserListTabsTool,
|
|
16
|
+
BrowserNavigateAction,
|
|
17
|
+
BrowserNavigateTool,
|
|
18
|
+
BrowserObservation,
|
|
19
|
+
BrowserScrollAction,
|
|
20
|
+
BrowserScrollTool,
|
|
21
|
+
BrowserSwitchTabAction,
|
|
22
|
+
BrowserSwitchTabTool,
|
|
23
|
+
BrowserToolSet,
|
|
24
|
+
BrowserTypeAction,
|
|
25
|
+
BrowserTypeTool,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
# Tool classes
|
|
31
|
+
"BrowserNavigateTool",
|
|
32
|
+
"BrowserClickTool",
|
|
33
|
+
"BrowserTypeTool",
|
|
34
|
+
"BrowserGetStateTool",
|
|
35
|
+
"BrowserGetContentTool",
|
|
36
|
+
"BrowserScrollTool",
|
|
37
|
+
"BrowserGoBackTool",
|
|
38
|
+
"BrowserListTabsTool",
|
|
39
|
+
"BrowserSwitchTabTool",
|
|
40
|
+
"BrowserCloseTabTool",
|
|
41
|
+
# Actions
|
|
42
|
+
"BrowserNavigateAction",
|
|
43
|
+
"BrowserClickAction",
|
|
44
|
+
"BrowserTypeAction",
|
|
45
|
+
"BrowserGetStateAction",
|
|
46
|
+
"BrowserGetContentAction",
|
|
47
|
+
"BrowserScrollAction",
|
|
48
|
+
"BrowserGoBackAction",
|
|
49
|
+
"BrowserListTabsAction",
|
|
50
|
+
"BrowserSwitchTabAction",
|
|
51
|
+
"BrowserCloseTabAction",
|
|
52
|
+
# Observations and the combined tool set
|
|
53
|
+
"BrowserObservation",
|
|
54
|
+
"BrowserToolSet",
|
|
55
|
+
]
|
|
@@ -0,0 +1,574 @@
|
|
|
1
|
+
"""Browser-use tool implementation for web automation."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Sequence
|
|
4
|
+
from typing import TYPE_CHECKING, Literal, Self
|
|
5
|
+
|
|
6
|
+
from pydantic import Field
|
|
7
|
+
|
|
8
|
+
from openhands.sdk.llm import ImageContent, TextContent
|
|
9
|
+
from openhands.sdk.tool import (
|
|
10
|
+
Action,
|
|
11
|
+
Observation,
|
|
12
|
+
ToolAnnotations,
|
|
13
|
+
ToolDefinition,
|
|
14
|
+
register_tool,
|
|
15
|
+
)
|
|
16
|
+
from openhands.sdk.utils import maybe_truncate
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Lazy import to avoid hanging during module import
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from openhands.tools.browser_use.impl import BrowserToolExecutor
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Upper bound passed to maybe_truncate() for the text part of a browser
# observation before it is handed to the LLM.
MAX_BROWSER_OUTPUT_SIZE = 50_000

# Maps the leading characters of base64-encoded image data to a MIME type.
# Each key is the base64 encoding of the format's magic bytes:
#   "/9j/"        -> FF D8 FF              (JPEG)
#   "iVBORw0KGgo" -> 89 'PNG' 0D 0A 1A 0A  (PNG)
#   "R0lGODlh"    -> 'GIF89a'              (GIF)
#   "R0lGODdh"    -> 'GIF87a'              (GIF, older signature)
#   "UklGR"       -> 'RIFF'                (container used by WebP)
# Lookups iterate in insertion order and stop at the first matching prefix.
BASE64_IMAGE_PREFIXES = {
    "/9j/": "image/jpeg",
    "iVBORw0KGgo": "image/png",
    "R0lGODlh": "image/gif",
    # GIF87a was previously missing, so such data fell back to "image/png".
    "R0lGODdh": "image/gif",
    "UklGR": "image/webp",
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def detect_image_mime_type(base64_data: str) -> str:
    """Guess the MIME type of base64-encoded image data from its prefix.

    The known prefixes in ``BASE64_IMAGE_PREFIXES`` are tried in the
    mapping's iteration order; the first one that ``base64_data`` starts
    with decides the result.

    Args:
        base64_data: Base64-encoded image data

    Returns:
        The MIME type of the first matching prefix, or "image/png" when no
        prefix matches.
    """
    candidates = (
        mime_type
        for prefix, mime_type in BASE64_IMAGE_PREFIXES.items()
        if base64_data.startswith(prefix)
    )
    # next() with a default yields the first hit or the PNG fallback.
    return next(candidates, "image/png")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class BrowserObservation(Observation):
    """Base observation for browser operations."""

    # Optional screenshot payload; None when no screenshot was captured.
    screenshot_data: str | None = Field(
        description="Base64 screenshot data if available", default=None
    )

    @property
    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
        """Assemble the content items sent to the LLM for this observation.

        Items are emitted in this order, each only when applicable:
        the error header (when ``is_error`` is set), the observation text
        truncated to ``MAX_BROWSER_OUTPUT_SIZE``, and the screenshot as a
        ``data:`` URL image.

        Returns:
            A list of text and image content items; may be empty.
        """
        parts: list[TextContent | ImageContent] = []

        # Flag failures up front so the header precedes the message body.
        if self.is_error:
            parts.append(TextContent(text=self.ERROR_MESSAGE_HEADER))

        # Skip empty text entirely rather than emitting a blank item.
        body = self.text
        if body:
            shortened = maybe_truncate(body, MAX_BROWSER_OUTPUT_SIZE)
            parts.append(TextContent(text=shortened))

        screenshot = self.screenshot_data
        if not screenshot:
            return parts

        # ImageContent takes URLs, so wrap the raw base64 in a data URL.
        mime_type = detect_image_mime_type(screenshot)
        parts.append(
            ImageContent(image_urls=[f"data:{mime_type};base64,{screenshot}"])
        )
        return parts
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# ============================================
|
|
83
|
+
# Base Browser Action
|
|
84
|
+
# ============================================
|
|
85
|
+
class BrowserAction(Action):
    """Base class for all browser actions.

    This base class serves as the parent for all browser-related actions,
    enabling proper type hierarchy and eliminating the need for union types.
    """

    # Intentionally empty: concrete actions add their own fields.
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# ============================================
|
|
96
|
+
# `go_to_url`
|
|
97
|
+
# ============================================
|
|
98
|
+
class BrowserNavigateAction(BrowserAction):
    """Schema for browser navigation."""

    # Required: there is no default, so callers must always supply a URL.
    url: str = Field(description="The URL to navigate to")

    # Optional flag; False unless the caller sets it.
    new_tab: bool = Field(
        description="Whether to open in a new tab. Default: False",
        default=False,
    )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
BROWSER_NAVIGATE_DESCRIPTION = """Navigate to a URL in the browser.
|
|
108
|
+
|
|
109
|
+
This tool allows you to navigate to any web page. You can optionally open the URL in a new tab.
|
|
110
|
+
|
|
111
|
+
Parameters:
|
|
112
|
+
- url: The URL to navigate to (required)
|
|
113
|
+
- new_tab: Whether to open in a new tab (optional, default: False)
|
|
114
|
+
|
|
115
|
+
Examples:
|
|
116
|
+
- Navigate to Google: url="https://www.google.com"
|
|
117
|
+
- Open GitHub in new tab: url="https://github.com", new_tab=True
|
|
118
|
+
""" # noqa: E501
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class BrowserNavigateTool(ToolDefinition[BrowserNavigateAction, BrowserObservation]):
    """Tool for browser navigation."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Build the ``browser_navigate`` tool around *executor*.

        Args:
            executor: Executor instance stored on the created tool.

        Returns:
            A one-element list holding the configured tool.
        """
        hints = ToolAnnotations(
            title="browser_navigate",
            readOnlyHint=False,
            destructiveHint=False,
            idempotentHint=False,
            openWorldHint=True,
        )
        tool = cls(
            description=BROWSER_NAVIGATE_DESCRIPTION,
            action_type=BrowserNavigateAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# ============================================
|
|
144
|
+
# `browser_click`
|
|
145
|
+
# ============================================
|
|
146
|
+
class BrowserClickAction(BrowserAction):
    """Schema for clicking elements."""

    # Required, validated as non-negative (ge=0).
    index: int = Field(
        description="The index of the element to click (from browser_get_state)",
        ge=0,
    )

    # Optional flag; False unless the caller sets it.
    new_tab: bool = Field(
        description="Whether to open any resulting navigation in a new tab. Default: False",  # noqa: E501
        default=False,
    )
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
BROWSER_CLICK_DESCRIPTION = """Click an element on the page by its index.
|
|
159
|
+
|
|
160
|
+
Use this tool to click on interactive elements like buttons, links, or form controls.
|
|
161
|
+
The index comes from the browser_get_state tool output.
|
|
162
|
+
|
|
163
|
+
Parameters:
|
|
164
|
+
- index: The index of the element to click (from browser_get_state)
|
|
165
|
+
- new_tab: Whether to open any resulting navigation in a new tab (optional)
|
|
166
|
+
|
|
167
|
+
Important: Only use indices that appear in your current browser_get_state output.
|
|
168
|
+
""" # noqa: E501
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class BrowserClickTool(ToolDefinition[BrowserClickAction, BrowserObservation]):
    """Tool for clicking browser elements."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Build the ``browser_click`` tool around *executor*.

        Args:
            executor: Executor instance stored on the created tool.

        Returns:
            A one-element list holding the configured tool.
        """
        hints = ToolAnnotations(
            title="browser_click",
            readOnlyHint=False,
            destructiveHint=False,
            idempotentHint=False,
            openWorldHint=True,
        )
        tool = cls(
            description=BROWSER_CLICK_DESCRIPTION,
            action_type=BrowserClickAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# ============================================
|
|
194
|
+
# `browser_type`
|
|
195
|
+
# ============================================
|
|
196
|
+
class BrowserTypeAction(BrowserAction):
    """Schema for typing text into elements."""

    # Required, validated as non-negative (ge=0).
    index: int = Field(
        description="The index of the input element (from browser_get_state)",
        ge=0,
    )

    # Required: the literal text to enter; no default is provided.
    text: str = Field(description="The text to type")
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
BROWSER_TYPE_DESCRIPTION = """Type text into an input field.
|
|
206
|
+
|
|
207
|
+
Use this tool to enter text into form fields, search boxes, or other text input elements.
|
|
208
|
+
The index comes from the browser_get_state tool output.
|
|
209
|
+
|
|
210
|
+
Parameters:
|
|
211
|
+
- index: The index of the input element (from browser_get_state)
|
|
212
|
+
- text: The text to type
|
|
213
|
+
|
|
214
|
+
Important: Only use indices that appear in your current browser_get_state output.
|
|
215
|
+
""" # noqa: E501
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class BrowserTypeTool(ToolDefinition[BrowserTypeAction, BrowserObservation]):
    """Tool for typing text into browser elements."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Build the ``browser_type`` tool around *executor*.

        Args:
            executor: Executor instance stored on the created tool.

        Returns:
            A one-element list holding the configured tool.
        """
        hints = ToolAnnotations(
            title="browser_type",
            readOnlyHint=False,
            destructiveHint=False,
            idempotentHint=False,
            openWorldHint=True,
        )
        tool = cls(
            description=BROWSER_TYPE_DESCRIPTION,
            action_type=BrowserTypeAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# ============================================
|
|
241
|
+
# `browser_get_state`
|
|
242
|
+
# ============================================
|
|
243
|
+
class BrowserGetStateAction(BrowserAction):
    """Schema for getting browser state."""

    # Optional flag; False unless the caller sets it.
    include_screenshot: bool = Field(
        description="Whether to include a screenshot of the current page. Default: False",  # noqa: E501
        default=False,
    )
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
BROWSER_GET_STATE_DESCRIPTION = """Get the current state of the page including all interactive elements.
|
|
253
|
+
|
|
254
|
+
This tool returns the current page content with numbered interactive elements that you can
|
|
255
|
+
click or type into. Use this frequently to understand what's available on the page.
|
|
256
|
+
|
|
257
|
+
Parameters:
|
|
258
|
+
- include_screenshot: Whether to include a screenshot (optional, default: False)
|
|
259
|
+
""" # noqa: E501
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
class BrowserGetStateTool(ToolDefinition[BrowserGetStateAction, BrowserObservation]):
    """Tool for getting browser state."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Build the ``browser_get_state`` tool around *executor*.

        Args:
            executor: Executor instance stored on the created tool.

        Returns:
            A one-element list holding the configured tool.
        """
        # Annotated as read-only and idempotent.
        hints = ToolAnnotations(
            title="browser_get_state",
            readOnlyHint=True,
            destructiveHint=False,
            idempotentHint=True,
            openWorldHint=True,
        )
        tool = cls(
            description=BROWSER_GET_STATE_DESCRIPTION,
            action_type=BrowserGetStateAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
# ============================================
|
|
285
|
+
# `browser_get_content`
|
|
286
|
+
# ============================================
|
|
287
|
+
class BrowserGetContentAction(BrowserAction):
    """Schema for getting page content in markdown."""

    # Optional flag; False unless the caller sets it.
    extract_links: bool = Field(
        description="Whether to include links in the content (default: False)",
        default=False,
    )

    # Optional offset, validated as non-negative (ge=0); starts at 0.
    start_from_char: int = Field(
        description="Character index to start from in the page content (default: 0)",
        default=0,
        ge=0,
    )
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
BROWSER_GET_CONTENT_DESCRIPTION = """Extract the main content of the current page in clean markdown format. It has been filtered to remove noise and advertising content.
|
|
302
|
+
|
|
303
|
+
If the content was truncated and you need more information, use start_from_char parameter to continue from where truncation occurred.
|
|
304
|
+
""" # noqa: E501
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
class BrowserGetContentTool(ToolDefinition[BrowserGetContentAction, BrowserObservation]):  # noqa: E501
    """Tool for getting page content in markdown."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Build the ``browser_get_content`` tool around *executor*.

        Args:
            executor: Executor instance stored on the created tool.

        Returns:
            A one-element list holding the configured tool.
        """
        # Annotated as read-only and idempotent.
        hints = ToolAnnotations(
            title="browser_get_content",
            readOnlyHint=True,
            destructiveHint=False,
            idempotentHint=True,
            openWorldHint=True,
        )
        tool = cls(
            description=BROWSER_GET_CONTENT_DESCRIPTION,
            action_type=BrowserGetContentAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
# ============================================
|
|
332
|
+
# `browser_scroll`
|
|
333
|
+
# ============================================
|
|
334
|
+
class BrowserScrollAction(BrowserAction):
    """Schema for scrolling the page."""

    # Restricted by the Literal type to "up" or "down"; "down" when omitted.
    direction: Literal["up", "down"] = Field(
        description="Direction to scroll. Options: 'up', 'down'. Default: 'down'",
        default="down",
    )
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
BROWSER_SCROLL_DESCRIPTION = """Scroll the page up or down.
|
|
344
|
+
|
|
345
|
+
Use this tool to scroll through page content when elements are not visible or when you need
|
|
346
|
+
to see more content.
|
|
347
|
+
|
|
348
|
+
Parameters:
|
|
349
|
+
- direction: Direction to scroll - "up" or "down" (optional, default: "down")
|
|
350
|
+
""" # noqa: E501
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
class BrowserScrollTool(ToolDefinition[BrowserScrollAction, BrowserObservation]):
    """Tool for scrolling the browser page."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Build the ``browser_scroll`` tool around *executor*.

        Args:
            executor: Executor instance stored on the created tool.

        Returns:
            A one-element list holding the configured tool.
        """
        hints = ToolAnnotations(
            title="browser_scroll",
            readOnlyHint=False,
            destructiveHint=False,
            idempotentHint=False,
            openWorldHint=True,
        )
        tool = cls(
            description=BROWSER_SCROLL_DESCRIPTION,
            action_type=BrowserScrollAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
# ============================================
|
|
376
|
+
# `browser_go_back`
|
|
377
|
+
# ============================================
|
|
378
|
+
class BrowserGoBackAction(BrowserAction):
    """Schema for going back in browser history."""

    # No fields: the action carries no parameters of its own.
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
BROWSER_GO_BACK_DESCRIPTION = """Go back to the previous page in browser history.
|
|
385
|
+
|
|
386
|
+
Use this tool to navigate back to the previously visited page, similar to clicking the
|
|
387
|
+
browser's back button.
|
|
388
|
+
""" # noqa: E501
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
class BrowserGoBackTool(ToolDefinition[BrowserGoBackAction, BrowserObservation]):
    """Tool for going back in browser history."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Build the ``browser_go_back`` tool around *executor*.

        Args:
            executor: Executor instance stored on the created tool.

        Returns:
            A one-element list holding the configured tool.
        """
        hints = ToolAnnotations(
            title="browser_go_back",
            readOnlyHint=False,
            destructiveHint=False,
            idempotentHint=False,
            openWorldHint=True,
        )
        tool = cls(
            description=BROWSER_GO_BACK_DESCRIPTION,
            action_type=BrowserGoBackAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
# ============================================
|
|
414
|
+
# `browser_list_tabs`
|
|
415
|
+
# ============================================
|
|
416
|
+
class BrowserListTabsAction(BrowserAction):
    """Schema for listing browser tabs."""

    # No fields: the action carries no parameters of its own.
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
BROWSER_LIST_TABS_DESCRIPTION = """List all open browser tabs.
|
|
423
|
+
|
|
424
|
+
This tool shows all currently open tabs with their IDs, titles, and URLs. Use the tab IDs
|
|
425
|
+
with browser_switch_tab or browser_close_tab.
|
|
426
|
+
""" # noqa: E501
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
class BrowserListTabsTool(ToolDefinition[BrowserListTabsAction, BrowserObservation]):
    """Tool for listing browser tabs."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Build the ``browser_list_tabs`` tool around *executor*.

        Args:
            executor: Executor instance stored on the created tool.

        Returns:
            A one-element list holding the configured tool.
        """
        # Annotated as read-only and idempotent, with openWorldHint off.
        hints = ToolAnnotations(
            title="browser_list_tabs",
            readOnlyHint=True,
            destructiveHint=False,
            idempotentHint=True,
            openWorldHint=False,
        )
        tool = cls(
            description=BROWSER_LIST_TABS_DESCRIPTION,
            action_type=BrowserListTabsAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
# ============================================
|
|
452
|
+
# `browser_switch_tab`
|
|
453
|
+
# ============================================
|
|
454
|
+
class BrowserSwitchTabAction(BrowserAction):
    """Schema for switching browser tabs."""

    # Required identifier of the target tab; no default is provided.
    tab_id: str = Field(
        description=(
            "4 Character Tab ID of the tab to switch to "
            "(from browser_list_tabs)"
        )
    )
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
BROWSER_SWITCH_TAB_DESCRIPTION = """Switch to a different browser tab.
|
|
464
|
+
|
|
465
|
+
Use this tool to switch between open tabs. Get the tab_id from browser_list_tabs.
|
|
466
|
+
|
|
467
|
+
Parameters:
|
|
468
|
+
- tab_id: 4 Character Tab ID of the tab to switch to
|
|
469
|
+
"""
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
class BrowserSwitchTabTool(ToolDefinition[BrowserSwitchTabAction, BrowserObservation]):
    """Tool for switching browser tabs."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Build the ``browser_switch_tab`` tool around *executor*.

        Args:
            executor: Executor instance stored on the created tool.

        Returns:
            A one-element list holding the configured tool.
        """
        hints = ToolAnnotations(
            title="browser_switch_tab",
            readOnlyHint=False,
            destructiveHint=False,
            idempotentHint=False,
            openWorldHint=False,
        )
        tool = cls(
            description=BROWSER_SWITCH_TAB_DESCRIPTION,
            action_type=BrowserSwitchTabAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
# ============================================
|
|
495
|
+
# `browser_close_tab`
|
|
496
|
+
# ============================================
|
|
497
|
+
class BrowserCloseTabAction(BrowserAction):
    """Schema for closing browser tabs."""

    # Required identifier of the tab to close; no default is provided.
    tab_id: str = Field(
        description=(
            "4 Character Tab ID of the tab to close (from browser_list_tabs)"
        )
    )
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
BROWSER_CLOSE_TAB_DESCRIPTION = """Close a specific browser tab.
|
|
506
|
+
|
|
507
|
+
Use this tool to close tabs you no longer need. Get the tab_id from browser_list_tabs.
|
|
508
|
+
|
|
509
|
+
Parameters:
|
|
510
|
+
- tab_id: 4 Character Tab ID of the tab to close
|
|
511
|
+
"""
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
class BrowserCloseTabTool(ToolDefinition[BrowserCloseTabAction, BrowserObservation]):
    """Tool for closing browser tabs."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Build the ``browser_close_tab`` tool around *executor*.

        Args:
            executor: Executor instance stored on the created tool.

        Returns:
            A one-element list holding the configured tool.
        """
        # The only browser tool here annotated with destructiveHint=True.
        hints = ToolAnnotations(
            title="browser_close_tab",
            readOnlyHint=False,
            destructiveHint=True,
            idempotentHint=False,
            openWorldHint=False,
        )
        tool = cls(
            description=BROWSER_CLOSE_TAB_DESCRIPTION,
            action_type=BrowserCloseTabAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
class BrowserToolSet(ToolDefinition[BrowserAction, BrowserObservation]):
    """A set of all browser tools.

    This tool set includes all available browser-related tools
    for interacting with web pages.

    The toolset automatically checks for Chromium availability
    when created and automatically installs it if missing.
    """

    @classmethod
    def create(
        cls,
        **executor_config,
    ) -> list[ToolDefinition[BrowserAction, BrowserObservation]]:
        """Create every browser tool, all sharing a single executor.

        Args:
            **executor_config: Keyword arguments forwarded unchanged to the
                ``BrowserToolExecutor`` constructor.

        Returns:
            A flat list of the tools produced by each member class's
            ``create``, in the order the classes are listed below.
        """
        # Deferred import: pulling the executor in at module import time
        # can hang, so it is only loaded when tools are actually built.
        from openhands.tools.browser_use.impl import BrowserToolExecutor

        shared_executor = BrowserToolExecutor(**executor_config)
        member_classes = (
            BrowserNavigateTool,
            BrowserClickTool,
            BrowserGetStateTool,
            BrowserGetContentTool,
            BrowserTypeTool,
            BrowserScrollTool,
            BrowserGoBackTool,
            BrowserListTabsTool,
            BrowserSwitchTabTool,
            BrowserCloseTabTool,
        )
        # Each create() returns a sequence, so flatten while collecting.
        return [
            tool
            for member in member_classes
            for tool in member.create(shared_executor)
        ]
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
register_tool(BrowserToolSet.name, BrowserToolSet)
|