openhands-tools 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. openhands_tools-1.2.0/PKG-INFO +13 -0
  2. openhands_tools-1.2.0/openhands/tools/__init__.py +9 -0
  3. openhands_tools-1.2.0/openhands/tools/browser_use/__init__.py +55 -0
  4. openhands_tools-1.2.0/openhands/tools/browser_use/definition.py +574 -0
  5. openhands_tools-1.2.0/openhands/tools/browser_use/impl.py +354 -0
  6. openhands_tools-1.2.0/openhands/tools/browser_use/server.py +100 -0
  7. openhands_tools-1.2.0/openhands/tools/delegate/__init__.py +18 -0
  8. openhands_tools-1.2.0/openhands/tools/delegate/definition.py +116 -0
  9. openhands_tools-1.2.0/openhands/tools/delegate/impl.py +271 -0
  10. openhands_tools-1.2.0/openhands/tools/delegate/visualizer.py +230 -0
  11. openhands_tools-1.2.0/openhands/tools/file_editor/__init__.py +15 -0
  12. openhands_tools-1.2.0/openhands/tools/file_editor/definition.py +258 -0
  13. openhands_tools-1.2.0/openhands/tools/file_editor/editor.py +739 -0
  14. openhands_tools-1.2.0/openhands/tools/file_editor/exceptions.py +54 -0
  15. openhands_tools-1.2.0/openhands/tools/file_editor/impl.py +107 -0
  16. openhands_tools-1.2.0/openhands/tools/file_editor/utils/__init__.py +0 -0
  17. openhands_tools-1.2.0/openhands/tools/file_editor/utils/config.py +2 -0
  18. openhands_tools-1.2.0/openhands/tools/file_editor/utils/constants.py +9 -0
  19. openhands_tools-1.2.0/openhands/tools/file_editor/utils/diff.py +124 -0
  20. openhands_tools-1.2.0/openhands/tools/file_editor/utils/encoding.py +137 -0
  21. openhands_tools-1.2.0/openhands/tools/file_editor/utils/file_cache.py +158 -0
  22. openhands_tools-1.2.0/openhands/tools/file_editor/utils/history.py +123 -0
  23. openhands_tools-1.2.0/openhands/tools/file_editor/utils/shell.py +71 -0
  24. openhands_tools-1.2.0/openhands/tools/glob/__init__.py +15 -0
  25. openhands_tools-1.2.0/openhands/tools/glob/definition.py +115 -0
  26. openhands_tools-1.2.0/openhands/tools/glob/impl.py +278 -0
  27. openhands_tools-1.2.0/openhands/tools/grep/__init__.py +16 -0
  28. openhands_tools-1.2.0/openhands/tools/grep/definition.py +117 -0
  29. openhands_tools-1.2.0/openhands/tools/grep/impl.py +250 -0
  30. openhands_tools-1.2.0/openhands/tools/planning_file_editor/__init__.py +6 -0
  31. openhands_tools-1.2.0/openhands/tools/planning_file_editor/definition.py +120 -0
  32. openhands_tools-1.2.0/openhands/tools/planning_file_editor/impl.py +66 -0
  33. openhands_tools-1.2.0/openhands/tools/preset/__init__.py +25 -0
  34. openhands_tools-1.2.0/openhands/tools/preset/default.py +88 -0
  35. openhands_tools-1.2.0/openhands/tools/preset/planning.py +171 -0
  36. openhands_tools-1.2.0/openhands/tools/py.typed +0 -0
  37. openhands_tools-1.2.0/openhands/tools/task_tracker/__init__.py +14 -0
  38. openhands_tools-1.2.0/openhands/tools/task_tracker/definition.py +432 -0
  39. openhands_tools-1.2.0/openhands/tools/terminal/__init__.py +28 -0
  40. openhands_tools-1.2.0/openhands/tools/terminal/constants.py +31 -0
  41. openhands_tools-1.2.0/openhands/tools/terminal/definition.py +287 -0
  42. openhands_tools-1.2.0/openhands/tools/terminal/impl.py +191 -0
  43. openhands_tools-1.2.0/openhands/tools/terminal/metadata.py +101 -0
  44. openhands_tools-1.2.0/openhands/tools/terminal/terminal/__init__.py +24 -0
  45. openhands_tools-1.2.0/openhands/tools/terminal/terminal/factory.py +122 -0
  46. openhands_tools-1.2.0/openhands/tools/terminal/terminal/interface.py +229 -0
  47. openhands_tools-1.2.0/openhands/tools/terminal/terminal/subprocess_terminal.py +452 -0
  48. openhands_tools-1.2.0/openhands/tools/terminal/terminal/terminal_session.py +502 -0
  49. openhands_tools-1.2.0/openhands/tools/terminal/terminal/tmux_terminal.py +177 -0
  50. openhands_tools-1.2.0/openhands/tools/terminal/utils/command.py +150 -0
  51. openhands_tools-1.2.0/openhands/tools/utils/__init__.py +45 -0
  52. openhands_tools-1.2.0/openhands/tools/utils/timeout.py +14 -0
  53. openhands_tools-1.2.0/openhands_tools.egg-info/PKG-INFO +13 -0
  54. openhands_tools-1.2.0/openhands_tools.egg-info/SOURCES.txt +108 -0
  55. openhands_tools-1.2.0/openhands_tools.egg-info/dependency_links.txt +1 -0
  56. openhands_tools-1.2.0/openhands_tools.egg-info/requires.txt +8 -0
  57. openhands_tools-1.2.0/openhands_tools.egg-info/top_level.txt +1 -0
  58. openhands_tools-1.2.0/pyproject.toml +30 -0
  59. openhands_tools-1.2.0/setup.cfg +4 -0
@@ -0,0 +1,13 @@
1
+ Metadata-Version: 2.4
2
+ Name: openhands-tools
3
+ Version: 1.2.0
4
+ Summary: OpenHands Tools - Runtime tools for AI agents
5
+ Requires-Python: >=3.12
6
+ Requires-Dist: openhands-sdk
7
+ Requires-Dist: bashlex>=0.18
8
+ Requires-Dist: binaryornot>=0.4.4
9
+ Requires-Dist: cachetools
10
+ Requires-Dist: libtmux>=0.46.2
11
+ Requires-Dist: pydantic>=2.11.7
12
+ Requires-Dist: browser-use>=0.8.0
13
+ Requires-Dist: func-timeout>=4.3.5
@@ -0,0 +1,9 @@
1
+ """Runtime tools package."""
2
+
3
+ from importlib.metadata import PackageNotFoundError, version
4
+
5
+
6
# Resolve the package version from the installed distribution metadata.
try:
    __version__ = version("openhands-tools")
except PackageNotFoundError:
    # No distribution metadata named "openhands-tools" was found; use a
    # placeholder so that importing the package still succeeds.
    __version__ = "0.0.0"  # fallback for editable/unbuilt environments
@@ -0,0 +1,55 @@
1
+ """Browser tools using browser-use integration."""
2
+
3
+ from openhands.tools.browser_use.definition import (
4
+ BrowserClickAction,
5
+ BrowserClickTool,
6
+ BrowserCloseTabAction,
7
+ BrowserCloseTabTool,
8
+ BrowserGetContentAction,
9
+ BrowserGetContentTool,
10
+ BrowserGetStateAction,
11
+ BrowserGetStateTool,
12
+ BrowserGoBackAction,
13
+ BrowserGoBackTool,
14
+ BrowserListTabsAction,
15
+ BrowserListTabsTool,
16
+ BrowserNavigateAction,
17
+ BrowserNavigateTool,
18
+ BrowserObservation,
19
+ BrowserScrollAction,
20
+ BrowserScrollTool,
21
+ BrowserSwitchTabAction,
22
+ BrowserSwitchTabTool,
23
+ BrowserToolSet,
24
+ BrowserTypeAction,
25
+ BrowserTypeTool,
26
+ )
27
+
28
+
29
# Public names re-exported from openhands.tools.browser_use.definition.
__all__ = [
    # Tool classes
    "BrowserNavigateTool",
    "BrowserClickTool",
    "BrowserTypeTool",
    "BrowserGetStateTool",
    "BrowserGetContentTool",
    "BrowserScrollTool",
    "BrowserGoBackTool",
    "BrowserListTabsTool",
    "BrowserSwitchTabTool",
    "BrowserCloseTabTool",
    # Actions
    "BrowserNavigateAction",
    "BrowserClickAction",
    "BrowserTypeAction",
    "BrowserGetStateAction",
    "BrowserGetContentAction",
    "BrowserScrollAction",
    "BrowserGoBackAction",
    "BrowserListTabsAction",
    "BrowserSwitchTabAction",
    "BrowserCloseTabAction",
    # Observations
    "BrowserObservation",
    # Tool sets
    "BrowserToolSet",
]
@@ -0,0 +1,574 @@
1
+ """Browser-use tool implementation for web automation."""
2
+
3
+ from collections.abc import Sequence
4
+ from typing import TYPE_CHECKING, Literal, Self
5
+
6
+ from pydantic import Field
7
+
8
+ from openhands.sdk.llm import ImageContent, TextContent
9
+ from openhands.sdk.tool import (
10
+ Action,
11
+ Observation,
12
+ ToolAnnotations,
13
+ ToolDefinition,
14
+ register_tool,
15
+ )
16
+ from openhands.sdk.utils import maybe_truncate
17
+
18
+
19
+ # Lazy import to avoid hanging during module import
20
+ if TYPE_CHECKING:
21
+ from openhands.tools.browser_use.impl import BrowserToolExecutor
22
+
23
+
24
# Maximum output size for browser observations
MAX_BROWSER_OUTPUT_SIZE = 50000

# Mapping of base64 prefixes to MIME types for image detection.
# Each key is the base64 encoding of the leading magic bytes of the format.
BASE64_IMAGE_PREFIXES = {
    "/9j/": "image/jpeg",  # FF D8 FF
    "iVBORw0KGgo": "image/png",  # \x89PNG\r\n\x1a\n
    "R0lGODlh": "image/gif",  # "GIF89a"
    "R0lGODdh": "image/gif",  # "GIF87a" (older signature, previously missed)
    "UklGR": "image/webp",  # "RIFF" container header
}


def detect_image_mime_type(base64_data: str) -> str:
    """Detect MIME type from base64-encoded image data.

    The check is a plain string-prefix comparison against
    ``BASE64_IMAGE_PREFIXES``; the data is never decoded.

    Args:
        base64_data: Base64-encoded image data

    Returns:
        Detected MIME type, defaults to "image/png" if not detected
        (this includes the empty string).
    """
    for prefix, mime_type in BASE64_IMAGE_PREFIXES.items():
        if base64_data.startswith(prefix):
            return mime_type
    # Unknown or empty payload: keep the historical PNG default.
    return "image/png"
49
+
50
+
51
class BrowserObservation(Observation):
    """Base observation for browser operations."""

    screenshot_data: str | None = Field(
        default=None,
        description="Base64 screenshot data if available",
    )

    @property
    def to_llm_content(self) -> Sequence[TextContent | ImageContent]:
        """Assemble the content items handed to the LLM.

        Order of the returned items: the error header (only when
        ``is_error`` is set), the text truncated to
        ``MAX_BROWSER_OUTPUT_SIZE`` (only when non-empty), then the
        screenshot as a base64 data URL (only when present).
        """
        parts: list[TextContent | ImageContent] = []

        if self.is_error:
            parts.append(TextContent(text=self.ERROR_MESSAGE_HEADER))

        body = self.text
        if body:
            truncated = maybe_truncate(body, MAX_BROWSER_OUTPUT_SIZE)
            parts.append(TextContent(text=truncated))

        screenshot = self.screenshot_data
        if screenshot:
            # ImageContent takes URLs, so wrap the raw base64 in a data URL.
            mime = detect_image_mime_type(screenshot)
            parts.append(
                ImageContent(image_urls=[f"data:{mime};base64,{screenshot}"])
            )

        return parts
80
+
81
+
82
+ # ============================================
83
+ # Base Browser Action
84
+ # ============================================
85
class BrowserAction(Action):
    """Base class for all browser actions.

    This base class serves as the parent for all browser-related actions,
    enabling proper type hierarchy and eliminating the need for union types.
    """

    # Intentionally empty: declares no fields of its own.
    pass
93
+
94
+
95
+ # ============================================
96
+ # `go_to_url`
97
+ # ============================================
98
class BrowserNavigateAction(BrowserAction):
    """Schema for browser navigation."""

    # Required: no default is declared.
    url: str = Field(description="The URL to navigate to")
    # Optional: defaults to False.
    new_tab: bool = Field(
        default=False, description="Whether to open in a new tab. Default: False"
    )
105
+
106
+
107
# Description text passed to BrowserNavigateTool instances.
BROWSER_NAVIGATE_DESCRIPTION = """Navigate to a URL in the browser.

This tool allows you to navigate to any web page. You can optionally open the URL in a new tab.

Parameters:
- url: The URL to navigate to (required)
- new_tab: Whether to open in a new tab (optional, default: False)

Examples:
- Navigate to Google: url="https://www.google.com"
- Open GitHub in new tab: url="https://github.com", new_tab=True
"""  # noqa: E501


class BrowserNavigateTool(ToolDefinition[BrowserNavigateAction, BrowserObservation]):
    """Tool for browser navigation."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Return a one-element list holding this tool wired to ``executor``."""
        hints = ToolAnnotations(
            title="browser_navigate",
            readOnlyHint=False,
            destructiveHint=False,
            idempotentHint=False,
            openWorldHint=True,
        )
        tool = cls(
            description=BROWSER_NAVIGATE_DESCRIPTION,
            action_type=BrowserNavigateAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
141
+
142
+
143
+ # ============================================
144
+ # `browser_click`
145
+ # ============================================
146
class BrowserClickAction(BrowserAction):
    """Schema for clicking elements."""

    # Required; constrained to be >= 0 via ``ge=0``.
    index: int = Field(
        ge=0, description="The index of the element to click (from browser_get_state)"
    )
    # Optional: defaults to False.
    new_tab: bool = Field(
        default=False,
        description="Whether to open any resulting navigation in a new tab. Default: False",  # noqa: E501
    )
156
+
157
+
158
# Description text passed to BrowserClickTool instances.
BROWSER_CLICK_DESCRIPTION = """Click an element on the page by its index.

Use this tool to click on interactive elements like buttons, links, or form controls.
The index comes from the browser_get_state tool output.

Parameters:
- index: The index of the element to click (from browser_get_state)
- new_tab: Whether to open any resulting navigation in a new tab (optional)

Important: Only use indices that appear in your current browser_get_state output.
"""  # noqa: E501


class BrowserClickTool(ToolDefinition[BrowserClickAction, BrowserObservation]):
    """Tool for clicking browser elements."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Return a one-element list holding this tool wired to ``executor``."""
        hints = ToolAnnotations(
            title="browser_click",
            readOnlyHint=False,
            destructiveHint=False,
            idempotentHint=False,
            openWorldHint=True,
        )
        tool = cls(
            description=BROWSER_CLICK_DESCRIPTION,
            action_type=BrowserClickAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
191
+
192
+
193
+ # ============================================
194
+ # `browser_type`
195
+ # ============================================
196
class BrowserTypeAction(BrowserAction):
    """Schema for typing text into elements."""

    # Required; constrained to be >= 0 via ``ge=0``.
    index: int = Field(
        ge=0, description="The index of the input element (from browser_get_state)"
    )
    # Required: no default is declared.
    text: str = Field(description="The text to type")
203
+
204
+
205
# Description text passed to BrowserTypeTool instances.
BROWSER_TYPE_DESCRIPTION = """Type text into an input field.

Use this tool to enter text into form fields, search boxes, or other text input elements.
The index comes from the browser_get_state tool output.

Parameters:
- index: The index of the input element (from browser_get_state)
- text: The text to type

Important: Only use indices that appear in your current browser_get_state output.
"""  # noqa: E501


class BrowserTypeTool(ToolDefinition[BrowserTypeAction, BrowserObservation]):
    """Tool for typing text into browser elements."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Return a one-element list holding this tool wired to ``executor``."""
        hints = ToolAnnotations(
            title="browser_type",
            readOnlyHint=False,
            destructiveHint=False,
            idempotentHint=False,
            openWorldHint=True,
        )
        tool = cls(
            description=BROWSER_TYPE_DESCRIPTION,
            action_type=BrowserTypeAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
238
+
239
+
240
+ # ============================================
241
+ # `browser_get_state`
242
+ # ============================================
243
class BrowserGetStateAction(BrowserAction):
    """Schema for getting browser state."""

    # Optional: defaults to False.
    include_screenshot: bool = Field(
        default=False,
        description="Whether to include a screenshot of the current page. Default: False",  # noqa: E501
    )
250
+
251
+
252
# Description text passed to BrowserGetStateTool instances.
BROWSER_GET_STATE_DESCRIPTION = """Get the current state of the page including all interactive elements.

This tool returns the current page content with numbered interactive elements that you can
click or type into. Use this frequently to understand what's available on the page.

Parameters:
- include_screenshot: Whether to include a screenshot (optional, default: False)
"""  # noqa: E501


class BrowserGetStateTool(ToolDefinition[BrowserGetStateAction, BrowserObservation]):
    """Tool for getting browser state."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Return a one-element list holding this tool wired to ``executor``."""
        hints = ToolAnnotations(
            title="browser_get_state",
            readOnlyHint=True,
            destructiveHint=False,
            idempotentHint=True,
            openWorldHint=True,
        )
        tool = cls(
            description=BROWSER_GET_STATE_DESCRIPTION,
            action_type=BrowserGetStateAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
282
+
283
+
284
+ # ============================================
285
+ # `browser_get_content`
286
+ # ============================================
287
class BrowserGetContentAction(BrowserAction):
    """Schema for getting page content in markdown."""

    # Optional: defaults to False.
    extract_links: bool = Field(
        default=False,
        description="Whether to include links in the content (default: False)",
    )
    # Optional: defaults to 0 and is constrained to be >= 0 via ``ge=0``.
    start_from_char: int = Field(
        default=0,
        ge=0,
        description="Character index to start from in the page content (default: 0)",
    )
299
+
300
+
301
# Description text passed to BrowserGetContentTool instances.
BROWSER_GET_CONTENT_DESCRIPTION = """Extract the main content of the current page in clean markdown format. It has been filtered to remove noise and advertising content.

If the content was truncated and you need more information, use start_from_char parameter to continue from where truncation occurred.
"""  # noqa: E501


class BrowserGetContentTool(
    ToolDefinition[BrowserGetContentAction, BrowserObservation]
):
    """Tool for getting page content in markdown."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Return a one-element list holding this tool wired to ``executor``."""
        hints = ToolAnnotations(
            title="browser_get_content",
            readOnlyHint=True,
            destructiveHint=False,
            idempotentHint=True,
            openWorldHint=True,
        )
        tool = cls(
            description=BROWSER_GET_CONTENT_DESCRIPTION,
            action_type=BrowserGetContentAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
329
+
330
+
331
+ # ============================================
332
+ # `browser_scroll`
333
+ # ============================================
334
class BrowserScrollAction(BrowserAction):
    """Schema for scrolling the page."""

    # Optional: only "up" or "down" are accepted by the Literal type;
    # defaults to "down".
    direction: Literal["up", "down"] = Field(
        default="down",
        description="Direction to scroll. Options: 'up', 'down'. Default: 'down'",
    )
341
+
342
+
343
# Description text passed to BrowserScrollTool instances.
BROWSER_SCROLL_DESCRIPTION = """Scroll the page up or down.

Use this tool to scroll through page content when elements are not visible or when you need
to see more content.

Parameters:
- direction: Direction to scroll - "up" or "down" (optional, default: "down")
"""  # noqa: E501


class BrowserScrollTool(ToolDefinition[BrowserScrollAction, BrowserObservation]):
    """Tool for scrolling the browser page."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Return a one-element list holding this tool wired to ``executor``."""
        hints = ToolAnnotations(
            title="browser_scroll",
            readOnlyHint=False,
            destructiveHint=False,
            idempotentHint=False,
            openWorldHint=True,
        )
        tool = cls(
            description=BROWSER_SCROLL_DESCRIPTION,
            action_type=BrowserScrollAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
373
+
374
+
375
+ # ============================================
376
+ # `browser_go_back`
377
+ # ============================================
378
class BrowserGoBackAction(BrowserAction):
    """Schema for going back in browser history."""

    # Declares no fields of its own.
    pass
382
+
383
+
384
# Description text passed to BrowserGoBackTool instances.
BROWSER_GO_BACK_DESCRIPTION = """Go back to the previous page in browser history.

Use this tool to navigate back to the previously visited page, similar to clicking the
browser's back button.
"""  # noqa: E501


class BrowserGoBackTool(ToolDefinition[BrowserGoBackAction, BrowserObservation]):
    """Tool for going back in browser history."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Return a one-element list holding this tool wired to ``executor``."""
        hints = ToolAnnotations(
            title="browser_go_back",
            readOnlyHint=False,
            destructiveHint=False,
            idempotentHint=False,
            openWorldHint=True,
        )
        tool = cls(
            description=BROWSER_GO_BACK_DESCRIPTION,
            action_type=BrowserGoBackAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
411
+
412
+
413
+ # ============================================
414
+ # `browser_list_tabs`
415
+ # ============================================
416
class BrowserListTabsAction(BrowserAction):
    """Schema for listing browser tabs."""

    # Declares no fields of its own.
    pass
420
+
421
+
422
# Description text passed to BrowserListTabsTool instances.
BROWSER_LIST_TABS_DESCRIPTION = """List all open browser tabs.

This tool shows all currently open tabs with their IDs, titles, and URLs. Use the tab IDs
with browser_switch_tab or browser_close_tab.
"""  # noqa: E501


class BrowserListTabsTool(ToolDefinition[BrowserListTabsAction, BrowserObservation]):
    """Tool for listing browser tabs."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Return a one-element list holding this tool wired to ``executor``."""
        hints = ToolAnnotations(
            title="browser_list_tabs",
            readOnlyHint=True,
            destructiveHint=False,
            idempotentHint=True,
            openWorldHint=False,
        )
        tool = cls(
            description=BROWSER_LIST_TABS_DESCRIPTION,
            action_type=BrowserListTabsAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
449
+
450
+
451
+ # ============================================
452
+ # `browser_switch_tab`
453
+ # ============================================
454
class BrowserSwitchTabAction(BrowserAction):
    """Schema for switching browser tabs."""

    # Required: no default is declared. The description is assembled from
    # two string literals joined with ``+``.
    tab_id: str = Field(
        description="4 Character Tab ID of the tab to switch"
        + " to (from browser_list_tabs)"
    )
461
+
462
+
463
# Description text passed to BrowserSwitchTabTool instances.
BROWSER_SWITCH_TAB_DESCRIPTION = """Switch to a different browser tab.

Use this tool to switch between open tabs. Get the tab_id from browser_list_tabs.

Parameters:
- tab_id: 4 Character Tab ID of the tab to switch to
"""


class BrowserSwitchTabTool(ToolDefinition[BrowserSwitchTabAction, BrowserObservation]):
    """Tool for switching browser tabs."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Return a one-element list holding this tool wired to ``executor``."""
        hints = ToolAnnotations(
            title="browser_switch_tab",
            readOnlyHint=False,
            destructiveHint=False,
            idempotentHint=False,
            openWorldHint=False,
        )
        tool = cls(
            description=BROWSER_SWITCH_TAB_DESCRIPTION,
            action_type=BrowserSwitchTabAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
492
+
493
+
494
+ # ============================================
495
+ # `browser_close_tab`
496
+ # ============================================
497
class BrowserCloseTabAction(BrowserAction):
    """Schema for closing browser tabs."""

    # Required: no default is declared.
    tab_id: str = Field(
        description="4 Character Tab ID of the tab to close (from browser_list_tabs)"
    )
503
+
504
+
505
# Description text passed to BrowserCloseTabTool instances.
BROWSER_CLOSE_TAB_DESCRIPTION = """Close a specific browser tab.

Use this tool to close tabs you no longer need. Get the tab_id from browser_list_tabs.

Parameters:
- tab_id: 4 Character Tab ID of the tab to close
"""


class BrowserCloseTabTool(ToolDefinition[BrowserCloseTabAction, BrowserObservation]):
    """Tool for closing browser tabs."""

    @classmethod
    def create(cls, executor: "BrowserToolExecutor") -> Sequence[Self]:
        """Return a one-element list holding this tool wired to ``executor``."""
        hints = ToolAnnotations(
            title="browser_close_tab",
            readOnlyHint=False,
            destructiveHint=True,
            idempotentHint=False,
            openWorldHint=False,
        )
        tool = cls(
            description=BROWSER_CLOSE_TAB_DESCRIPTION,
            action_type=BrowserCloseTabAction,
            observation_type=BrowserObservation,
            annotations=hints,
            executor=executor,
        )
        return [tool]
534
+
535
+
536
class BrowserToolSet(ToolDefinition[BrowserAction, BrowserObservation]):
    """A set of all browser tools.

    This tool set includes all available browser-related tools
    for interacting with web pages.

    The toolset automatically checks for Chromium availability
    when created and automatically installs it if missing.
    """

    @classmethod
    def create(
        cls,
        **executor_config,
    ) -> list[ToolDefinition[BrowserAction, BrowserObservation]]:
        """Create every browser tool, all sharing a single executor.

        Args:
            **executor_config: Keyword arguments forwarded unchanged to
                ``BrowserToolExecutor``.

        Returns:
            The tools produced by each tool class's ``create``, flattened
            into one list in the order the classes are listed below.
        """
        # Deferred import: the executor module is only loaded when needed,
        # to avoid hanging during module import.
        from openhands.tools.browser_use.impl import BrowserToolExecutor

        shared_executor = BrowserToolExecutor(**executor_config)
        tool_classes = (
            BrowserNavigateTool,
            BrowserClickTool,
            BrowserGetStateTool,
            BrowserGetContentTool,
            BrowserTypeTool,
            BrowserScrollTool,
            BrowserGoBackTool,
            BrowserListTabsTool,
            BrowserSwitchTabTool,
            BrowserCloseTabTool,
        )
        # Each create() returns a sequence, so flatten while collecting.
        return [
            tool
            for tool_class in tool_classes
            for tool in tool_class.create(shared_executor)
        ]


register_tool(BrowserToolSet.name, BrowserToolSet)