fleet-python 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fleet-python might be problematic. Click here for more details.
- examples/dsl_example.py +127 -0
- examples/example.py +11 -24
- examples/json_tasks_example.py +82 -0
- examples/nova_act_example.py +18 -169
- examples/openai_example.py +127 -223
- examples/openai_simple_example.py +61 -0
- examples/quickstart.py +5 -5
- fleet/__init__.py +17 -1
- fleet/base.py +1 -1
- fleet/client.py +77 -30
- fleet/env/__init__.py +2 -21
- fleet/env/client.py +9 -253
- fleet/instance/__init__.py +25 -0
- fleet/instance/client.py +295 -0
- fleet/{env → instance}/models.py +13 -0
- fleet/playwright.py +291 -0
- fleet/resources/base.py +5 -2
- fleet/resources/browser.py +15 -8
- fleet/resources/sqlite.py +3 -3
- fleet/verifiers/__init__.py +16 -0
- fleet/verifiers/code.py +132 -0
- fleet/verifiers/db.py +706 -0
- fleet/verifiers/sql_differ.py +187 -0
- {fleet_python-0.2.1.dist-info → fleet_python-0.2.3.dist-info}/METADATA +3 -1
- fleet_python-0.2.3.dist-info/RECORD +31 -0
- fleet_python-0.2.1.dist-info/RECORD +0 -21
- /fleet/{env → instance}/base.py +0 -0
- {fleet_python-0.2.1.dist-info → fleet_python-0.2.3.dist-info}/WHEEL +0 -0
- {fleet_python-0.2.1.dist-info → fleet_python-0.2.3.dist-info}/licenses/LICENSE +0 -0
- {fleet_python-0.2.1.dist-info → fleet_python-0.2.3.dist-info}/top_level.txt +0 -0
fleet/instance/client.py
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
"""Fleet SDK Base Environment Classes."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
4
|
+
import asyncio
|
|
5
|
+
import httpx
|
|
6
|
+
import inspect
|
|
7
|
+
import time
|
|
8
|
+
import logging
|
|
9
|
+
from urllib.parse import urlparse
|
|
10
|
+
|
|
11
|
+
from ..resources.sqlite import AsyncSQLiteResource
|
|
12
|
+
from ..resources.browser import AsyncBrowserResource
|
|
13
|
+
from ..resources.base import Resource
|
|
14
|
+
|
|
15
|
+
from ..verifiers import DatabaseSnapshot
|
|
16
|
+
|
|
17
|
+
from ..exceptions import FleetEnvironmentError, FleetAPIError
|
|
18
|
+
|
|
19
|
+
from .base import SyncWrapper, AsyncWrapper
|
|
20
|
+
from .models import (
|
|
21
|
+
ResetRequest,
|
|
22
|
+
ResetResponse,
|
|
23
|
+
Resource as ResourceModel,
|
|
24
|
+
ResourceType,
|
|
25
|
+
HealthResponse,
|
|
26
|
+
ExecuteFunctionRequest,
|
|
27
|
+
ExecuteFunctionResponse,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)


# Wrapper class instantiated for each resource type reported by the
# instance's ``/resources`` endpoint (looked up in
# ``AsyncInstanceClient._load_resources``).
RESOURCE_TYPES = {
    ResourceType.db: AsyncSQLiteResource,
    ResourceType.cdp: AsyncBrowserResource,
}

# Signature expected of a verifier function passed to
# ``AsyncInstanceClient.verify``: two database snapshots plus an optional
# string, returning an int.
# NOTE(review): presumably (before, after, text_solution) -> score; confirm
# against the verifier documentation.
ValidatorType = Callable[
    [DatabaseSnapshot, DatabaseSnapshot, Optional[str]],
    int,
]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class InstanceClient:
    """Synchronous counterpart of ``AsyncInstanceClient`` (not implemented).

    Constructing this class always raises ``NotImplementedError``; use
    ``AsyncInstanceClient`` instead.
    """

    def __init__(
        self,
        url: str,
        httpx_client: Optional[httpx.Client] = None,
    ):
        """Reject construction: the synchronous client is not available yet.

        Args:
            url: Base URL of the instance API.
            httpx_client: Optional pre-configured ``httpx.Client``.

        Raises:
            NotImplementedError: Always.
        """
        # Raise before allocating anything. Building a default
        # ``httpx.Client()`` first would abandon an open client that no
        # caller can ever close, because the instance is never returned.
        # When the sync client is implemented, this is where ``base_url`` and
        # ``client = SyncWrapper(url=..., httpx_client=...)`` get assigned.
        raise NotImplementedError("SyncManager is not implemented")

    def reset(self) -> ResetResponse:
        """POST to ``/reset`` and parse the reply into a ``ResetResponse``.

        NOTE(review): unreachable while ``__init__`` raises, since
        ``self.client`` is never assigned.
        """
        response = self.client.request("POST", "/reset")
        return ResetResponse(**response.json())
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class AsyncInstanceClient:
    """Asynchronous client for the HTTP API exposed by one Fleet instance.

    The client talks to the instance through an ``AsyncWrapper`` bound to
    ``url``. It can list the instance's resources, hand out SQLite and
    browser resource wrappers, reset the instance, run verifier functions
    and query the health endpoint.

    Resource lookups (``state``, ``db``, ``browser``) read an in-memory
    index that is only filled by ``load()`` / ``resources()``.
    """

    def __init__(
        self,
        url: str,
        httpx_client: Optional[httpx.AsyncClient] = None,
    ):
        """Create a client for the instance API at ``url``.

        Args:
            url: Base URL of the instance API.
            httpx_client: HTTP client to use. When omitted, an
                ``httpx.AsyncClient`` with a 60 second timeout is created
                and owned (closed by ``close()``) by this object.
        """
        self.base_url = url

        # Track ownership so close() only shuts down a client created here
        # and never one the caller supplied and may still be using.
        self._owns_httpx_client = httpx_client is None
        self._httpx_client = (
            httpx.AsyncClient(timeout=60.0) if httpx_client is None else httpx_client
        )
        self.client = AsyncWrapper(
            url=self.base_url,
            httpx_client=self._httpx_client,
        )

        # None means "not fetched yet"; see _load_resources().
        self._resources: Optional[List[ResourceModel]] = None
        # Index: resource type value -> resource name -> resource wrapper.
        self._resources_state: Dict[str, Dict[str, Resource]] = {
            resource_type.value: {} for resource_type in ResourceType
        }

        # State read by step(), close() and _wait_for_instance_ready().
        # Nothing in this class assigns real values to these; initialising
        # them keeps those methods on their own guard paths instead of
        # failing with AttributeError.
        self._instance_id: Optional[str] = None
        self._instance_response: Optional[Any] = None
        self._client: Optional[Any] = None
        self._manager_client: Optional[Any] = None
        self._step_count: int = 0

    async def load(self) -> None:
        """Fetch the instance's resources (once) and index them."""
        await self._load_resources()

    async def reset(
        self, reset_request: Optional[ResetRequest] = None
    ) -> ResetResponse:
        """POST to ``/reset`` and return the parsed response.

        Args:
            reset_request: Optional request model; its ``model_dump()`` is
                sent as the JSON body. No body is sent when omitted.

        Returns:
            The reply parsed into a ``ResetResponse``.
        """
        payload = reset_request.model_dump() if reset_request else None
        response = await self.client.request("POST", "/reset", json=payload)
        return ResetResponse(**response.json())

    def state(self, uri: str) -> Resource:
        """Return the loaded resource addressed by ``<type>://<name>``.

        Args:
            uri: Resource URI; the scheme selects the resource type and the
                network location selects the resource name.

        Raises:
            KeyError: If no such resource has been loaded.
        """
        url = urlparse(uri)
        return self._resources_state[url.scheme][url.netloc]

    def db(self, name: str) -> AsyncSQLiteResource:
        """Return an ``AsyncSQLiteResource`` for the named SQLite database.

        Args:
            name: The name of the SQLite database to return.

        Returns:
            A new ``AsyncSQLiteResource`` built from the loaded entry.

        Raises:
            KeyError: If the database is not in the loaded resource index.
        """
        # NOTE(review): the index already stores wrapper objects (see
        # _load_resources), so this wraps a wrapper; confirm that is
        # intended rather than returning the stored object directly.
        return AsyncSQLiteResource(
            self._resources_state[ResourceType.db.value][name], self.client
        )

    def browser(self, name: str) -> AsyncBrowserResource:
        """Return an ``AsyncBrowserResource`` for the named CDP browser.

        Args:
            name: The name of the browser resource to return.

        Raises:
            KeyError: If the browser is not in the loaded resource index.
        """
        return AsyncBrowserResource(
            self._resources_state[ResourceType.cdp.value][name], self.client
        )

    async def resources(self) -> List[Resource]:
        """Load resources if needed and return every indexed resource."""
        await self._load_resources()
        return [
            resource
            for resources_by_name in self._resources_state.values()
            for resource in resources_by_name.values()
        ]

    async def verify(self, validator: ValidatorType) -> ExecuteFunctionResponse:
        """Send a local verifier function's source to the instance and run it.

        Args:
            validator: A Python function whose source can be retrieved with
                ``inspect.getsource``.

        Returns:
            The parsed response of the verifier execution.

        Raises:
            OSError: If the source code of ``validator`` cannot be retrieved.
            TypeError: If ``validator`` is not an object ``inspect`` can
                read source for.
        """
        function_code = inspect.getsource(validator)
        function_name = validator.__name__
        return await self.verify_raw(function_code, function_name)

    async def verify_raw(
        self, function_code: str, function_name: str
    ) -> ExecuteFunctionResponse:
        """POST verifier source code to ``/execute_verifier_function``.

        Args:
            function_code: Source code containing the verifier function.
            function_name: Name of the function to execute.

        Returns:
            The reply parsed into an ``ExecuteFunctionResponse``.
        """
        request_body = ExecuteFunctionRequest(
            function_code=function_code,
            function_name=function_name,
        ).model_dump()
        response = await self.client.request(
            "POST",
            "/execute_verifier_function",
            json=request_body,
        )
        return ExecuteFunctionResponse(**response.json())

    async def _load_resources(self) -> None:
        """Fetch ``/resources`` once and build the type/name index.

        A non-200 reply is cached as an empty list, so later calls do not
        retry.
        """
        if self._resources is not None:
            return

        response = await self.client.request("GET", "/resources")
        if response.status_code != 200:
            self._resources = []
            return

        # Handle both old and new response formats:
        #   old: {"resources": [...]}    new: [...]
        response_data = response.json()
        if isinstance(response_data, dict) and "resources" in response_data:
            resources_list = response_data["resources"]
        else:
            resources_list = response_data

        self._resources = [ResourceModel(**resource) for resource in resources_list]
        for resource in self._resources:
            # The index is keyed by the enum *value*. Testing membership with
            # the enum member itself can miss the existing key and replace
            # the per-type dict, discarding resources already registered.
            by_name = self._resources_state.setdefault(resource.type.value, {})
            by_name[resource.name] = RESOURCE_TYPES[resource.type](
                resource, self.client
            )

    async def step(self, action: Dict[str, Any]) -> Tuple[Dict[str, Any], float, bool]:
        """Execute one step in the environment.

        Args:
            action: The action to execute as a dictionary.

        Returns:
            Tuple of (state, reward, done).

        Raises:
            FleetEnvironmentError: If no instance id is set, or if executing
                the step fails for any reason.
        """
        if not self._instance_id:
            raise FleetEnvironmentError(
                "Environment not initialized. Call reset() first."
            )

        try:
            # NOTE(review): _increment_step() and (via _execute_action)
            # _ensure_manager_client() are not defined in this class; they
            # must come from elsewhere for this path to succeed.
            self._increment_step()

            # Execute action through instance manager API
            # This is a placeholder - actual implementation depends on the manager API spec
            state, reward, done = await self._execute_action(action)

            return state, reward, done

        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise FleetEnvironmentError(f"Failed to execute step: {e}") from e

    async def close(self) -> None:
        """Close the environment and clean up resources.

        Best effort: every failure is logged and none is raised. Deletes the
        instance when an instance id is set, closes the manager and API
        clients when present, and closes the HTTP client if this object
        created it.
        """
        try:
            # Delete instance if it exists
            if self._instance_id:
                try:
                    await self._client.delete_instance(self._instance_id)
                    logger.info(f"Deleted instance: {self._instance_id}")
                except FleetAPIError as e:
                    logger.warning(f"Failed to delete instance: {e}")
                finally:
                    self._instance_id = None
                    self._instance_response = None

            # Close manager client
            if self._manager_client:
                await self._manager_client.close()
                self._manager_client = None

            # Close API client
            if self._client:
                await self._client.close()

            # Release the HTTP client only when it was created here.
            if self._owns_httpx_client:
                await self._httpx_client.aclose()

        except Exception as e:
            logger.error(f"Error closing environment: {e}")

    async def manager_health_check(self) -> Optional[HealthResponse]:
        """GET ``/health`` and return the reply as a ``HealthResponse``."""
        response = await self.client.request("GET", "/health")
        return HealthResponse(**response.json())

    async def _wait_for_instance_ready(self, timeout: float = 300.0) -> None:
        """Wait for instance to be ready.

        Polls the instance status every 5 seconds until it is ``"running"``.

        Args:
            timeout: Maximum time to wait in seconds.

        Raises:
            FleetEnvironmentError: If the instance reports ``"error"`` or the
                timeout elapses.
        """
        # Monotonic clock: the deadline must not move with wall-clock changes.
        start_time = time.monotonic()

        while time.monotonic() - start_time < timeout:
            try:
                instance = await self._client.get_instance(self._instance_id)
                self._instance_response = instance

                if instance.status == "running":
                    logger.info(f"Instance {self._instance_id} is ready")
                    return

                elif instance.status == "error":
                    raise FleetEnvironmentError(
                        f"Instance {self._instance_id} failed to start"
                    )

                # Wait before checking again
                await asyncio.sleep(5)

            except FleetAPIError as e:
                # Transient API errors are retried until the deadline passes.
                if time.monotonic() - start_time >= timeout:
                    raise FleetEnvironmentError(
                        f"Timeout waiting for instance to be ready: {e}"
                    ) from e
                await asyncio.sleep(5)

        raise FleetEnvironmentError(
            f"Timeout waiting for instance {self._instance_id} to be ready"
        )

    async def _execute_action(
        self, action: Dict[str, Any]
    ) -> Tuple[Dict[str, Any], float, bool]:
        """Execute an action through the instance manager API.

        This is a placeholder implementation that should be extended based on
        the actual manager API specification.

        Args:
            action: The action to execute as a dictionary.

        Returns:
            Tuple of (state, reward, done).
        """
        # Ensure manager client is available
        await self._ensure_manager_client()

        # TODO: In the future, this would use the manager API to execute actions
        # For example: await self._manager_client.log_action(action)
        # For now, return placeholder values

        # Create a placeholder state
        state = self._create_state_from_action(action)

        # Create a placeholder reward
        reward = 0.0

        # Determine if episode is done (placeholder logic)
        done = self._step_count >= 100  # Example: done after 100 steps

        return state, reward, done

    def _create_state_from_action(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """Create state based on executed action."""
        return {
            "instance_id": self._instance_id,
            "step": self._step_count,
            "last_action": action,
            "timestamp": time.time(),
            "status": "running",
        }

    async def __aenter__(self):
        """Async context manager entry."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        await self.close()
|
fleet/{env → instance}/models.py
RENAMED
|
@@ -126,3 +126,16 @@ class Resource(BaseModel):
|
|
|
126
126
|
type: ResourceType
|
|
127
127
|
mode: ResourceMode
|
|
128
128
|
label: Optional[str] = Field(None, title="Label")
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class ExecuteFunctionRequest(BaseModel):
    """JSON body sent to the instance's ``/execute_verifier_function`` endpoint."""

    # Source code that contains the verifier function.
    function_code: str
    # Name of the function inside ``function_code`` to run.
    function_name: str
    # Optional free-text value; left unset by ``AsyncInstanceClient.verify_raw``.
    text_solution: Optional[str] = None
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class ExecuteFunctionResponse(BaseModel):
    """Reply parsed from the instance's ``/execute_verifier_function`` endpoint."""

    # Required flag reported by the instance.
    success: bool
    # Optional payload of any JSON type.
    # NOTE(review): presumably the verifier function's return value; confirm.
    result: Optional[Any] = None
    # Optional error text.
    error: Optional[str] = None
    # Required message string.
    message: str
|
fleet/playwright.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
from typing import List, Dict, Any
|
|
3
|
+
from playwright.async_api import async_playwright, Browser, Page
|
|
4
|
+
from .client import AsyncEnvironment
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Key mapping for computer use actions
|
|
8
|
+
CUA_KEY_TO_PLAYWRIGHT_KEY = {
    # Lookups are done with the lower-cased key name (see
    # ``FleetPlaywrightWrapper._keypress``); names missing from this table
    # are passed to Playwright unchanged.
    # NOTE(review): confirm "Divide" is a key name Playwright accepts for "/".
    "/": "Divide",
    "\\": "Backslash",
    "alt": "Alt",
    "arrowdown": "ArrowDown",
    "arrowleft": "ArrowLeft",
    "arrowright": "ArrowRight",
    "arrowup": "ArrowUp",
    "backspace": "Backspace",
    "capslock": "CapsLock",
    "cmd": "Meta",
    "ctrl": "Control",
    "delete": "Delete",
    "end": "End",
    "enter": "Enter",
    "esc": "Escape",
    "home": "Home",
    "insert": "Insert",
    "option": "Alt",
    "pagedown": "PageDown",
    "pageup": "PageUp",
    "shift": "Shift",
    "space": " ",
    "super": "Meta",
    "tab": "Tab",
    "win": "Meta",
}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class FleetPlaywrightWrapper:
    """
    A wrapper that adds Playwright browser automation to Fleet environment instances.

    This class handles:
    - Browser connection via CDP
    - Computer actions (click, scroll, type, etc.)
    - Screenshot capture
    - Integration with OpenAI computer use API

    Usage:
        instance = await fleet.env.make(env_key="hubspot", version="v1.2.7")
        browser = FleetPlaywrightWrapper(instance)
        await browser.start()

        # Use browser methods
        screenshot = await browser.screenshot()
        tools = [browser.openai_cua_tool]

        # Clean up when done
        await browser.close()
    """

    # Action types ``execute_computer_action`` may dispatch to. Each name maps
    # to the ``_<name>`` coroutine below. An explicit allowlist keeps
    # caller-supplied action types from reaching unrelated private attributes
    # such as ``_started`` or ``_page``. Subclasses that add actions should
    # extend this set.
    _SUPPORTED_ACTIONS = frozenset(
        {
            "click",
            "double_click",
            "scroll",
            "type",
            "keypress",
            "move",
            "drag",
            "wait",
            "goto",
            "back",
            "forward",
            "refresh",
        }
    )

    def get_environment(self):
        """Return the environment kind used for the computer-use tool."""
        return "browser"

    def get_dimensions(self):
        """Return the configured viewport size as ``(width, height)``."""
        # Report the configured size, not a constant, so this agrees with
        # ``openai_cua_tool`` and the viewport set in ``start()``.
        return (self.display_width, self.display_height)

    def __init__(
        self,
        env: "AsyncEnvironment",
        display_width: int = 1920,
        display_height: int = 1080,
    ):
        """
        Initialize the Fleet Playwright wrapper.

        Args:
            env: Fleet environment instance
            display_width: Browser viewport width
            display_height: Browser viewport height
        """
        self.env = env
        self.display_width = display_width
        self.display_height = display_height

        self._playwright = None
        self._browser: "Browser | None" = None
        self._page: "Page | None" = None
        self._started = False

    async def start(self):
        """Start the browser and establish connection.

        Does nothing if already started. If connecting fails after the
        Playwright driver was launched, the driver is stopped again before
        the error propagates.
        """
        if self._started:
            return

        # Start Playwright
        self._playwright = await async_playwright().start()

        try:
            # Start browser on the Fleet instance
            print("Starting browser...")
            await self.env.browser().start()
            cdp = await self.env.browser().describe()

            # Connect to browser
            self._browser = await self._playwright.chromium.connect_over_cdp(
                cdp.cdp_browser_url
            )
            # Use the first page of the first context exposed over CDP.
            self._page = self._browser.contexts[0].pages[0]
            await self._page.set_viewport_size(
                {"width": self.display_width, "height": self.display_height}
            )
        except Exception:
            # Do not leave a running Playwright driver behind a failed start.
            await self.close()
            raise

        self._started = True
        print(f"Track agent: {cdp.cdp_devtools_url}")

    async def close(self):
        """Close the browser connection."""
        try:
            if self._playwright:
                await self._playwright.stop()
        finally:
            # Reset state even if stopping Playwright raised.
            self._playwright = None
            self._browser = None
            self._page = None
            self._started = False

    def _ensure_started(self):
        """Ensure browser is started before operations.

        Raises:
            RuntimeError: If ``start()`` has not completed.
        """
        if not self._started:
            raise RuntimeError("Browser not started. Call await browser.start() first.")

    @property
    def openai_cua_tool(self) -> Dict[str, Any]:
        """
        Tool definition for OpenAI computer use API.

        Returns:
            Tool definition dict for use with OpenAI responses API
        """
        return {
            "type": "computer_use_preview",
            "display_width": self.display_width,
            "display_height": self.display_height,
            "environment": "browser",
        }

    async def screenshot(self) -> str:
        """
        Take a screenshot and return base64 encoded string.

        Returns:
            Base64 encoded PNG screenshot
        """
        self._ensure_started()

        png_bytes = await self._page.screenshot(full_page=False)
        return base64.b64encode(png_bytes).decode("utf-8")

    def get_current_url(self) -> str:
        """Get the current page URL."""
        self._ensure_started()
        return self._page.url

    async def execute_computer_action(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute a computer action and return the result for OpenAI API.

        Args:
            action: Computer action dict from OpenAI response. ``"type"``
                selects the action; all other keys are passed to it as
                keyword arguments.

        Returns:
            Result dict for computer_call_output

        Raises:
            RuntimeError: If the browser has not been started.
            KeyError: If ``action`` has no ``"type"`` key.
            ValueError: If the action type is not supported.
        """
        self._ensure_started()

        action_type = action["type"]
        action_args = {k: v for k, v in action.items() if k != "type"}

        print(f"Executing: {action_type}({action_args})")

        # Execute the action. Only allowlisted names are dispatched; anything
        # else is rejected.
        if action_type not in self._SUPPORTED_ACTIONS:
            raise ValueError(f"Unsupported action type: {action_type}")
        method = getattr(self, f"_{action_type}")
        await method(**action_args)

        # Take screenshot after action
        screenshot_base64 = await self.screenshot()

        return {
            "type": "input_image",
            "image_url": f"data:image/png;base64,{screenshot_base64}",
            "current_url": self.get_current_url(),
        }

    # Computer action implementations
    async def _click(self, x: int, y: int, button: str = "left") -> None:
        """Click at coordinates."""
        self._ensure_started()
        await self._page.mouse.click(x, y, button=button)

    async def _double_click(self, x: int, y: int) -> None:
        """Double-click at coordinates."""
        self._ensure_started()
        await self._page.mouse.dblclick(x, y)

    async def _scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Scroll from coordinates."""
        self._ensure_started()
        await self._page.mouse.move(x, y)
        # Pass the offsets as evaluate() arguments rather than formatting
        # them into JavaScript source, so values are never parsed as code.
        await self._page.evaluate(
            "([dx, dy]) => window.scrollBy(dx, dy)", [scroll_x, scroll_y]
        )

    async def _type(self, text: str) -> None:
        """Type text."""
        self._ensure_started()
        await self._page.keyboard.type(text)

    async def _keypress(self, keys: List[str]) -> None:
        """Press key combination.

        Keys are pressed in order and released in reverse order. Key names
        are translated through ``CUA_KEY_TO_PLAYWRIGHT_KEY`` by their
        lower-cased form; unknown names are used as given.
        """
        self._ensure_started()
        mapped_keys = [CUA_KEY_TO_PLAYWRIGHT_KEY.get(key.lower(), key) for key in keys]
        held: List[str] = []
        try:
            for key in mapped_keys:
                await self._page.keyboard.down(key)
                held.append(key)
        finally:
            # Release whatever was pressed, even if a later key-down failed,
            # so no modifier stays stuck for subsequent actions.
            for key in reversed(held):
                await self._page.keyboard.up(key)

    async def _move(self, x: int, y: int) -> None:
        """Move mouse to coordinates."""
        self._ensure_started()
        await self._page.mouse.move(x, y)

    async def _drag(self, path: List[Dict[str, int]]) -> None:
        """Drag mouse along path.

        Args:
            path: Points as dicts with ``"x"`` and ``"y"``; an empty path is
                a no-op.
        """
        self._ensure_started()
        if not path:
            return
        await self._page.mouse.move(path[0]["x"], path[0]["y"])
        await self._page.mouse.down()
        try:
            for point in path[1:]:
                await self._page.mouse.move(point["x"], point["y"])
        finally:
            # Always release the button, even if a move along the path fails.
            await self._page.mouse.up()

    async def _wait(self, ms: int = 1000) -> None:
        """Wait for specified milliseconds."""
        import asyncio

        await asyncio.sleep(ms / 1000)

    # Browser-specific actions
    async def _goto(self, url: str) -> None:
        """Navigate to URL.

        Navigation errors are printed and not raised.
        """
        self._ensure_started()
        try:
            await self._page.goto(url)
        except Exception as e:
            print(f"Error navigating to {url}: {e}")

    async def _back(self) -> None:
        """Go back in browser history."""
        self._ensure_started()
        await self._page.go_back()

    async def _forward(self) -> None:
        """Go forward in browser history."""
        self._ensure_started()
        await self._page.go_forward()

    async def _refresh(self) -> None:
        """Refresh the page."""
        self._ensure_started()
        await self._page.reload()

    # ------------------------------------------------------------------
    # Public aliases (no leading underscore) expected by the Agent &
    # OpenAI computer-use API. They forward directly to the underscored
    # implementations above so the external interface matches the older
    # BasePlaywrightComputer class.
    # ------------------------------------------------------------------

    # Mouse / keyboard actions
    click = _click
    double_click = _double_click
    scroll = _scroll
    type = _type  # noqa: A003 – shadowing built-in for API compatibility
    keypress = _keypress
    move = _move
    drag = _drag
    wait = _wait

    # Browser navigation actions
    goto = _goto
    back = _back
    forward = _forward
    refresh = _refresh
|
fleet/resources/base.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from abc import ABC
|
|
2
|
-
from ..
|
|
2
|
+
from ..instance.models import Resource as ResourceModel, ResourceType, ResourceMode
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
class Resource(ABC):
|
|
@@ -8,7 +8,7 @@ class Resource(ABC):
|
|
|
8
8
|
|
|
9
9
|
@property
|
|
10
10
|
def uri(self) -> str:
|
|
11
|
-
return f"{self.resource.type}://{self.resource.name}"
|
|
11
|
+
return f"{self.resource.type.value}://{self.resource.name}"
|
|
12
12
|
|
|
13
13
|
@property
|
|
14
14
|
def name(self) -> str:
|
|
@@ -21,3 +21,6 @@ class Resource(ABC):
|
|
|
21
21
|
@property
|
|
22
22
|
def mode(self) -> ResourceMode:
|
|
23
23
|
return self.resource.mode
|
|
24
|
+
|
|
25
|
+
def __repr__(self) -> str:
|
|
26
|
+
return f"Resource(uri={self.uri}, mode={self.mode.value})"
|
fleet/resources/browser.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
|
-
|
|
3
|
-
from ..env.models import (
|
|
2
|
+
from ..instance.models import (
|
|
4
3
|
Resource as ResourceModel,
|
|
5
4
|
CDPDescribeResponse,
|
|
6
5
|
ChromeStartRequest,
|
|
@@ -11,7 +10,7 @@ from .base import Resource
|
|
|
11
10
|
from typing import TYPE_CHECKING
|
|
12
11
|
|
|
13
12
|
if TYPE_CHECKING:
|
|
14
|
-
from ..
|
|
13
|
+
from ..instance.base import AsyncWrapper
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
class AsyncBrowserResource(Resource):
|
|
@@ -19,16 +18,24 @@ class AsyncBrowserResource(Resource):
|
|
|
19
18
|
super().__init__(resource)
|
|
20
19
|
self.client = client
|
|
21
20
|
|
|
22
|
-
async def start(
|
|
23
|
-
self, start_request: Optional[ChromeStartRequest] = None
|
|
24
|
-
) -> ChromeStartResponse:
|
|
21
|
+
async def start(self, width: int = 1920, height: int = 1080) -> CDPDescribeResponse:
|
|
25
22
|
response = await self.client.request(
|
|
26
23
|
"POST",
|
|
27
24
|
"/resources/cdp/start",
|
|
28
|
-
json=
|
|
25
|
+
json=ChromeStartRequest(resolution=f"{width},{height}").model_dump(),
|
|
29
26
|
)
|
|
30
|
-
|
|
27
|
+
ChromeStartResponse(**response.json())
|
|
28
|
+
return await self.describe()
|
|
31
29
|
|
|
32
30
|
async def describe(self) -> CDPDescribeResponse:
    """Return the CDP description of the browser, starting it once if needed.

    GETs ``/resources/cdp/describe``. If the reply is not HTTP 200, the
    browser is started with the default 1920x1080 resolution and the
    describe request is repeated a single time.

    Returns:
        The describe reply parsed into a ``CDPDescribeResponse``.
    """
    response = await self.client.request("GET", "/resources/cdp/describe")
    if response.status_code != 200:
        # Issue the start request directly instead of calling self.start():
        # start() finishes by calling describe(), so delegating to it would
        # recurse without bound while the endpoint keeps failing.
        start_response = await self.client.request(
            "POST",
            "/resources/cdp/start",
            json=ChromeStartRequest(resolution="1920,1080").model_dump(),
        )
        # Validate the start reply the same way start() does.
        ChromeStartResponse(**start_response.json())
        response = await self.client.request("GET", "/resources/cdp/describe")
    return CDPDescribeResponse(**response.json())
|
|
36
|
+
|
|
37
|
+
async def cdp_url(self) -> str:
|
|
38
|
+
return (await self.describe()).cdp_browser_url
|
|
39
|
+
|
|
40
|
+
async def devtools_url(self) -> str:
|
|
41
|
+
return (await self.describe()).cdp_devtools_url
|
fleet/resources/sqlite.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
from typing import Any, List, Optional
|
|
2
|
-
from ..
|
|
3
|
-
from ..
|
|
2
|
+
from ..instance.models import Resource as ResourceModel
|
|
3
|
+
from ..instance.models import DescribeResponse, QueryRequest, QueryResponse
|
|
4
4
|
from .base import Resource
|
|
5
5
|
|
|
6
6
|
from typing import TYPE_CHECKING
|
|
7
7
|
|
|
8
8
|
if TYPE_CHECKING:
|
|
9
|
-
from ..
|
|
9
|
+
from ..instance.base import AsyncWrapper
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class AsyncSQLiteResource(Resource):
|