fleet-python 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fleet-python might be problematic. Click here for more details.

@@ -0,0 +1,295 @@
1
+ """Fleet SDK Base Environment Classes."""
2
+
3
+ from typing import Any, Callable, Dict, List, Optional, Tuple
4
+ import asyncio
5
+ import httpx
6
+ import inspect
7
+ import time
8
+ import logging
9
+ from urllib.parse import urlparse
10
+
11
+ from ..resources.sqlite import AsyncSQLiteResource
12
+ from ..resources.browser import AsyncBrowserResource
13
+ from ..resources.base import Resource
14
+
15
+ from ..verifiers import DatabaseSnapshot
16
+
17
+ from ..exceptions import FleetEnvironmentError, FleetAPIError
18
+
19
+ from .base import SyncWrapper, AsyncWrapper
20
+ from .models import (
21
+ ResetRequest,
22
+ ResetResponse,
23
+ Resource as ResourceModel,
24
+ ResourceType,
25
+ HealthResponse,
26
+ ExecuteFunctionRequest,
27
+ ExecuteFunctionResponse,
28
+ )
29
+
30
+
31
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


# Maps each resource type to the wrapper class that
# `AsyncInstanceClient._load_resources` instantiates for a resource of that type.
RESOURCE_TYPES = {
    ResourceType.db: AsyncSQLiteResource,
    ResourceType.cdp: AsyncBrowserResource,
}

# Signature of the callables accepted by `AsyncInstanceClient.verify`:
# two database snapshots plus an optional string in, an int out.
# NOTE(review): the meaning of the two snapshots and of the int result is not
# shown in this module - confirm against the verifier implementation.
ValidatorType = Callable[
    [DatabaseSnapshot, DatabaseSnapshot, Optional[str]],
    int,
]
43
+
44
+
45
class InstanceClient:
    """Synchronous instance client (not implemented).

    Constructing this class always raises ``NotImplementedError``; use
    ``AsyncInstanceClient`` instead.
    """

    def __init__(
        self,
        url: str,
        httpx_client: Optional[httpx.Client] = None,
    ):
        """Reject construction of the synchronous client.

        Args:
            url: Base URL of the instance API (currently unused).
            httpx_client: Optional pre-built HTTP client (currently unused).

        Raises:
            NotImplementedError: Always.
        """
        # Raise before any HTTP client is built: creating a default
        # ``httpx.Client`` and then failing would leave it open with no owner
        # able to close it.
        raise NotImplementedError("SyncManager is not implemented")

    def reset(self) -> ResetResponse:
        """POST to ``/reset`` and parse the reply into a ``ResetResponse``.

        Unreachable until construction is implemented, because ``__init__``
        never returns an instance with ``self.client`` set.
        """
        response = self.client.request("POST", "/reset")
        return ResetResponse(**response.json())
60
+
61
+
62
class AsyncInstanceClient:
    """Async client for the HTTP API exposed by a single instance.

    Requests go through an ``AsyncWrapper`` bound to the instance URL. The
    resources reported by ``GET /resources`` are fetched once and cached,
    grouped by resource type value and then by resource name.
    """

    def __init__(
        self,
        url: str,
        httpx_client: Optional[httpx.AsyncClient] = None,
    ):
        """Create the client.

        Args:
            url: Base URL of the instance API.
            httpx_client: Optional pre-built HTTP client. When omitted, an
                ``httpx.AsyncClient`` with a 60 second timeout is created.
        """
        self.base_url = url
        # Track ownership so close() only shuts down a client created here and
        # never one that the caller passed in and may still be using.
        self._owns_httpx_client = not httpx_client
        self._httpx_client = httpx_client or httpx.AsyncClient(timeout=60.0)
        self.client = AsyncWrapper(
            url=self.base_url,
            httpx_client=self._httpx_client,
        )
        # None means "not fetched yet"; a list (possibly empty) means fetched.
        self._resources: Optional[List[ResourceModel]] = None
        # resource type value -> resource name -> resource wrapper
        self._resources_state: Dict[str, Dict[str, Resource]] = {
            resource_type.value: {} for resource_type in ResourceType
        }

    async def load(self) -> None:
        """Fetch and cache the instance's resources if not already done."""
        await self._load_resources()

    async def reset(
        self, reset_request: Optional[ResetRequest] = None
    ) -> ResetResponse:
        """POST to ``/reset``.

        Args:
            reset_request: Optional request model; sent as the JSON body when
                given, otherwise no JSON body is sent.

        Returns:
            The parsed ``ResetResponse``.
        """
        response = await self.client.request(
            "POST", "/reset", json=reset_request.model_dump() if reset_request else None
        )
        return ResetResponse(**response.json())

    def state(self, uri: str) -> Resource:
        """Look up a cached resource by URI.

        Args:
            uri: A URI whose scheme is the resource type value and whose
                network location is the resource name.

        Returns:
            The cached resource wrapper.

        Raises:
            KeyError: If no such type or name is cached.
        """
        url = urlparse(uri)
        return self._resources_state[url.scheme][url.netloc]

    def db(self, name: str) -> AsyncSQLiteResource:
        """
        Returns an AsyncSQLiteResource object for the given SQLite database name.

        Args:
            name: The name of the SQLite database to return

        Returns:
            An AsyncSQLiteResource object for the given SQLite database name
        """
        return AsyncSQLiteResource(
            self._resources_state[ResourceType.db.value][name], self.client
        )

    def browser(self, name: str) -> AsyncBrowserResource:
        """Return an AsyncBrowserResource for the cached browser resource ``name``.

        Raises:
            KeyError: If no browser resource with that name is cached.
        """
        return AsyncBrowserResource(
            self._resources_state[ResourceType.cdp.value][name], self.client
        )

    async def resources(self) -> List[Resource]:
        """Return every cached resource, loading them first if needed."""
        await self._load_resources()
        return [
            resource
            for resources_by_name in self._resources_state.values()
            for resource in resources_by_name.values()
        ]

    async def verify(self, validator: ValidatorType) -> ExecuteFunctionResponse:
        """Send a validator's source code to the instance for execution.

        Args:
            validator: Callable whose source is read with
                ``inspect.getsource`` and whose ``__name__`` is sent along.

        Returns:
            The parsed ``ExecuteFunctionResponse``.
        """
        function_code = inspect.getsource(validator)
        function_name = validator.__name__
        return await self.verify_raw(function_code, function_name)

    async def verify_raw(
        self, function_code: str, function_name: str
    ) -> ExecuteFunctionResponse:
        """POST function source and name to ``/execute_verifier_function``.

        Args:
            function_code: Source code of the function.
            function_name: Name of the function.

        Returns:
            The parsed ``ExecuteFunctionResponse``.
        """
        response = await self.client.request(
            "POST",
            "/execute_verifier_function",
            json=ExecuteFunctionRequest(
                function_code=function_code,
                function_name=function_name,
            ).model_dump(),
        )
        return ExecuteFunctionResponse(**response.json())

    async def _load_resources(self) -> None:
        """Fetch ``/resources`` once and populate the resource cache."""
        if self._resources is not None:
            return

        response = await self.client.request("GET", "/resources")
        if response.status_code != 200:
            # A failed fetch is cached as "no resources" and is not retried.
            self._resources = []
            return

        # Handle both old and new response formats
        response_data = response.json()
        if isinstance(response_data, dict) and "resources" in response_data:
            # Old format: {"resources": [...]}
            resources_list = response_data["resources"]
        else:
            # New format: [...]
            resources_list = response_data

        self._resources = [ResourceModel(**resource) for resource in resources_list]
        for resource in self._resources:
            # The cache is keyed by the enum *value*. Testing the enum member
            # itself for membership can miss the existing entry and reset the
            # per-type dict, dropping resources registered earlier.
            by_name = self._resources_state.setdefault(resource.type.value, {})
            by_name[resource.name] = RESOURCE_TYPES[resource.type](
                resource, self.client
            )

    # NOTE(review): step(), _wait_for_instance_ready(), _execute_action() and
    # _create_state_from_action() read `_instance_id`, `_client`,
    # `_step_count`, `_increment_step` and `_ensure_manager_client`, none of
    # which are defined by this class as shown - confirm where they come from.

    async def step(self, action: Dict[str, Any]) -> Tuple[Dict[str, Any], float, bool]:
        """Execute one step in the environment.

        Args:
            action: The action to execute as a dictionary.

        Returns:
            Tuple of (state, reward, done).

        Raises:
            FleetEnvironmentError: If no instance id is set, or if executing
                the action fails (the original error is chained as the cause).
        """
        if not self._instance_id:
            raise FleetEnvironmentError(
                "Environment not initialized. Call reset() first."
            )

        try:
            # Increment step count
            self._increment_step()

            # Execute action through instance manager API
            # This is a placeholder - actual implementation depends on the manager API spec
            state, reward, done = await self._execute_action(action)

            return state, reward, done

        except Exception as e:
            raise FleetEnvironmentError(f"Failed to execute step: {e}") from e

    async def close(self) -> None:
        """Close the environment and clean up resources.

        Best-effort: failures are logged rather than raised. Attributes that
        were never set on this object are treated as absent. An HTTP client
        created by ``__init__`` is closed; one supplied by the caller is left
        open.
        """
        try:
            instance_id = getattr(self, "_instance_id", None)
            api_client = getattr(self, "_client", None)

            # Delete instance if it exists
            if instance_id and api_client is not None:
                try:
                    await api_client.delete_instance(instance_id)
                    logger.info(f"Deleted instance: {instance_id}")
                except FleetAPIError as e:
                    logger.warning(f"Failed to delete instance: {e}")
                finally:
                    self._instance_id = None
                    self._instance_response = None

            # Close manager client
            manager_client = getattr(self, "_manager_client", None)
            if manager_client:
                await manager_client.close()
                self._manager_client = None

            # Close API client
            if api_client is not None:
                await api_client.close()

        except Exception as e:
            logger.error(f"Error closing environment: {e}")

        # Runs even when the cleanup above failed, so the connection pool this
        # object created is always released.
        if getattr(self, "_owns_httpx_client", False):
            try:
                await self._httpx_client.aclose()
            except Exception as e:
                logger.error(f"Error closing environment: {e}")

    async def manager_health_check(self) -> Optional[HealthResponse]:
        """GET ``/health`` and parse the reply into a ``HealthResponse``."""
        response = await self.client.request("GET", "/health")
        return HealthResponse(**response.json())

    async def _wait_for_instance_ready(self, timeout: float = 300.0) -> None:
        """Wait for instance to be ready.

        Polls the instance status every 5 seconds until it is ``"running"``.

        Args:
            timeout: Maximum time to wait in seconds

        Raises:
            FleetEnvironmentError: If the status becomes ``"error"`` or the
                timeout elapses.
        """
        start_time = time.time()

        while time.time() - start_time < timeout:
            try:
                instance = await self._client.get_instance(self._instance_id)
                self._instance_response = instance

                if instance.status == "running":
                    logger.info(f"Instance {self._instance_id} is ready")
                    return

                elif instance.status == "error":
                    raise FleetEnvironmentError(
                        f"Instance {self._instance_id} failed to start"
                    )

                # Wait before checking again
                await asyncio.sleep(5)

            except FleetAPIError as e:
                # API errors are retried until the deadline passes.
                if time.time() - start_time >= timeout:
                    raise FleetEnvironmentError(
                        f"Timeout waiting for instance to be ready: {e}"
                    )
                await asyncio.sleep(5)

        raise FleetEnvironmentError(
            f"Timeout waiting for instance {self._instance_id} to be ready"
        )

    async def _execute_action(
        self, action: Dict[str, Any]
    ) -> Tuple[Dict[str, Any], float, bool]:
        """Execute an action through the instance manager API.

        This is a placeholder implementation that should be extended based on
        the actual manager API specification.

        Args:
            action: The action to execute as a dictionary

        Returns:
            Tuple of (state, reward, done)
        """
        # Ensure manager client is available
        await self._ensure_manager_client()

        # TODO: In the future, this would use the manager API to execute actions
        # For example: await self._manager_client.log_action(action)
        # For now, return placeholder values

        # Create a placeholder state
        state = self._create_state_from_action(action)

        # Create a placeholder reward
        reward = 0.0

        # Determine if episode is done (placeholder logic)
        done = self._step_count >= 100  # Example: done after 100 steps

        return state, reward, done

    def _create_state_from_action(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """Create state based on executed action."""
        return {
            "instance_id": self._instance_id,
            "step": self._step_count,
            "last_action": action,
            "timestamp": time.time(),
            "status": "running",
        }

    async def __aenter__(self):
        """Async context manager entry."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        await self.close()
@@ -126,3 +126,16 @@ class Resource(BaseModel):
126
126
  type: ResourceType
127
127
  mode: ResourceMode
128
128
  label: Optional[str] = Field(None, title="Label")
129
+
130
+
131
class ExecuteFunctionRequest(BaseModel):
    """Request model carrying a function's source code and its name."""

    # Source code of the function (required).
    function_code: str
    # Name of the function (required).
    function_name: str
    # Optional string, defaults to None.
    # NOTE(review): how the receiving side uses this is not shown here - confirm.
    text_solution: Optional[str] = None
135
+
136
+
137
class ExecuteFunctionResponse(BaseModel):
    """Response model for a function-execution request."""

    # Required boolean flag.
    # NOTE(review): presumably True when execution succeeded - confirm.
    success: bool
    # Optional value of any type, defaults to None.
    result: Optional[Any] = None
    # Optional error text, defaults to None.
    error: Optional[str] = None
    # Required message string.
    message: str
fleet/playwright.py ADDED
@@ -0,0 +1,291 @@
1
+ import base64
2
+ from typing import List, Dict, Any
3
+ from playwright.async_api import async_playwright, Browser, Page
4
+ from .client import AsyncEnvironment
5
+
6
+
7
# Key mapping for computer use actions
# Keys are lower-case names as looked up by `_keypress` (which lower-cases the
# incoming key before the lookup); values are the key strings handed to
# Playwright's keyboard API. Keys not listed here are passed through unchanged.
# NOTE(review): "Divide" may not be a key name Playwright accepts for "/" -
# verify against the Playwright keyboard documentation.
CUA_KEY_TO_PLAYWRIGHT_KEY = {
    "/": "Divide",
    "\\": "Backslash",
    "alt": "Alt",
    "arrowdown": "ArrowDown",
    "arrowleft": "ArrowLeft",
    "arrowright": "ArrowRight",
    "arrowup": "ArrowUp",
    "backspace": "Backspace",
    "capslock": "CapsLock",
    "cmd": "Meta",
    "ctrl": "Control",
    "delete": "Delete",
    "end": "End",
    "enter": "Enter",
    "esc": "Escape",
    "home": "Home",
    "insert": "Insert",
    "option": "Alt",
    "pagedown": "PageDown",
    "pageup": "PageUp",
    "shift": "Shift",
    "space": " ",
    "super": "Meta",
    "tab": "Tab",
    "win": "Meta",
}
35
+
36
+
37
class FleetPlaywrightWrapper:
    """
    A wrapper that adds Playwright browser automation to Fleet environment instances.

    This class handles:
    - Browser connection via CDP
    - Computer actions (click, scroll, type, etc.)
    - Screenshot capture
    - Integration with OpenAI computer use API

    Usage:
        instance = await fleet.env.make(env_key="hubspot", version="v1.2.7")
        browser = FleetPlaywrightWrapper(instance)
        await browser.start()

        # Use browser methods
        screenshot = await browser.screenshot()
        tools = [browser.openai_cua_tool]

        # Clean up when done
        await browser.close()
    """

    # Action types that execute_computer_action() may dispatch. Each name maps
    # to the private method ``_<name>``. Dispatch is restricted to this set so
    # that a caller-supplied type can never reach an unrelated attribute such
    # as ``_page`` or ``_ensure_started``. Subclasses adding actions must
    # extend this set.
    _SUPPORTED_ACTIONS = frozenset(
        {
            "click",
            "double_click",
            "scroll",
            "type",
            "keypress",
            "move",
            "drag",
            "wait",
            "goto",
            "back",
            "forward",
            "refresh",
        }
    )

    def get_environment(self):
        """Return the environment kind, always ``"browser"``."""
        return "browser"

    def get_dimensions(self):
        """Return the configured ``(display_width, display_height)`` tuple."""
        # Report the size this wrapper was configured with, not a fixed
        # 1920x1080, so callers see the same size that start() applies.
        return (self.display_width, self.display_height)

    def __init__(
        self,
        env: "AsyncEnvironment",
        display_width: int = 1920,
        display_height: int = 1080,
    ):
        """
        Initialize the Fleet Playwright wrapper.

        Args:
            env: Fleet environment instance
            display_width: Browser viewport width
            display_height: Browser viewport height
        """
        self.env = env
        self.display_width = display_width
        self.display_height = display_height

        self._playwright = None
        self._browser: Browser | None = None
        self._page: Page | None = None
        self._started = False

    async def start(self):
        """Start the browser and establish connection.

        Does nothing if the wrapper is already started.
        """
        if self._started:
            return

        # Start Playwright
        self._playwright = await async_playwright().start()

        # Start browser on the Fleet instance
        print("Starting browser...")
        await self.env.browser().start()
        cdp = await self.env.browser().describe()

        # Connect to browser
        self._browser = await self._playwright.chromium.connect_over_cdp(
            cdp.cdp_browser_url
        )
        # Drive the first page of the first existing context.
        self._page = self._browser.contexts[0].pages[0]
        await self._page.set_viewport_size(
            {"width": self.display_width, "height": self.display_height}
        )

        self._started = True
        print(f"Track agent: {cdp.cdp_devtools_url}")

    async def close(self):
        """Close the browser connection."""
        if self._playwright:
            await self._playwright.stop()
            self._playwright = None
            self._browser = None
            self._page = None
            self._started = False

    def _ensure_started(self):
        """Ensure browser is started before operations.

        Raises:
            RuntimeError: If start() has not completed.
        """
        if not self._started:
            raise RuntimeError("Browser not started. Call await browser.start() first.")

    @property
    def openai_cua_tool(self) -> Dict[str, Any]:
        """
        Tool definition for OpenAI computer use API.

        Returns:
            Tool definition dict for use with OpenAI responses API
        """
        return {
            "type": "computer_use_preview",
            "display_width": self.display_width,
            "display_height": self.display_height,
            "environment": "browser",
        }

    async def screenshot(self) -> str:
        """
        Take a screenshot and return base64 encoded string.

        Returns:
            Base64 encoded PNG screenshot
        """
        self._ensure_started()

        png_bytes = await self._page.screenshot(full_page=False)
        return base64.b64encode(png_bytes).decode("utf-8")

    def get_current_url(self) -> str:
        """Get the current page URL."""
        self._ensure_started()
        return self._page.url

    async def execute_computer_action(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute a computer action and return the result for OpenAI API.

        Args:
            action: Computer action dict from OpenAI response. ``action["type"]``
                selects the action; every other key is passed to it as a
                keyword argument.

        Returns:
            Result dict for computer_call_output

        Raises:
            RuntimeError: If the browser has not been started.
            ValueError: If the action type is not a supported action.
        """
        self._ensure_started()

        action_type = action["type"]
        action_args = {k: v for k, v in action.items() if k != "type"}

        print(f"Executing: {action_type}({action_args})")

        # Execute the action. Only allow-listed names are dispatched; anything
        # else is rejected instead of being resolved with getattr().
        if not isinstance(action_type, str) or action_type not in self._SUPPORTED_ACTIONS:
            raise ValueError(f"Unsupported action type: {action_type}")
        method = getattr(self, f"_{action_type}")
        await method(**action_args)

        # Take screenshot after action
        screenshot_base64 = await self.screenshot()

        return {
            "type": "input_image",
            "image_url": f"data:image/png;base64,{screenshot_base64}",
            "current_url": self.get_current_url(),
        }

    # Computer action implementations
    async def _click(self, x: int, y: int, button: str = "left") -> None:
        """Click at coordinates."""
        self._ensure_started()
        await self._page.mouse.click(x, y, button=button)

    async def _double_click(self, x: int, y: int) -> None:
        """Double-click at coordinates."""
        self._ensure_started()
        await self._page.mouse.dblclick(x, y)

    async def _scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Scroll from coordinates."""
        self._ensure_started()
        await self._page.mouse.move(x, y)
        await self._page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")

    async def _type(self, text: str) -> None:
        """Type text."""
        self._ensure_started()
        await self._page.keyboard.type(text)

    async def _keypress(self, keys: List[str]) -> None:
        """Press key combination."""
        self._ensure_started()
        mapped_keys = [CUA_KEY_TO_PLAYWRIGHT_KEY.get(key.lower(), key) for key in keys]
        # Hold all keys down in order, then release in reverse, so the keys
        # act as one chord.
        for key in mapped_keys:
            await self._page.keyboard.down(key)
        for key in reversed(mapped_keys):
            await self._page.keyboard.up(key)

    async def _move(self, x: int, y: int) -> None:
        """Move mouse to coordinates."""
        self._ensure_started()
        await self._page.mouse.move(x, y)

    async def _drag(self, path: List[Dict[str, int]]) -> None:
        """Drag mouse along path.

        An empty path is a no-op.
        """
        self._ensure_started()
        if not path:
            return
        await self._page.mouse.move(path[0]["x"], path[0]["y"])
        await self._page.mouse.down()
        for point in path[1:]:
            await self._page.mouse.move(point["x"], point["y"])
        await self._page.mouse.up()

    async def _wait(self, ms: int = 1000) -> None:
        """Wait for specified milliseconds."""
        import asyncio

        await asyncio.sleep(ms / 1000)

    # Browser-specific actions
    async def _goto(self, url: str) -> None:
        """Navigate to URL.

        Navigation errors are printed, not raised.
        """
        self._ensure_started()
        try:
            await self._page.goto(url)
        except Exception as e:
            print(f"Error navigating to {url}: {e}")

    async def _back(self) -> None:
        """Go back in browser history."""
        self._ensure_started()
        await self._page.go_back()

    async def _forward(self) -> None:
        """Go forward in browser history."""
        self._ensure_started()
        await self._page.go_forward()

    async def _refresh(self) -> None:
        """Refresh the page."""
        self._ensure_started()
        await self._page.reload()

    # ------------------------------------------------------------------
    # Public aliases (no leading underscore) expected by the Agent &
    # OpenAI computer-use API. They forward directly to the underscored
    # implementations above so the external interface matches the older
    # BasePlaywrightComputer class.
    # ------------------------------------------------------------------

    # Mouse / keyboard actions
    click = _click
    double_click = _double_click
    scroll = _scroll
    type = _type  # noqa: A003 – shadowing built-in for API compatibility
    keypress = _keypress
    move = _move
    drag = _drag
    wait = _wait

    # Browser navigation actions
    goto = _goto
    back = _back
    forward = _forward
    refresh = _refresh
fleet/resources/base.py CHANGED
@@ -1,5 +1,5 @@
1
1
  from abc import ABC
2
- from ..env.models import Resource as ResourceModel, ResourceType, ResourceMode
2
+ from ..instance.models import Resource as ResourceModel, ResourceType, ResourceMode
3
3
 
4
4
 
5
5
  class Resource(ABC):
@@ -8,7 +8,7 @@ class Resource(ABC):
8
8
 
9
9
  @property
10
10
  def uri(self) -> str:
11
- return f"{self.resource.type}://{self.resource.name}"
11
+ return f"{self.resource.type.value}://{self.resource.name}"
12
12
 
13
13
  @property
14
14
  def name(self) -> str:
@@ -21,3 +21,6 @@ class Resource(ABC):
21
21
  @property
22
22
  def mode(self) -> ResourceMode:
23
23
  return self.resource.mode
24
+
25
+ def __repr__(self) -> str:
26
+ return f"Resource(uri={self.uri}, mode={self.mode.value})"
@@ -1,6 +1,5 @@
1
1
  from typing import Optional
2
-
3
- from ..env.models import (
2
+ from ..instance.models import (
4
3
  Resource as ResourceModel,
5
4
  CDPDescribeResponse,
6
5
  ChromeStartRequest,
@@ -11,7 +10,7 @@ from .base import Resource
11
10
  from typing import TYPE_CHECKING
12
11
 
13
12
  if TYPE_CHECKING:
14
- from ..env.base import AsyncWrapper
13
+ from ..instance.base import AsyncWrapper
15
14
 
16
15
 
17
16
  class AsyncBrowserResource(Resource):
@@ -19,16 +18,24 @@ class AsyncBrowserResource(Resource):
19
18
  super().__init__(resource)
20
19
  self.client = client
21
20
 
22
- async def start(
23
- self, start_request: Optional[ChromeStartRequest] = None
24
- ) -> ChromeStartResponse:
21
+ async def start(self, width: int = 1920, height: int = 1080) -> CDPDescribeResponse:
25
22
  response = await self.client.request(
26
23
  "POST",
27
24
  "/resources/cdp/start",
28
- json=start_request.model_dump() if start_request else None,
25
+ json=ChromeStartRequest(resolution=f"{width},{height}").model_dump(),
29
26
  )
30
- return ChromeStartResponse(**response.json())
27
+ ChromeStartResponse(**response.json())
28
+ return await self.describe()
31
29
 
32
30
  async def describe(self) -> CDPDescribeResponse:
33
31
  response = await self.client.request("GET", "/resources/cdp/describe")
32
+ if response.status_code != 200:
33
+ await self.start()
34
+ response = await self.client.request("GET", "/resources/cdp/describe")
34
35
  return CDPDescribeResponse(**response.json())
36
+
37
+ async def cdp_url(self) -> str:
38
+ return (await self.describe()).cdp_browser_url
39
+
40
+ async def devtools_url(self) -> str:
41
+ return (await self.describe()).cdp_devtools_url
fleet/resources/sqlite.py CHANGED
@@ -1,12 +1,12 @@
1
1
  from typing import Any, List, Optional
2
- from ..env.models import Resource as ResourceModel
3
- from ..env.models import DescribeResponse, QueryRequest, QueryResponse
2
+ from ..instance.models import Resource as ResourceModel
3
+ from ..instance.models import DescribeResponse, QueryRequest, QueryResponse
4
4
  from .base import Resource
5
5
 
6
6
  from typing import TYPE_CHECKING
7
7
 
8
8
  if TYPE_CHECKING:
9
- from ..env.base import AsyncWrapper
9
+ from ..instance.base import AsyncWrapper
10
10
 
11
11
 
12
12
  class AsyncSQLiteResource(Resource):