fleet-python 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fleet-python might be problematic. Click here for more details.

@@ -0,0 +1,448 @@
1
+ import base64
2
+ from typing import List, Dict, Callable, Optional
3
+ from playwright.async_api import async_playwright, Browser, Page
4
+ import httpx
5
+ import json
6
+ import io
7
+ from io import BytesIO
8
+ from PIL import Image
9
+ import os
10
+ import asyncio
11
+ import fleet as flt
12
+
13
+
14
def sanitize_message(msg: dict) -> dict:
    """Return a log-friendly version of a conversation item.

    For items whose ``type`` is ``"computer_call_output"`` and whose
    ``output`` is a dict, a shallow copy is returned in which
    ``output["image_url"]`` is replaced by the placeholder ``"[omitted]"``.
    Every other item is returned unchanged (the very same object).
    """
    if msg.get("type") != "computer_call_output":
        return msg

    payload = msg.get("output", {})
    if not isinstance(payload, dict):
        return msg

    # Build new dicts so the caller's message keeps its real image data.
    return {**msg, "output": {**payload, "image_url": "[omitted]"}}
23
+
24
+
25
async def create_response(**kwargs):
    """POST ``kwargs`` as the JSON body to the OpenAI Responses endpoint.

    The bearer token is read from the ``OPENAI_API_KEY`` environment variable;
    if ``OPENAI_ORG`` is set it is sent as the ``Openai-Organization`` header.

    Returns:
        The decoded JSON body of the response. A non-200 status is reported
        on stdout and its body is still returned so the caller can inspect
        the ``"error"`` entry. If a non-200 response carries a body that is
        not valid JSON, ``{"error": {"message": ...}}`` is returned instead.

    Raises:
        ValueError: if a 200 response body cannot be decoded as JSON.
    """
    url = "https://api.openai.com/v1/responses"
    headers = {
        "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}",
        "Content-Type": "application/json",
    }

    openai_org = os.getenv("OPENAI_ORG")
    if openai_org:
        headers["Openai-Organization"] = openai_org

    # Allow up to 60 seconds for each phase: connect, read, write and pool.
    timeout = httpx.Timeout(connect=60.0, read=60.0, write=60.0, pool=60.0)

    async with httpx.AsyncClient(timeout=timeout) as client:
        response = await client.post(url, headers=headers, json=kwargs)

    if response.status_code != 200:
        print(f"Error: {response.status_code} {response.text}")

    try:
        return response.json()
    except ValueError:
        # JSON decode errors are ValueError subclasses. A successful response
        # that is not JSON is unexpected, so let that propagate.
        if response.status_code == 200:
            raise
        # Error responses (e.g. from a proxy) may be plain text or HTML; give
        # the caller the same "error" shape a JSON error body would have.
        return {
            "error": {
                "message": f"HTTP {response.status_code}: {response.text}",
            }
        }
46
+
47
+
48
def pp(obj):
    """Pretty-print ``obj`` to stdout as JSON indented by four spaces."""
    rendered = json.dumps(obj, indent=4)
    print(rendered)
50
+
51
+
52
def show_image(base_64_image):
    """Decode a base64-encoded image and display it via Pillow's ``Image.show()``."""
    raw_bytes = base64.b64decode(base_64_image)
    Image.open(BytesIO(raw_bytes)).show()
56
+
57
+
58
def calculate_image_dimensions(base_64_image):
    """Return the ``size`` attribute of the base64-encoded image as loaded by Pillow."""
    buffer = io.BytesIO(base64.b64decode(base_64_image))
    return Image.open(buffer).size
62
+
63
+
64
# Optional lookup table for models that emit "CUA"-style key names.
# Keys are lowercase CUA names; values are the names passed to Playwright's
# keyboard API. `keypress` lowercases each incoming key before looking it up
# and falls back to the key as given when it is not listed here.
CUA_KEY_TO_PLAYWRIGHT_KEY = {
    # Modifier and lock keys
    "alt": "Alt",
    "option": "Alt",
    "cmd": "Meta",
    "super": "Meta",
    "win": "Meta",
    "ctrl": "Control",
    "shift": "Shift",
    "capslock": "CapsLock",
    # Arrow keys
    "arrowdown": "ArrowDown",
    "arrowleft": "ArrowLeft",
    "arrowright": "ArrowRight",
    "arrowup": "ArrowUp",
    # Navigation keys
    "home": "Home",
    "end": "End",
    "pageup": "PageUp",
    "pagedown": "PageDown",
    # Editing and control keys
    "backspace": "Backspace",
    "delete": "Delete",
    "insert": "Insert",
    "enter": "Enter",
    "esc": "Escape",
    "tab": "Tab",
    "space": " ",
    # Symbols
    # NOTE(review): confirm "Divide" is a key name Playwright accepts for "/".
    "/": "Divide",
    "\\": "Backslash",
}
92
+
93
+
94
class BasePlaywrightComputer:
    """
    Abstract base for Playwright-based computers:

    - Subclasses override `_get_browser_and_page()` to do local or remote connection,
      returning (Browser, Page).
    - This base class starts and stops Playwright as an async context manager
      (`__aenter__`/`__aexit__`), and provides the standard "Computer" actions
      like click, scroll, etc.
    - We also have extra browser actions: `goto(url)`, `back()` and `forward()`.

    The action methods operate on `self._page`, so they must only be called
    inside `async with`.
    """

    def get_environment(self):
        """Return the environment name for this computer (always "browser")."""
        return "browser"

    def get_dimensions(self):
        """Return the fixed (width, height) of the display: (1920, 1080)."""
        return (1920, 1080)

    def __init__(self):
        self._playwright = None
        self._browser: Browser | None = None
        self._page: Page | None = None

    async def __aenter__(self):
        """Start Playwright, obtain the browser/page from the subclass hook, return self."""

        async def handle_route(route, request):
            # Pass-through handler: every request continues unchanged. No
            # filtering is implemented here.
            await route.continue_()

        self._playwright = await async_playwright().start()
        try:
            self._browser, self._page = await self._get_browser_and_page()
            # Intercept all requests with the pass-through handler above.
            await self._page.route("**/*", handle_route)
        except BaseException:
            # __aexit__ is not invoked when __aenter__ fails, so stop the
            # Playwright driver here instead of leaking it.
            await self._playwright.stop()
            self._playwright = None
            raise

        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Stop Playwright. The browser itself is not closed."""
        # NOTE(review): closing the browser was commented out in the original
        # (`await self._browser.close()`); presumably its lifetime is managed
        # elsewhere -- confirm before re-enabling.
        if self._playwright:
            await self._playwright.stop()

    def get_current_url(self) -> str:
        """Return the URL of the current page."""
        return self._page.url

    # --- Common "Computer" actions ---
    async def screenshot(self) -> str:
        """Capture only the viewport (not full_page) and return it base64-encoded."""
        png_bytes = await self._page.screenshot(full_page=False)
        return base64.b64encode(png_bytes).decode("utf-8")

    async def click(self, x: int, y: int, button: str = "left") -> None:
        """Click at (x, y), or handle the special "back"/"forward"/"wheel" buttons.

        "back" and "forward" navigate the page history; "wheel" forwards
        (x, y) to the mouse wheel. Any button other than "right" is treated
        as a left click.
        """
        if button == "back":
            await self.back()
        elif button == "forward":
            await self.forward()
        elif button == "wheel":
            await self._page.mouse.wheel(x, y)
        else:
            button_type = "right" if button == "right" else "left"
            await self._page.mouse.click(x, y, button=button_type)

    async def double_click(self, x: int, y: int) -> None:
        """Double-click at (x, y)."""
        await self._page.mouse.dblclick(x, y)

    async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Move the mouse to (x, y), then scroll the window by (scroll_x, scroll_y)."""
        await self._page.mouse.move(x, y)
        # Pass the deltas as arguments instead of formatting them into the
        # script text, so model-supplied values are never executed as code.
        await self._page.evaluate(
            "([dx, dy]) => window.scrollBy(dx, dy)", [scroll_x, scroll_y]
        )

    async def type(self, text: str) -> None:
        """Type `text` on the page's keyboard."""
        await self._page.keyboard.type(text)

    async def wait(self, ms: int = 1000) -> None:
        """Sleep for `ms` milliseconds without blocking the event loop."""
        await asyncio.sleep(ms / 1000)

    async def move(self, x: int, y: int) -> None:
        """Move the mouse to (x, y)."""
        await self._page.mouse.move(x, y)

    async def keypress(self, keys: List[str]) -> None:
        """Press `keys` together: all down in order, then released in reverse order.

        Key names are lowercased and translated through
        CUA_KEY_TO_PLAYWRIGHT_KEY; unknown names are used as given.
        """
        mapped_keys = [CUA_KEY_TO_PLAYWRIGHT_KEY.get(key.lower(), key) for key in keys]
        for key in mapped_keys:
            await self._page.keyboard.down(key)
        for key in reversed(mapped_keys):
            await self._page.keyboard.up(key)

    async def drag(self, path: List[Dict[str, int]]) -> None:
        """Drag the mouse along `path`, a list of {"x": ..., "y": ...} points.

        The button is pressed at the first point and released after the last.
        An empty path is a no-op.
        """
        if not path:
            return
        await self._page.mouse.move(path[0]["x"], path[0]["y"])
        await self._page.mouse.down()
        for point in path[1:]:
            await self._page.mouse.move(point["x"], point["y"])
        await self._page.mouse.up()

    # --- Extra browser-oriented actions ---
    async def goto(self, url: str) -> None:
        """Navigate to `url`, passing through the result of `page.goto`.

        Navigation errors are reported on stdout rather than raised
        (best-effort); in that case None is returned.
        """
        try:
            return await self._page.goto(url)
        except Exception as e:
            print(f"Error navigating to {url}: {e}")

    async def back(self) -> None:
        """Go back one entry in the page history."""
        return await self._page.go_back()

    async def forward(self) -> None:
        """Go forward one entry in the page history."""
        return await self._page.go_forward()

    # --- Subclass hook ---
    async def _get_browser_and_page(self) -> "tuple[Browser, Page]":
        """Subclasses must implement, returning (Browser, Page)."""
        raise NotImplementedError
205
+
206
+
207
class FleetPlaywrightBrowser(BasePlaywrightComputer):
    """Playwright computer driven through a Fleet environment instance.

    On entry it creates an instance of the requested Fleet environment,
    starts that instance's "cdp" browser and attaches Playwright to it over
    CDP. `self._page` follows the most recently opened page and falls back
    to the last remaining page when the current one is closed.
    """

    def __init__(
        self,
        fleet: flt.AsyncFleet,
        env_key: str,
        version: Optional[str] = None,
        headless: bool = False,
    ):
        super().__init__()
        self.fleet = fleet
        self.env_key = env_key
        self.version = version
        # Stored only; nothing in this class reads it.
        self.headless = headless

    async def _get_browser_and_page(self) -> tuple[Browser, Page]:
        """Create the Fleet instance, connect over CDP, and return (browser, page)."""
        width, height = self.get_dimensions()

        # Create an instance of the environment
        print(f"Creating instance of {self.env_key} {self.version}...")
        request = flt.InstanceRequest(env_key=self.env_key, version=self.version)
        self.instance = await self.fleet.make(request)

        # Start the browser
        print("Starting browser...")
        await self.instance.env.browser("cdp").start()
        print("Getting CDP URL...")
        cdp_info = await self.instance.env.browser("cdp").describe()
        print("DevTools URL:", cdp_info.cdp_devtools_url)

        # Connect to the browser
        chromium = self._playwright.chromium
        browser = await chromium.connect_over_cdp(cdp_info.cdp_browser_url)

        # Track pages opened later in the first browser context
        first_context = browser.contexts[0]
        first_context.on("page", self._handle_new_page)

        # Use the context's first page, sized to the computer's dimensions
        first_page = first_context.pages[0]
        viewport = {"width": width, "height": height}
        await first_page.set_viewport_size(viewport)
        first_page.on("close", self._handle_page_close)

        return browser, first_page

    def _handle_new_page(self, page: Page):
        """Make a newly created page the current page and watch for its closure."""
        print("New page created")
        self._page = page
        page.on("close", self._handle_page_close)

    def _handle_page_close(self, page: Page):
        """When the current page closes, switch to the last remaining page (or None)."""
        print("Page closed")
        if self._page != page:
            # Some other page closed; the current page is unaffected.
            return

        remaining = self._browser.contexts[0].pages
        if remaining:
            self._page = remaining[-1]
            return

        print("Warning: All pages have been closed.")
        self._page = None
267
+
268
+
269
class Agent:
    """
    A sample agent class that can be used to interact with a computer.

    (See simple_cua_loop.py for a simple example without an agent.)

    Args:
        model: Model name sent with every request.
        computer: Object providing the computer actions (click, screenshot, ...)
            plus `get_dimensions()`, `get_environment()` and `get_current_url()`.
        tools: Extra tool definitions. The list is copied, never modified.
        acknowledge_safety_check_callback: Called with the message of each
            pending safety check; return a truthy value to acknowledge it.
            The default rejects every check.
    """

    def __init__(
        self,
        model="computer-use-preview",
        computer: "FleetPlaywrightBrowser" = None,
        tools: Optional[list[dict]] = None,
        acknowledge_safety_check_callback: Callable = lambda message: False,
    ):
        self.model = model
        self.computer = computer
        # Copy so that adding the computer tool below never mutates the
        # caller's list (or a default shared between instances).
        self.tools = list(tools) if tools is not None else []
        self.print_steps = True
        self.debug = False
        self.show_images = False
        self.acknowledge_safety_check_callback = acknowledge_safety_check_callback

        if computer:
            dimensions = computer.get_dimensions()
            self.tools.append(
                {
                    "type": "computer-preview",
                    "display_width": dimensions[0],
                    "display_height": dimensions[1],
                    "environment": computer.get_environment(),
                }
            )

    def debug_print(self, *args):
        """Pretty-print `args` via `pp`, but only when debugging is enabled."""
        if self.debug:
            pp(*args)

    async def handle_item(self, item):
        """Handle each item; may cause a computer action + screenshot.

        Returns:
            A list of follow-up items for the conversation: a
            `function_call_output` for a function call, a
            `computer_call_output` (with a fresh screenshot) for a computer
            call, or an empty list for anything else.

        Raises:
            ValueError: if a pending safety check is not acknowledged. The
                check happens before the action is executed.
        """
        if self.debug:
            print(f"Handling item of type: {item.get('type')}")

        if item["type"] == "message":
            if self.print_steps:
                print(item["content"][0]["text"])

        if item["type"] == "function_call":
            name, args = item["name"], json.loads(item["arguments"])
            if self.print_steps:
                print(f"{name}({args})")

            if hasattr(self.computer, name):  # if function exists on computer, call it
                method = getattr(self.computer, name)
                await method(**args)
            return [
                {
                    "type": "function_call_output",
                    "call_id": item["call_id"],
                    "output": "success",  # hard-coded output for demo
                }
            ]

        if item["type"] == "computer_call":
            action = item["action"]
            action_type = action["type"]
            action_args = {k: v for k, v in action.items() if k != "type"}
            if self.print_steps:
                print(f"{action_type}({action_args})")

            # If the user doesn't ack all safety checks, exit with an error
            # *before* touching the browser, so a rejected action never runs.
            pending_checks = item.get("pending_safety_checks", [])
            for check in pending_checks:
                message = check["message"]
                if not self.acknowledge_safety_check_callback(message):
                    raise ValueError(
                        f"Safety check failed: {message}. Cannot continue with unacknowledged safety checks."
                    )

            method = getattr(self.computer, action_type)
            await method(**action_args)

            screenshot_base64 = await self.computer.screenshot()
            if self.show_images:
                show_image(screenshot_base64)

            call_output = {
                "type": "computer_call_output",
                "call_id": item["call_id"],
                "acknowledged_safety_checks": pending_checks,
                "output": {
                    "type": "input_image",
                    "image_url": f"data:image/png;base64,{screenshot_base64}",
                },
            }

            # Browser environments also report the URL of the current page.
            if self.computer.get_environment() == "browser":
                current_url = self.computer.get_current_url()
                call_output["output"]["current_url"] = current_url

            return [call_output]
        return []

    async def run_full_turn(
        self, input_items, print_steps=True, debug=False, show_images=False
    ):
        """Query the model and act on its output until it replies as the assistant.

        Args:
            input_items: Conversation so far; it is not modified.
            print_steps: Print messages and actions as they are handled.
            debug: Pretty-print requests and responses.
            show_images: Display each screenshot after an action.

        Returns:
            The items produced during this turn, in order: each model output
            item immediately followed by any output generated for it.

        Raises:
            ValueError: if the response has no "output" (API error or empty
                response), or a safety check is not acknowledged.
        """
        self.print_steps = print_steps
        self.debug = debug
        self.show_images = show_images
        new_items = []

        # keep looping until we get a final response
        while not new_items or new_items[-1].get("role") != "assistant":
            self.debug_print([sanitize_message(msg) for msg in input_items + new_items])

            response = await create_response(
                model=self.model,
                input=input_items + new_items,
                tools=self.tools,
                truncation="auto",
            )
            self.debug_print(response)

            if "output" not in response:
                if self.debug:
                    print("Full response:", response)
                if "error" in response:
                    error_msg = response["error"].get("message", "Unknown error")
                    raise ValueError(f"API Error: {error_msg}")
                raise ValueError("No output from model")

            # Append each item from the model output to conversation history
            # in the exact order we received them, **without filtering** so that
            # required pairs such as reasoning → computer_call are preserved.
            for item in response["output"]:
                # First, record the original item itself.
                new_items.append(item)

                # Next, perform any local side-effects (browser actions, etc.).
                handled_items = await self.handle_item(item)

                # If the handler generated additional items (e.g. computer_call_output)
                # we append those *immediately* so the order remains:
                # reasoning → computer_call → computer_call_output
                if handled_items:
                    new_items += handled_items

        return new_items
418
+
419
+
420
# Additional tool definitions handed to Agent in main(); none are configured here.
tools = []
421
+
422
+
423
async def ainput(prompt: str = "") -> str:
    """Async version of input().

    Runs the blocking built-in `input(prompt)` in the event loop's default
    executor so other tasks keep running while waiting for the user.

    Returns:
        The line entered by the user, without the trailing newline.
    """
    # Inside a coroutine there is always a running loop; get_running_loop()
    # is the documented way to obtain it.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, input, prompt)
427
+
428
+
429
async def main():
    """Run an interactive chat loop against the "hubspot" Fleet environment.

    Each line read from the prompt is appended to the conversation as a user
    message, the agent runs a full turn, and the items it produced are added
    to the history. The loop has no exit condition of its own.
    """
    fleet = flt.AsyncFleet()

    async with FleetPlaywrightBrowser(fleet, "hubspot", "v1.2.7") as computer:
        agent = Agent(computer=computer, tools=tools)

        # The conversation starts with a single developer instruction.
        developer_message = {
            "role": "developer",
            "content": "You have access to a clone of Hubspot. You can use the computer to navigate the browser and perform actions.",
        }
        items = [developer_message]

        while True:
            user_input = await ainput("> ")
            items.append({"role": "user", "content": user_input})
            turn_items = await agent.run_full_turn(
                items, show_images=False, debug=False
            )
            items.extend(turn_items)
445
+
446
+
447
# Start the interactive agent loop only when run as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())
examples/quickstart.py CHANGED
@@ -35,7 +35,7 @@ async def main():
35
35
  # 1. List available environments
36
36
  print("\n📋 Available environments:")
37
37
  try:
38
- environments = await fleet.env.list_envs()
38
+ environments = await fleet.manager.list_envs()
39
39
  for env in environments:
40
40
  print(f" - {env.env_key}: {env.name}")
41
41
  print(f" Description: {env.description}")
@@ -48,7 +48,7 @@ async def main():
48
48
  # 2. Create a new environment instance
49
49
  print("\n🚀 Creating new environment...")
50
50
  try:
51
- env = await fleet.env.make("fira:v1.2.5", region="us-west-1")
51
+ env = await fleet.manager.make("fira:v1.2.5", region="us-west-1")
52
52
  print(f"✅ Environment created with instance ID: {env.instance_id}")
53
53
 
54
54
  # Execute a simple action
@@ -85,7 +85,7 @@ async def main():
85
85
  # 3. List running instances
86
86
  print("\n🏃 Listing running instances...")
87
87
  try:
88
- instances = await fleet.env.list_instances(status="running")
88
+ instances = await fleet.manager.list_instances(status="running")
89
89
  if instances:
90
90
  print(f"Found {len(instances)} running instances:")
91
91
  for instance in instances:
@@ -99,13 +99,13 @@ async def main():
99
99
  print("\n🔗 Connecting to existing instance...")
100
100
  try:
101
101
  # Only get running instances
102
- running_instances = await fleet.env.list_instances(status="running")
102
+ running_instances = await fleet.manager.list_instances(status="running")
103
103
  if running_instances:
104
104
  # Find a running instance that's not the one we just created/deleted
105
105
  target_instance = running_instances[0]
106
106
  print(f"Connecting to running instance: {target_instance.instance_id}")
107
107
 
108
- env = await fleet.env.get(target_instance.instance_id)
108
+ env = await fleet.manager.get(target_instance.instance_id)
109
109
  print(f"✅ Connected to instance: {env.instance_id}")
110
110
 
111
111
  # Execute an action on the existing instance
fleet/__init__.py CHANGED
@@ -14,17 +14,38 @@
14
14
 
15
15
  """Fleet Python SDK - Environment-based AI agent interactions."""
16
16
 
17
- from .exceptions import FleetError, FleetAPIError, FleetTimeoutError, FleetConfigurationError
17
+ from .exceptions import (
18
+ FleetError,
19
+ FleetAPIError,
20
+ FleetTimeoutError,
21
+ FleetConfigurationError,
22
+ )
18
23
  from .client import Fleet, AsyncFleet, InstanceRequest
24
+ from .manager import (
25
+ ResetRequest,
26
+ ResetResponse,
27
+ CDPDescribeResponse,
28
+ ChromeStartRequest,
29
+ ChromeStartResponse,
30
+ ChromeStatusResponse,
31
+ )
32
+ from .verifiers import *
33
+ from . import env
19
34
 
20
35
  __version__ = "0.1.1"
21
36
  __all__ = [
22
37
  "env",
23
38
  "FleetError",
24
- "FleetAPIError",
39
+ "FleetAPIError",
25
40
  "FleetTimeoutError",
26
41
  "FleetConfigurationError",
27
42
  "Fleet",
28
43
  "AsyncFleet",
29
44
  "InstanceRequest",
30
- ]
45
+ "ResetRequest",
46
+ "ResetResponse",
47
+ "CDPDescribeResponse",
48
+ "ChromeStartRequest",
49
+ "ChromeStartResponse",
50
+ "ChromeStatusResponse",
51
+ ]
fleet/base.py CHANGED
@@ -4,7 +4,7 @@ from typing import Dict, Any, Optional
4
4
  from .models import InstanceResponse
5
5
 
6
6
 
7
- class InstanceBase(InstanceResponse):
7
+ class EnvironmentBase(InstanceResponse):
8
8
  @property
9
9
  def manager_url(self) -> str:
10
10
  return f"{self.urls.manager.api}"