fleet-python 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.

This version of fleet-python might be problematic. (The original page linked to more details here; the link target was not preserved in this text.)

@@ -1,241 +1,60 @@
1
- import time
2
- import base64
3
- from typing import List, Dict, Callable
4
- from playwright.sync_api import sync_playwright, Browser, Page
5
-
6
- # Optional: key mapping if your model uses "CUA" style keys
7
- CUA_KEY_TO_PLAYWRIGHT_KEY = {
8
- "/": "Divide",
9
- "\\": "Backslash",
10
- "alt": "Alt",
11
- "arrowdown": "ArrowDown",
12
- "arrowleft": "ArrowLeft",
13
- "arrowright": "ArrowRight",
14
- "arrowup": "ArrowUp",
15
- "backspace": "Backspace",
16
- "capslock": "CapsLock",
17
- "cmd": "Meta",
18
- "ctrl": "Control",
19
- "delete": "Delete",
20
- "end": "End",
21
- "enter": "Enter",
22
- "esc": "Escape",
23
- "home": "Home",
24
- "insert": "Insert",
25
- "option": "Alt",
26
- "pagedown": "PageDown",
27
- "pageup": "PageUp",
28
- "shift": "Shift",
29
- "space": " ",
30
- "super": "Meta",
31
- "tab": "Tab",
32
- "win": "Meta",
33
- }
34
-
35
-
36
- class BasePlaywrightComputer:
37
- """
38
- Abstract base for Playwright-based computers:
39
-
40
- - Subclasses override `_get_browser_and_page()` to do local or remote connection,
41
- returning (Browser, Page).
42
- - This base class handles context creation (`__enter__`/`__exit__`),
43
- plus standard "Computer" actions like click, scroll, etc.
44
- - We also have extra browser actions: `goto(url)` and `back()`.
45
- """
46
-
47
- def get_environment(self):
48
- return "browser"
49
-
50
- def get_dimensions(self):
51
- return (1024, 768)
52
-
53
- def __init__(self):
54
- self._playwright = None
55
- self._browser: Browser | None = None
56
- self._page: Page | None = None
57
-
58
- def __enter__(self):
59
- # Start Playwright and call the subclass hook for getting browser/page
60
- self._playwright = sync_playwright().start()
61
- self._browser, self._page = self._get_browser_and_page()
62
-
63
- # Set up network interception to flag URLs matching domains in BLOCKED_DOMAINS
64
- def handle_route(route, request):
65
- route.continue_()
66
-
67
- self._page.route("**/*", handle_route)
68
-
69
- return self
70
-
71
- def __exit__(self, exc_type, exc_val, exc_tb):
72
- if self._browser:
73
- self._browser.close()
74
- if self._playwright:
75
- self._playwright.stop()
76
-
77
- def get_current_url(self) -> str:
78
- return self._page.url
79
-
80
- # --- Common "Computer" actions ---
81
- def screenshot(self) -> str:
82
- """Capture only the viewport (not full_page)."""
83
- png_bytes = self._page.screenshot(full_page=False)
84
- return base64.b64encode(png_bytes).decode("utf-8")
85
-
86
- def click(self, x: int, y: int, button: str = "left") -> None:
87
- if button == "back":
88
- self.back()
89
- elif button == "forward":
90
- self.forward()
91
- elif button == "wheel":
92
- self._page.mouse.wheel(x, y)
93
- else:
94
- button_mapping = {"left": "left", "right": "right"}
95
- button_type = button_mapping.get(button, "left")
96
- self._page.mouse.click(x, y, button=button_type)
97
-
98
- def double_click(self, x: int, y: int) -> None:
99
- self._page.mouse.dblclick(x, y)
100
-
101
- def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
102
- self._page.mouse.move(x, y)
103
- self._page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")
104
-
105
- def type(self, text: str) -> None:
106
- self._page.keyboard.type(text)
107
-
108
- def wait(self, ms: int = 1000) -> None:
109
- time.sleep(ms / 1000)
110
-
111
- def move(self, x: int, y: int) -> None:
112
- self._page.mouse.move(x, y)
113
-
114
- def keypress(self, keys: List[str]) -> None:
115
- mapped_keys = [CUA_KEY_TO_PLAYWRIGHT_KEY.get(key.lower(), key) for key in keys]
116
- for key in mapped_keys:
117
- self._page.keyboard.down(key)
118
- for key in reversed(mapped_keys):
119
- self._page.keyboard.up(key)
120
-
121
- def drag(self, path: List[Dict[str, int]]) -> None:
122
- if not path:
123
- return
124
- self._page.mouse.move(path[0]["x"], path[0]["y"])
125
- self._page.mouse.down()
126
- for point in path[1:]:
127
- self._page.mouse.move(point["x"], point["y"])
128
- self._page.mouse.up()
129
-
130
- # --- Extra browser-oriented actions ---
131
- def goto(self, url: str) -> None:
132
- try:
133
- return self._page.goto(url)
134
- except Exception as e:
135
- print(f"Error navigating to {url}: {e}")
136
-
137
- def back(self) -> None:
138
- return self._page.go_back()
139
-
140
- def forward(self) -> None:
141
- return self._page.go_forward()
142
-
143
- # --- Subclass hook ---
144
- def _get_browser_and_page(self) -> tuple[Browser, Page]:
145
- """Subclasses must implement, returning (Browser, Page)."""
146
- raise NotImplementedError
147
-
148
-
149
- class LocalPlaywrightBrowser(BasePlaywrightComputer):
150
- """Launches a local Chromium instance using Playwright."""
151
-
152
- def __init__(self, headless: bool = False):
153
- super().__init__()
154
- self.headless = headless
155
-
156
- def _get_browser_and_page(self) -> tuple[Browser, Page]:
157
- width, height = self.get_dimensions()
158
- launch_args = [
159
- f"--window-size={width},{height}",
160
- "--disable-extensions",
161
- "--disable-file-system",
162
- ]
163
- browser = self._playwright.chromium.launch(
164
- chromium_sandbox=True,
165
- headless=self.headless,
166
- args=launch_args,
167
- env={"DISPLAY": ":0"},
168
- )
169
-
170
- context = browser.new_context()
171
-
172
- # Add event listeners for page creation and closure
173
- context.on("page", self._handle_new_page)
1
+ import asyncio
2
+ from openai import AsyncOpenAI
3
+ import fleet as flt
4
+ import json
5
+ from typing import Callable
174
6
 
175
- page = context.new_page()
176
- page.set_viewport_size({"width": width, "height": height})
177
- page.on("close", self._handle_page_close)
178
7
 
179
- page.goto("https://bing.com")
8
+ client = AsyncOpenAI()
180
9
 
181
- return browser, page
182
10
 
183
- def _handle_new_page(self, page: Page):
184
- """Handle the creation of a new page."""
185
- print("New page created")
186
- self._page = page
187
- page.on("close", self._handle_page_close)
188
-
189
- def _handle_page_close(self, page: Page):
190
- """Handle the closure of a page."""
191
- print("Page closed")
192
- if self._page == page:
193
- if self._browser.contexts[0].pages:
194
- self._page = self._browser.contexts[0].pages[-1]
195
- else:
196
- print("Warning: All pages have been closed.")
197
- self._page = None
11
+ def sanitize_message(msg: dict) -> dict:
12
+ """Return a copy of the message with image_url omitted for computer_call_output messages."""
13
+ if msg.get("type") == "computer_call_output":
14
+ output = msg.get("output", {})
15
+ if isinstance(output, dict):
16
+ sanitized = msg.copy()
17
+ sanitized["output"] = {**output, "image_url": "[omitted]"}
18
+ return sanitized
19
+ return msg
198
20
 
199
21
 
200
22
  class Agent:
201
- """
202
- A sample agent class that can be used to interact with a computer.
203
-
204
- (See simple_cua_loop.py for a simple example without an agent.)
205
- """
206
-
207
23
  def __init__(
208
24
  self,
25
+ browser,
209
26
  model="computer-use-preview",
210
- computer: Computer = None,
211
27
  tools: list[dict] = [],
212
28
  acknowledge_safety_check_callback: Callable = lambda: False,
213
29
  ):
214
30
  self.model = model
215
- self.computer = computer
31
+ self.computer = browser
216
32
  self.tools = tools
217
33
  self.print_steps = True
218
34
  self.debug = False
219
35
  self.show_images = False
220
36
  self.acknowledge_safety_check_callback = acknowledge_safety_check_callback
221
37
 
222
- if computer:
223
- dimensions = computer.get_dimensions()
38
+ if browser:
39
+ dimensions = browser.get_dimensions()
224
40
  self.tools += [
225
41
  {
226
42
  "type": "computer-preview",
227
43
  "display_width": dimensions[0],
228
44
  "display_height": dimensions[1],
229
- "environment": computer.get_environment(),
45
+ "environment": browser.get_environment(),
230
46
  },
231
47
  ]
232
48
 
233
49
  def debug_print(self, *args):
234
50
  if self.debug:
235
- pp(*args)
51
+ print(*args)
236
52
 
237
- def handle_item(self, item):
53
+ async def handle_item(self, item):
238
54
  """Handle each item; may cause a computer action + screenshot."""
55
+ if self.debug:
56
+ print(f"Handling item of type: {item.get('type')}")
57
+
239
58
  if item["type"] == "message":
240
59
  if self.print_steps:
241
60
  print(item["content"][0]["text"])
@@ -247,7 +66,7 @@ class Agent:
247
66
 
248
67
  if hasattr(self.computer, name): # if function exists on computer, call it
249
68
  method = getattr(self.computer, name)
250
- method(**args)
69
+ await method(**args)
251
70
  return [
252
71
  {
253
72
  "type": "function_call_output",
@@ -264,11 +83,9 @@ class Agent:
264
83
  print(f"{action_type}({action_args})")
265
84
 
266
85
  method = getattr(self.computer, action_type)
267
- method(**action_args)
86
+ await method(**action_args)
268
87
 
269
- screenshot_base64 = self.computer.screenshot()
270
- if self.show_images:
271
- show_image(screenshot_base64)
88
+ screenshot_base64 = await self.computer.screenshot()
272
89
 
273
90
  # if user doesn't ack all safety checks exit with error
274
91
  pending_checks = item.get("pending_safety_checks", [])
@@ -292,13 +109,12 @@ class Agent:
292
109
  # additional URL safety checks for browser environments
293
110
  if self.computer.get_environment() == "browser":
294
111
  current_url = self.computer.get_current_url()
295
- check_blocklisted_url(current_url)
296
112
  call_output["output"]["current_url"] = current_url
297
113
 
298
114
  return [call_output]
299
115
  return []
300
116
 
301
- def run_full_turn(
117
+ async def run_full_turn(
302
118
  self, input_items, print_steps=True, debug=False, show_images=False
303
119
  ):
304
120
  self.print_steps = print_steps
@@ -310,20 +126,108 @@ class Agent:
310
126
  while new_items[-1].get("role") != "assistant" if new_items else True:
311
127
  self.debug_print([sanitize_message(msg) for msg in input_items + new_items])
312
128
 
313
- response = create_response(
129
+ # The Responses API rejects unknown keys (e.g. `status`, `encrypted_content`).
130
+ # Strip them from every item before sending.
131
+ def _clean_item(msg: dict) -> dict:
132
+ unwanted_keys = {"status", "encrypted_content"}
133
+ return {k: v for k, v in msg.items() if k not in unwanted_keys}
134
+
135
+ clean_input = [_clean_item(m) for m in (input_items + new_items)]
136
+
137
+ response = await client.responses.create(
314
138
  model=self.model,
315
- input=input_items + new_items,
139
+ input=clean_input,
316
140
  tools=self.tools,
317
141
  truncation="auto",
318
142
  )
319
- self.debug_print(response)
320
143
 
321
- if "output" not in response and self.debug:
322
- print(response)
323
- raise ValueError("No output from model")
324
- else:
325
- new_items += response["output"]
326
- for item in response["output"]:
327
- new_items += self.handle_item(item)
144
+ # The OpenAI SDK returns a Pydantic model object, not a plain dict.
145
+ # Convert it to a standard Python dict so the rest of the code can
146
+ # remain unchanged from the previous implementation.
147
+ response_dict = (
148
+ response.model_dump() # pydantic v2
149
+ if hasattr(response, "model_dump")
150
+ else (
151
+ response.to_dict_recursive()
152
+ if hasattr(response, "to_dict_recursive")
153
+ else dict(response)
154
+ )
155
+ )
156
+ self.debug_print(response_dict)
157
+
158
+ # Guard against missing/empty output in the response
159
+ if not response_dict.get("output"):
160
+ if self.debug:
161
+ print("Full response:", response_dict)
162
+ if response_dict.get("error") is not None:
163
+ error_msg = response_dict["error"].get("message", "Unknown error")
164
+ raise ValueError(f"API Error: {error_msg}")
165
+ else:
166
+ raise ValueError("No output from model")
167
+
168
+ # Append each item from the model output to conversation history
169
+ # in the exact order we received them, **without filtering** so that
170
+ # required pairs such as reasoning → computer_call are preserved.
171
+ for item in response_dict["output"]:
172
+ # First, record the original item itself.
173
+ new_items.append(item)
174
+
175
+ # Next, perform any local side-effects (browser actions, etc.).
176
+ handled_items = await self.handle_item(item)
177
+
178
+ # If the handler generated additional items (e.g. computer_call_output)
179
+ # we append those *immediately* so the order remains:
180
+ # reasoning → computer_call → computer_call_output
181
+ if handled_items:
182
+ new_items += handled_items
328
183
 
329
184
  return new_items
185
+
186
+
187
+ tools = []
188
+
189
+
190
+ async def ainput(prompt: str = "") -> str:
191
+ """Async version of input()"""
192
+ loop = asyncio.get_event_loop()
193
+ return await loop.run_in_executor(None, input, prompt)
194
+
195
+
196
+ async def main():
197
+ # Create a Fleet environment instance
198
+ instance = await flt.env.make("hubspot")
199
+
200
+ # Create the Playwright wrapper
201
+ browser = flt.FleetPlaywrightWrapper(instance)
202
+ await browser.start()
203
+
204
+ try:
205
+ agent = Agent(browser, model="computer-use-preview", tools=[])
206
+ items = [
207
+ {
208
+ "role": "developer",
209
+ "content": "You have access to a clone of Hubspot. You can use the computer to navigate the browser and perform actions.",
210
+ }
211
+ ]
212
+
213
+ while True:
214
+ try:
215
+ user_input = await ainput("> ")
216
+ items.append({"role": "user", "content": user_input})
217
+ output_items = await agent.run_full_turn(
218
+ items, show_images=False, debug=False
219
+ )
220
+ items += output_items
221
+ except (EOFError, KeyboardInterrupt):
222
+ print("\nShutting down...")
223
+ break
224
+ except Exception as e:
225
+ print(f"Error during interaction: {e}")
226
+ # Continue the loop for other errors
227
+ finally:
228
+ await browser.close()
229
+ await instance.close()
230
+
231
+
232
+ if __name__ == "__main__":
233
+ asyncio.run(main())
@@ -0,0 +1,61 @@
1
+ import asyncio
2
+ from openai import AsyncOpenAI
3
+ import fleet as flt
4
+
5
+ client = AsyncOpenAI()
6
+
7
+
8
+ async def main():
9
+ instance = await flt.env.make("hubspot")
10
+
11
+ browser = flt.FleetPlaywrightWrapper(instance)
12
+ await browser.start()
13
+
14
+ try:
15
+ width, height = browser.get_dimensions()
16
+ tools = [
17
+ {
18
+ "type": "computer-preview",
19
+ "display_width": width,
20
+ "display_height": height,
21
+ "environment": browser.get_environment(),
22
+ }
23
+ ]
24
+
25
+ response = await client.responses.create(
26
+ model="computer-use-preview",
27
+ input=[
28
+ {
29
+ "role": "developer",
30
+ "content": "Create a HubSpot deal",
31
+ }
32
+ ],
33
+ tools=tools,
34
+ truncation="auto",
35
+ )
36
+
37
+ if len(response.output) != 0:
38
+ if response.output[0].type == "message":
39
+ print(response.output[0].content[0].text)
40
+
41
+ if response.output[0].type == "computer_call":
42
+ action = response.output[0].action
43
+ if action.type == "screenshot":
44
+ screenshot_base64 = await browser.screenshot()
45
+ result = {
46
+ "type": "input_image",
47
+ "image_url": f"data:image/png;base64,{screenshot_base64}",
48
+ "current_url": browser.get_current_url(),
49
+ }
50
+ else:
51
+ result = await browser.execute_computer_action(action)
52
+
53
+ print("Computer action result:")
54
+ print(result)
55
+ finally:
56
+ await browser.close()
57
+ await instance.close()
58
+
59
+
60
+ if __name__ == "__main__":
61
+ asyncio.run(main())
examples/quickstart.py CHANGED
@@ -35,7 +35,7 @@ async def main():
35
35
  # 1. List available environments
36
36
  print("\n📋 Available environments:")
37
37
  try:
38
- environments = await fleet.env.list_envs()
38
+ environments = await fleet.instance.list_envs()
39
39
  for env in environments:
40
40
  print(f" - {env.env_key}: {env.name}")
41
41
  print(f" Description: {env.description}")
@@ -48,7 +48,7 @@ async def main():
48
48
  # 2. Create a new environment instance
49
49
  print("\n🚀 Creating new environment...")
50
50
  try:
51
- env = await fleet.env.make("fira:v1.2.5", region="us-west-1")
51
+ env = await fleet.instance.make("fira:v1.2.5", region="us-west-1")
52
52
  print(f"✅ Environment created with instance ID: {env.instance_id}")
53
53
 
54
54
  # Execute a simple action
@@ -85,7 +85,7 @@ async def main():
85
85
  # 3. List running instances
86
86
  print("\n🏃 Listing running instances...")
87
87
  try:
88
- instances = await fleet.env.list_instances(status="running")
88
+ instances = await fleet.instance.list_instances(status="running")
89
89
  if instances:
90
90
  print(f"Found {len(instances)} running instances:")
91
91
  for instance in instances:
@@ -99,13 +99,13 @@ async def main():
99
99
  print("\n🔗 Connecting to existing instance...")
100
100
  try:
101
101
  # Only get running instances
102
- running_instances = await fleet.env.list_instances(status="running")
102
+ running_instances = await fleet.instance.list_instances(status="running")
103
103
  if running_instances:
104
104
  # Find a running instance that's not the one we just created/deleted
105
105
  target_instance = running_instances[0]
106
106
  print(f"Connecting to running instance: {target_instance.instance_id}")
107
107
 
108
- env = await fleet.env.get(target_instance.instance_id)
108
+ env = await fleet.instance.get(target_instance.instance_id)
109
109
  print(f"✅ Connected to instance: {env.instance_id}")
110
110
 
111
111
  # Execute an action on the existing instance
fleet/__init__.py CHANGED
@@ -21,7 +21,8 @@ from .exceptions import (
21
21
  FleetConfigurationError,
22
22
  )
23
23
  from .client import Fleet, AsyncFleet, InstanceRequest
24
- from .env import (
24
+ from .instance import (
25
+ AsyncInstanceClient,
25
26
  ResetRequest,
26
27
  ResetResponse,
27
28
  CDPDescribeResponse,
@@ -29,6 +30,16 @@ from .env import (
29
30
  ChromeStartResponse,
30
31
  ChromeStatusResponse,
31
32
  )
33
+ from .verifiers import *
34
+ from . import env
35
+
36
+ # Optional playwright integration
37
+ try:
38
+ from .playwright import FleetPlaywrightWrapper
39
+ _PLAYWRIGHT_AVAILABLE = True
40
+ except ImportError:
41
+ FleetPlaywrightWrapper = None
42
+ _PLAYWRIGHT_AVAILABLE = False
32
43
 
33
44
  __version__ = "0.1.1"
34
45
  __all__ = [
@@ -39,6 +50,7 @@ __all__ = [
39
50
  "FleetConfigurationError",
40
51
  "Fleet",
41
52
  "AsyncFleet",
53
+ "AsyncInstanceClient",
42
54
  "InstanceRequest",
43
55
  "ResetRequest",
44
56
  "ResetResponse",
@@ -47,3 +59,7 @@ __all__ = [
47
59
  "ChromeStartResponse",
48
60
  "ChromeStatusResponse",
49
61
  ]
62
+
63
+ # Add playwright wrapper to exports if available
64
+ if _PLAYWRIGHT_AVAILABLE:
65
+ __all__.append("FleetPlaywrightWrapper")
fleet/base.py CHANGED
@@ -4,7 +4,7 @@ from typing import Dict, Any, Optional
4
4
  from .models import InstanceResponse
5
5
 
6
6
 
7
- class InstanceBase(InstanceResponse):
7
+ class EnvironmentBase(InstanceResponse):
8
8
  @property
9
9
  def manager_url(self) -> str:
10
10
  return f"{self.urls.manager.api}"