fleet-python 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fleet-python might be problematic. Click here for more details.
- {fleet_python-0.2.0/fleet_python.egg-info → fleet_python-0.2.1}/PKG-INFO +1 -1
- fleet_python-0.2.0/examples/browser_control_example.py → fleet_python-0.2.1/examples/example.py +14 -14
- fleet_python-0.2.1/examples/nova_act_example.py +180 -0
- fleet_python-0.2.1/examples/openai_example.py +329 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/fleet/__init__.py +22 -3
- fleet_python-0.2.1/fleet/env/__init__.py +22 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/fleet/env/client.py +27 -9
- {fleet_python-0.2.0 → fleet_python-0.2.1}/fleet/env/models.py +15 -14
- fleet_python-0.2.1/fleet/resources/browser.py +34 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/fleet/resources/sqlite.py +2 -2
- {fleet_python-0.2.0 → fleet_python-0.2.1/fleet_python.egg-info}/PKG-INFO +1 -1
- {fleet_python-0.2.0 → fleet_python-0.2.1}/fleet_python.egg-info/SOURCES.txt +3 -1
- {fleet_python-0.2.0 → fleet_python-0.2.1}/pyproject.toml +1 -1
- fleet_python-0.2.0/fleet/env/__init__.py +0 -8
- fleet_python-0.2.0/fleet/resources/browser.py +0 -18
- {fleet_python-0.2.0 → fleet_python-0.2.1}/LICENSE +0 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/README.md +0 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/examples/quickstart.py +0 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/fleet/base.py +0 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/fleet/client.py +0 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/fleet/env/base.py +0 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/fleet/exceptions.py +0 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/fleet/models.py +0 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/fleet/resources/base.py +0 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/fleet_python.egg-info/dependency_links.txt +0 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/fleet_python.egg-info/requires.txt +0 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/fleet_python.egg-info/top_level.txt +0 -0
- {fleet_python-0.2.0 → fleet_python-0.2.1}/setup.cfg +0 -0
fleet_python-0.2.0/examples/browser_control_example.py → fleet_python-0.2.1/examples/example.py
RENAMED
|
@@ -11,22 +11,21 @@ async def main():
|
|
|
11
11
|
environments = await fleet.list_envs()
|
|
12
12
|
print("Environments:", len(environments))
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
print("Instance:", instance.instance_id)
|
|
19
|
-
print("Instance Environment:", instance.env_key)
|
|
14
|
+
# Create a new instance
|
|
15
|
+
instance = await fleet.make(
|
|
16
|
+
flt.InstanceRequest(env_key="hubspot", version="v1.2.7")
|
|
17
|
+
)
|
|
18
|
+
print("New Instance:", instance.instance_id)
|
|
20
19
|
|
|
21
20
|
environment = await fleet.environment(instance.env_key)
|
|
22
21
|
print("Environment Default Version:", environment.default_version)
|
|
23
22
|
|
|
24
|
-
response = await instance.env.reset()
|
|
23
|
+
response = await instance.env.reset(flt.ResetRequest(seed=42))
|
|
25
24
|
print("Reset response:", response)
|
|
26
25
|
|
|
27
26
|
print(await instance.env.resources())
|
|
28
27
|
|
|
29
|
-
sqlite = instance.env.
|
|
28
|
+
sqlite = instance.env.db("current")
|
|
30
29
|
print("SQLite:", await sqlite.describe())
|
|
31
30
|
|
|
32
31
|
print("Query:", await sqlite.query("SELECT * FROM users"))
|
|
@@ -34,13 +33,14 @@ async def main():
|
|
|
34
33
|
sqlite = await instance.env.state("sqlite://current").describe()
|
|
35
34
|
print("SQLite:", sqlite)
|
|
36
35
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
36
|
+
await instance.env.browser("cdp").start(
|
|
37
|
+
flt.ChromeStartRequest(resolution="1920,1080")
|
|
38
|
+
)
|
|
40
39
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
print("
|
|
40
|
+
browser = await instance.env.browser("cdp").describe()
|
|
41
|
+
print("CDP Page URL:", browser.cdp_page_url)
|
|
42
|
+
print("CDP Browser URL:", browser.cdp_browser_url)
|
|
43
|
+
print("CDP Devtools URL:", browser.cdp_devtools_url)
|
|
44
44
|
|
|
45
45
|
# Delete the instance
|
|
46
46
|
instance = await fleet.delete(instance.instance_id)
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Nova Act + Fleet SDK Integration Example
|
|
3
|
+
|
|
4
|
+
This example demonstrates how to use Amazon Nova Act (an AI-powered browser automation SDK)
|
|
5
|
+
with Fleet's browser instances. Nova Act can navigate websites, fill forms, and extract data
|
|
6
|
+
using natural language commands.
|
|
7
|
+
|
|
8
|
+
Requirements:
|
|
9
|
+
1. Fleet SDK: pip install fleet-python
|
|
10
|
+
2. Nova Act SDK: pip install nova-act
|
|
11
|
+
3. Playwright Chrome: playwright install chrome
|
|
12
|
+
4. Environment variables:
|
|
13
|
+
- FLEET_API_KEY: Your Fleet API key
|
|
14
|
+
- NOVA_ACT_API_KEY: Your Nova Act API key (get from https://nova.amazon.com/act)
|
|
15
|
+
|
|
16
|
+
Note: Nova Act is currently only available in the US as a research preview.
|
|
17
|
+
|
|
18
|
+
Usage:
|
|
19
|
+
export FLEET_API_KEY=your_fleet_key
|
|
20
|
+
export NOVA_ACT_API_KEY=your_nova_act_key
|
|
21
|
+
python examples/nova_act_example.py
|
|
22
|
+
|
|
23
|
+
Important: Nova Act typically creates its own browser instance. Integration with
|
|
24
|
+
Fleet's CDP endpoint may not be fully supported in the current version.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
import asyncio
|
|
28
|
+
import fleet as flt
|
|
29
|
+
import nova_act
|
|
30
|
+
import os
|
|
31
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_nova_act_sync():
|
|
35
|
+
"""Test Nova Act in a synchronous context (outside asyncio loop)."""
|
|
36
|
+
print("\n🧪 Testing Nova Act independently...")
|
|
37
|
+
try:
|
|
38
|
+
with nova_act.NovaAct(
|
|
39
|
+
headless=False,
|
|
40
|
+
starting_page="https://example.com"
|
|
41
|
+
) as nova:
|
|
42
|
+
print("✅ Nova Act initialized successfully!")
|
|
43
|
+
result = nova.act("What is the main heading on this page?")
|
|
44
|
+
print(f"Test result: {result}")
|
|
45
|
+
return True
|
|
46
|
+
except Exception as e:
|
|
47
|
+
print(f"❌ Nova Act test failed: {type(e).__name__}: {str(e)}")
|
|
48
|
+
import traceback
|
|
49
|
+
traceback.print_exc()
|
|
50
|
+
return False
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def run_nova_act_with_fleet_data(fleet_app_url):
|
|
54
|
+
"""Run Nova Act examples using Fleet's app URL."""
|
|
55
|
+
print("\n🤖 Starting Nova Act with Fleet app URL...")
|
|
56
|
+
|
|
57
|
+
try:
|
|
58
|
+
with nova_act.NovaAct(
|
|
59
|
+
headless=False,
|
|
60
|
+
starting_page=fleet_app_url,
|
|
61
|
+
cdp_endpoint_url="wss://05bd8217.fleetai.com/cdp/devtools/browser/288477c8-2a6d-4e66-b8de-29bc3033c7a2"
|
|
62
|
+
) as nova:
|
|
63
|
+
print("✅ Nova Act started successfully!")
|
|
64
|
+
run_nova_examples(nova)
|
|
65
|
+
|
|
66
|
+
except Exception as e:
|
|
67
|
+
print(f"❌ Error during Nova Act operations: {type(e).__name__}: {str(e)}")
|
|
68
|
+
import traceback
|
|
69
|
+
traceback.print_exc()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def run_nova_examples(nova):
|
|
73
|
+
"""Run Nova Act examples in a separate function."""
|
|
74
|
+
# Example 1: Navigate and interact with a website
|
|
75
|
+
print("\n📝 Example 1: Basic navigation and interaction")
|
|
76
|
+
nova.act("Navigate to https://example.com")
|
|
77
|
+
|
|
78
|
+
# Extract page title
|
|
79
|
+
result = nova.act(
|
|
80
|
+
"What is the title of this page?",
|
|
81
|
+
schema={"type": "object", "properties": {"title": {"type": "string"}}},
|
|
82
|
+
)
|
|
83
|
+
if result.matches_schema:
|
|
84
|
+
print(f"Page title: {result.parsed_response.get('title')}")
|
|
85
|
+
|
|
86
|
+
# Example 2: More complex interaction
|
|
87
|
+
print("\n📝 Example 2: Search on a website")
|
|
88
|
+
nova.act("Navigate to https://www.python.org")
|
|
89
|
+
nova.act("Search for 'asyncio' in the search box")
|
|
90
|
+
|
|
91
|
+
# Example 3: Extract structured data
|
|
92
|
+
print("\n📝 Example 3: Extract structured information")
|
|
93
|
+
result = nova.act(
|
|
94
|
+
"Find the first 3 search results and return their titles",
|
|
95
|
+
schema={
|
|
96
|
+
"type": "object",
|
|
97
|
+
"properties": {
|
|
98
|
+
"results": {"type": "array", "items": {"type": "string"}}
|
|
99
|
+
},
|
|
100
|
+
},
|
|
101
|
+
)
|
|
102
|
+
if result.matches_schema:
|
|
103
|
+
results = result.parsed_response.get("results", [])
|
|
104
|
+
print("Search results:")
|
|
105
|
+
for i, title in enumerate(results, 1):
|
|
106
|
+
print(f" {i}. {title}")
|
|
107
|
+
|
|
108
|
+
# Example 4: Fill out a form
|
|
109
|
+
print("\n📝 Example 4: Form interaction")
|
|
110
|
+
nova.act("Navigate to https://httpbin.org/forms/post")
|
|
111
|
+
nova.act("Fill the customer name field with 'John Doe'")
|
|
112
|
+
nova.act("Select 'Medium' for the size")
|
|
113
|
+
nova.act("Check the 'Bacon' topping")
|
|
114
|
+
|
|
115
|
+
# You can also use nova_act's ability to take screenshots
|
|
116
|
+
print("\n📸 Taking screenshot...")
|
|
117
|
+
nova.act("Take a screenshot of the current page")
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
async def main():
|
|
121
|
+
"""Main async function for Fleet operations."""
|
|
122
|
+
|
|
123
|
+
# Check for Nova Act API key
|
|
124
|
+
nova_api_key = os.getenv("NOVA_ACT_API_KEY")
|
|
125
|
+
if not nova_api_key:
|
|
126
|
+
print("❌ NOVA_ACT_API_KEY environment variable not set!")
|
|
127
|
+
print("Please set it with: export NOVA_ACT_API_KEY=your_api_key")
|
|
128
|
+
return
|
|
129
|
+
else:
|
|
130
|
+
print(f"✅ Nova Act API key found: {nova_api_key[:8]}...{nova_api_key[-4:]}")
|
|
131
|
+
|
|
132
|
+
# Test Nova Act outside of asyncio loop
|
|
133
|
+
# with ThreadPoolExecutor() as executor:
|
|
134
|
+
# nova_test_future = executor.submit(test_nova_act_sync)
|
|
135
|
+
# nova_works = nova_test_future.result()
|
|
136
|
+
|
|
137
|
+
# if not nova_works:
|
|
138
|
+
# print("\nNova Act is not working properly. Please check:")
|
|
139
|
+
# print("1. You have a valid NOVA_ACT_API_KEY")
|
|
140
|
+
# print("2. You have installed nova-act: pip install nova-act")
|
|
141
|
+
# print("3. You have playwright installed: playwright install chrome")
|
|
142
|
+
# return
|
|
143
|
+
|
|
144
|
+
# Initialize Fleet client
|
|
145
|
+
fleet = flt.AsyncFleet()
|
|
146
|
+
print("\n🚀 Initializing Fleet client...")
|
|
147
|
+
|
|
148
|
+
instance = await fleet.instance("05bd8217")
|
|
149
|
+
|
|
150
|
+
try:
|
|
151
|
+
# Reset the environment to ensure clean state
|
|
152
|
+
# print("🔄 Resetting environment...")
|
|
153
|
+
# await instance.env.reset()
|
|
154
|
+
|
|
155
|
+
# Get browser resource from Fleet
|
|
156
|
+
browser = await instance.env.browser("cdp").describe()
|
|
157
|
+
print(f"🌐 CDP URL: {browser.url}")
|
|
158
|
+
print(f"🔧 DevTools URL: {browser.devtools_url}")
|
|
159
|
+
|
|
160
|
+
# Run Nova Act in a separate thread to avoid asyncio conflicts
|
|
161
|
+
with ThreadPoolExecutor() as executor:
|
|
162
|
+
nova_future = executor.submit(run_nova_act_with_fleet_data, instance.urls.app)
|
|
163
|
+
nova_future.result() # Wait for Nova Act to complete
|
|
164
|
+
|
|
165
|
+
except Exception as e:
|
|
166
|
+
print(f"❌ Error in main flow: {type(e).__name__}: {str(e)}")
|
|
167
|
+
import traceback
|
|
168
|
+
traceback.print_exc()
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
if __name__ == "__main__":
|
|
172
|
+
try:
|
|
173
|
+
asyncio.run(main())
|
|
174
|
+
except KeyboardInterrupt:
|
|
175
|
+
print("\n\n⚠️ Script interrupted by user")
|
|
176
|
+
print("Nova Act browser may still be running in the background.")
|
|
177
|
+
except Exception as e:
|
|
178
|
+
print(f"\n❌ Unexpected error: {type(e).__name__}: {str(e)}")
|
|
179
|
+
import traceback
|
|
180
|
+
traceback.print_exc()
|
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import base64
|
|
3
|
+
from typing import List, Dict, Callable
|
|
4
|
+
from playwright.sync_api import sync_playwright, Browser, Page
|
|
5
|
+
|
|
6
|
+
# Optional: key mapping if your model uses "CUA" style keys
|
|
7
|
+
CUA_KEY_TO_PLAYWRIGHT_KEY = {
|
|
8
|
+
"/": "Divide",
|
|
9
|
+
"\\": "Backslash",
|
|
10
|
+
"alt": "Alt",
|
|
11
|
+
"arrowdown": "ArrowDown",
|
|
12
|
+
"arrowleft": "ArrowLeft",
|
|
13
|
+
"arrowright": "ArrowRight",
|
|
14
|
+
"arrowup": "ArrowUp",
|
|
15
|
+
"backspace": "Backspace",
|
|
16
|
+
"capslock": "CapsLock",
|
|
17
|
+
"cmd": "Meta",
|
|
18
|
+
"ctrl": "Control",
|
|
19
|
+
"delete": "Delete",
|
|
20
|
+
"end": "End",
|
|
21
|
+
"enter": "Enter",
|
|
22
|
+
"esc": "Escape",
|
|
23
|
+
"home": "Home",
|
|
24
|
+
"insert": "Insert",
|
|
25
|
+
"option": "Alt",
|
|
26
|
+
"pagedown": "PageDown",
|
|
27
|
+
"pageup": "PageUp",
|
|
28
|
+
"shift": "Shift",
|
|
29
|
+
"space": " ",
|
|
30
|
+
"super": "Meta",
|
|
31
|
+
"tab": "Tab",
|
|
32
|
+
"win": "Meta",
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class BasePlaywrightComputer:
|
|
37
|
+
"""
|
|
38
|
+
Abstract base for Playwright-based computers:
|
|
39
|
+
|
|
40
|
+
- Subclasses override `_get_browser_and_page()` to do local or remote connection,
|
|
41
|
+
returning (Browser, Page).
|
|
42
|
+
- This base class handles context creation (`__enter__`/`__exit__`),
|
|
43
|
+
plus standard "Computer" actions like click, scroll, etc.
|
|
44
|
+
- We also have extra browser actions: `goto(url)` and `back()`.
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
def get_environment(self):
|
|
48
|
+
return "browser"
|
|
49
|
+
|
|
50
|
+
def get_dimensions(self):
|
|
51
|
+
return (1024, 768)
|
|
52
|
+
|
|
53
|
+
def __init__(self):
|
|
54
|
+
self._playwright = None
|
|
55
|
+
self._browser: Browser | None = None
|
|
56
|
+
self._page: Page | None = None
|
|
57
|
+
|
|
58
|
+
def __enter__(self):
|
|
59
|
+
# Start Playwright and call the subclass hook for getting browser/page
|
|
60
|
+
self._playwright = sync_playwright().start()
|
|
61
|
+
self._browser, self._page = self._get_browser_and_page()
|
|
62
|
+
|
|
63
|
+
# Set up network interception to flag URLs matching domains in BLOCKED_DOMAINS
|
|
64
|
+
def handle_route(route, request):
|
|
65
|
+
route.continue_()
|
|
66
|
+
|
|
67
|
+
self._page.route("**/*", handle_route)
|
|
68
|
+
|
|
69
|
+
return self
|
|
70
|
+
|
|
71
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
72
|
+
if self._browser:
|
|
73
|
+
self._browser.close()
|
|
74
|
+
if self._playwright:
|
|
75
|
+
self._playwright.stop()
|
|
76
|
+
|
|
77
|
+
def get_current_url(self) -> str:
|
|
78
|
+
return self._page.url
|
|
79
|
+
|
|
80
|
+
# --- Common "Computer" actions ---
|
|
81
|
+
def screenshot(self) -> str:
|
|
82
|
+
"""Capture only the viewport (not full_page)."""
|
|
83
|
+
png_bytes = self._page.screenshot(full_page=False)
|
|
84
|
+
return base64.b64encode(png_bytes).decode("utf-8")
|
|
85
|
+
|
|
86
|
+
def click(self, x: int, y: int, button: str = "left") -> None:
|
|
87
|
+
if button == "back":
|
|
88
|
+
self.back()
|
|
89
|
+
elif button == "forward":
|
|
90
|
+
self.forward()
|
|
91
|
+
elif button == "wheel":
|
|
92
|
+
self._page.mouse.wheel(x, y)
|
|
93
|
+
else:
|
|
94
|
+
button_mapping = {"left": "left", "right": "right"}
|
|
95
|
+
button_type = button_mapping.get(button, "left")
|
|
96
|
+
self._page.mouse.click(x, y, button=button_type)
|
|
97
|
+
|
|
98
|
+
def double_click(self, x: int, y: int) -> None:
|
|
99
|
+
self._page.mouse.dblclick(x, y)
|
|
100
|
+
|
|
101
|
+
def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
|
|
102
|
+
self._page.mouse.move(x, y)
|
|
103
|
+
self._page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})")
|
|
104
|
+
|
|
105
|
+
def type(self, text: str) -> None:
|
|
106
|
+
self._page.keyboard.type(text)
|
|
107
|
+
|
|
108
|
+
def wait(self, ms: int = 1000) -> None:
|
|
109
|
+
time.sleep(ms / 1000)
|
|
110
|
+
|
|
111
|
+
def move(self, x: int, y: int) -> None:
|
|
112
|
+
self._page.mouse.move(x, y)
|
|
113
|
+
|
|
114
|
+
def keypress(self, keys: List[str]) -> None:
|
|
115
|
+
mapped_keys = [CUA_KEY_TO_PLAYWRIGHT_KEY.get(key.lower(), key) for key in keys]
|
|
116
|
+
for key in mapped_keys:
|
|
117
|
+
self._page.keyboard.down(key)
|
|
118
|
+
for key in reversed(mapped_keys):
|
|
119
|
+
self._page.keyboard.up(key)
|
|
120
|
+
|
|
121
|
+
def drag(self, path: List[Dict[str, int]]) -> None:
|
|
122
|
+
if not path:
|
|
123
|
+
return
|
|
124
|
+
self._page.mouse.move(path[0]["x"], path[0]["y"])
|
|
125
|
+
self._page.mouse.down()
|
|
126
|
+
for point in path[1:]:
|
|
127
|
+
self._page.mouse.move(point["x"], point["y"])
|
|
128
|
+
self._page.mouse.up()
|
|
129
|
+
|
|
130
|
+
# --- Extra browser-oriented actions ---
|
|
131
|
+
def goto(self, url: str) -> None:
|
|
132
|
+
try:
|
|
133
|
+
return self._page.goto(url)
|
|
134
|
+
except Exception as e:
|
|
135
|
+
print(f"Error navigating to {url}: {e}")
|
|
136
|
+
|
|
137
|
+
def back(self) -> None:
|
|
138
|
+
return self._page.go_back()
|
|
139
|
+
|
|
140
|
+
def forward(self) -> None:
|
|
141
|
+
return self._page.go_forward()
|
|
142
|
+
|
|
143
|
+
# --- Subclass hook ---
|
|
144
|
+
def _get_browser_and_page(self) -> tuple[Browser, Page]:
|
|
145
|
+
"""Subclasses must implement, returning (Browser, Page)."""
|
|
146
|
+
raise NotImplementedError
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class LocalPlaywrightBrowser(BasePlaywrightComputer):
|
|
150
|
+
"""Launches a local Chromium instance using Playwright."""
|
|
151
|
+
|
|
152
|
+
def __init__(self, headless: bool = False):
|
|
153
|
+
super().__init__()
|
|
154
|
+
self.headless = headless
|
|
155
|
+
|
|
156
|
+
def _get_browser_and_page(self) -> tuple[Browser, Page]:
|
|
157
|
+
width, height = self.get_dimensions()
|
|
158
|
+
launch_args = [
|
|
159
|
+
f"--window-size={width},{height}",
|
|
160
|
+
"--disable-extensions",
|
|
161
|
+
"--disable-file-system",
|
|
162
|
+
]
|
|
163
|
+
browser = self._playwright.chromium.launch(
|
|
164
|
+
chromium_sandbox=True,
|
|
165
|
+
headless=self.headless,
|
|
166
|
+
args=launch_args,
|
|
167
|
+
env={"DISPLAY": ":0"},
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
context = browser.new_context()
|
|
171
|
+
|
|
172
|
+
# Add event listeners for page creation and closure
|
|
173
|
+
context.on("page", self._handle_new_page)
|
|
174
|
+
|
|
175
|
+
page = context.new_page()
|
|
176
|
+
page.set_viewport_size({"width": width, "height": height})
|
|
177
|
+
page.on("close", self._handle_page_close)
|
|
178
|
+
|
|
179
|
+
page.goto("https://bing.com")
|
|
180
|
+
|
|
181
|
+
return browser, page
|
|
182
|
+
|
|
183
|
+
def _handle_new_page(self, page: Page):
|
|
184
|
+
"""Handle the creation of a new page."""
|
|
185
|
+
print("New page created")
|
|
186
|
+
self._page = page
|
|
187
|
+
page.on("close", self._handle_page_close)
|
|
188
|
+
|
|
189
|
+
def _handle_page_close(self, page: Page):
|
|
190
|
+
"""Handle the closure of a page."""
|
|
191
|
+
print("Page closed")
|
|
192
|
+
if self._page == page:
|
|
193
|
+
if self._browser.contexts[0].pages:
|
|
194
|
+
self._page = self._browser.contexts[0].pages[-1]
|
|
195
|
+
else:
|
|
196
|
+
print("Warning: All pages have been closed.")
|
|
197
|
+
self._page = None
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
class Agent:
|
|
201
|
+
"""
|
|
202
|
+
A sample agent class that can be used to interact with a computer.
|
|
203
|
+
|
|
204
|
+
(See simple_cua_loop.py for a simple example without an agent.)
|
|
205
|
+
"""
|
|
206
|
+
|
|
207
|
+
def __init__(
|
|
208
|
+
self,
|
|
209
|
+
model="computer-use-preview",
|
|
210
|
+
computer: Computer = None,
|
|
211
|
+
tools: list[dict] = [],
|
|
212
|
+
acknowledge_safety_check_callback: Callable = lambda: False,
|
|
213
|
+
):
|
|
214
|
+
self.model = model
|
|
215
|
+
self.computer = computer
|
|
216
|
+
self.tools = tools
|
|
217
|
+
self.print_steps = True
|
|
218
|
+
self.debug = False
|
|
219
|
+
self.show_images = False
|
|
220
|
+
self.acknowledge_safety_check_callback = acknowledge_safety_check_callback
|
|
221
|
+
|
|
222
|
+
if computer:
|
|
223
|
+
dimensions = computer.get_dimensions()
|
|
224
|
+
self.tools += [
|
|
225
|
+
{
|
|
226
|
+
"type": "computer-preview",
|
|
227
|
+
"display_width": dimensions[0],
|
|
228
|
+
"display_height": dimensions[1],
|
|
229
|
+
"environment": computer.get_environment(),
|
|
230
|
+
},
|
|
231
|
+
]
|
|
232
|
+
|
|
233
|
+
def debug_print(self, *args):
|
|
234
|
+
if self.debug:
|
|
235
|
+
pp(*args)
|
|
236
|
+
|
|
237
|
+
def handle_item(self, item):
|
|
238
|
+
"""Handle each item; may cause a computer action + screenshot."""
|
|
239
|
+
if item["type"] == "message":
|
|
240
|
+
if self.print_steps:
|
|
241
|
+
print(item["content"][0]["text"])
|
|
242
|
+
|
|
243
|
+
if item["type"] == "function_call":
|
|
244
|
+
name, args = item["name"], json.loads(item["arguments"])
|
|
245
|
+
if self.print_steps:
|
|
246
|
+
print(f"{name}({args})")
|
|
247
|
+
|
|
248
|
+
if hasattr(self.computer, name): # if function exists on computer, call it
|
|
249
|
+
method = getattr(self.computer, name)
|
|
250
|
+
method(**args)
|
|
251
|
+
return [
|
|
252
|
+
{
|
|
253
|
+
"type": "function_call_output",
|
|
254
|
+
"call_id": item["call_id"],
|
|
255
|
+
"output": "success", # hard-coded output for demo
|
|
256
|
+
}
|
|
257
|
+
]
|
|
258
|
+
|
|
259
|
+
if item["type"] == "computer_call":
|
|
260
|
+
action = item["action"]
|
|
261
|
+
action_type = action["type"]
|
|
262
|
+
action_args = {k: v for k, v in action.items() if k != "type"}
|
|
263
|
+
if self.print_steps:
|
|
264
|
+
print(f"{action_type}({action_args})")
|
|
265
|
+
|
|
266
|
+
method = getattr(self.computer, action_type)
|
|
267
|
+
method(**action_args)
|
|
268
|
+
|
|
269
|
+
screenshot_base64 = self.computer.screenshot()
|
|
270
|
+
if self.show_images:
|
|
271
|
+
show_image(screenshot_base64)
|
|
272
|
+
|
|
273
|
+
# if user doesn't ack all safety checks exit with error
|
|
274
|
+
pending_checks = item.get("pending_safety_checks", [])
|
|
275
|
+
for check in pending_checks:
|
|
276
|
+
message = check["message"]
|
|
277
|
+
if not self.acknowledge_safety_check_callback(message):
|
|
278
|
+
raise ValueError(
|
|
279
|
+
f"Safety check failed: {message}. Cannot continue with unacknowledged safety checks."
|
|
280
|
+
)
|
|
281
|
+
|
|
282
|
+
call_output = {
|
|
283
|
+
"type": "computer_call_output",
|
|
284
|
+
"call_id": item["call_id"],
|
|
285
|
+
"acknowledged_safety_checks": pending_checks,
|
|
286
|
+
"output": {
|
|
287
|
+
"type": "input_image",
|
|
288
|
+
"image_url": f"data:image/png;base64,{screenshot_base64}",
|
|
289
|
+
},
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
# additional URL safety checks for browser environments
|
|
293
|
+
if self.computer.get_environment() == "browser":
|
|
294
|
+
current_url = self.computer.get_current_url()
|
|
295
|
+
check_blocklisted_url(current_url)
|
|
296
|
+
call_output["output"]["current_url"] = current_url
|
|
297
|
+
|
|
298
|
+
return [call_output]
|
|
299
|
+
return []
|
|
300
|
+
|
|
301
|
+
def run_full_turn(
|
|
302
|
+
self, input_items, print_steps=True, debug=False, show_images=False
|
|
303
|
+
):
|
|
304
|
+
self.print_steps = print_steps
|
|
305
|
+
self.debug = debug
|
|
306
|
+
self.show_images = show_images
|
|
307
|
+
new_items = []
|
|
308
|
+
|
|
309
|
+
# keep looping until we get a final response
|
|
310
|
+
while new_items[-1].get("role") != "assistant" if new_items else True:
|
|
311
|
+
self.debug_print([sanitize_message(msg) for msg in input_items + new_items])
|
|
312
|
+
|
|
313
|
+
response = create_response(
|
|
314
|
+
model=self.model,
|
|
315
|
+
input=input_items + new_items,
|
|
316
|
+
tools=self.tools,
|
|
317
|
+
truncation="auto",
|
|
318
|
+
)
|
|
319
|
+
self.debug_print(response)
|
|
320
|
+
|
|
321
|
+
if "output" not in response and self.debug:
|
|
322
|
+
print(response)
|
|
323
|
+
raise ValueError("No output from model")
|
|
324
|
+
else:
|
|
325
|
+
new_items += response["output"]
|
|
326
|
+
for item in response["output"]:
|
|
327
|
+
new_items += self.handle_item(item)
|
|
328
|
+
|
|
329
|
+
return new_items
|
|
@@ -14,17 +14,36 @@
|
|
|
14
14
|
|
|
15
15
|
"""Fleet Python SDK - Environment-based AI agent interactions."""
|
|
16
16
|
|
|
17
|
-
from .exceptions import
|
|
17
|
+
from .exceptions import (
|
|
18
|
+
FleetError,
|
|
19
|
+
FleetAPIError,
|
|
20
|
+
FleetTimeoutError,
|
|
21
|
+
FleetConfigurationError,
|
|
22
|
+
)
|
|
18
23
|
from .client import Fleet, AsyncFleet, InstanceRequest
|
|
24
|
+
from .env import (
|
|
25
|
+
ResetRequest,
|
|
26
|
+
ResetResponse,
|
|
27
|
+
CDPDescribeResponse,
|
|
28
|
+
ChromeStartRequest,
|
|
29
|
+
ChromeStartResponse,
|
|
30
|
+
ChromeStatusResponse,
|
|
31
|
+
)
|
|
19
32
|
|
|
20
33
|
__version__ = "0.1.1"
|
|
21
34
|
__all__ = [
|
|
22
35
|
"env",
|
|
23
36
|
"FleetError",
|
|
24
|
-
"FleetAPIError",
|
|
37
|
+
"FleetAPIError",
|
|
25
38
|
"FleetTimeoutError",
|
|
26
39
|
"FleetConfigurationError",
|
|
27
40
|
"Fleet",
|
|
28
41
|
"AsyncFleet",
|
|
29
42
|
"InstanceRequest",
|
|
30
|
-
|
|
43
|
+
"ResetRequest",
|
|
44
|
+
"ResetResponse",
|
|
45
|
+
"CDPDescribeResponse",
|
|
46
|
+
"ChromeStartRequest",
|
|
47
|
+
"ChromeStartResponse",
|
|
48
|
+
"ChromeStatusResponse",
|
|
49
|
+
]
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Fleet SDK Environment Module."""
|
|
2
|
+
|
|
3
|
+
from .client import Environment, AsyncEnvironment
|
|
4
|
+
from .models import (
|
|
5
|
+
ResetRequest,
|
|
6
|
+
ResetResponse,
|
|
7
|
+
CDPDescribeResponse,
|
|
8
|
+
ChromeStartRequest,
|
|
9
|
+
ChromeStartResponse,
|
|
10
|
+
ChromeStatusResponse,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"Environment",
|
|
15
|
+
"AsyncEnvironment",
|
|
16
|
+
"ResetRequest",
|
|
17
|
+
"ResetResponse",
|
|
18
|
+
"CDPDescribeResponse",
|
|
19
|
+
"ChromeStartRequest",
|
|
20
|
+
"ChromeStartResponse",
|
|
21
|
+
"ChromeStatusResponse",
|
|
22
|
+
]
|
|
@@ -15,6 +15,7 @@ from ..exceptions import FleetEnvironmentError, FleetAPIError
|
|
|
15
15
|
|
|
16
16
|
from .base import SyncWrapper, AsyncWrapper
|
|
17
17
|
from .models import (
|
|
18
|
+
ResetRequest,
|
|
18
19
|
ResetResponse,
|
|
19
20
|
Resource as ResourceModel,
|
|
20
21
|
ResourceType,
|
|
@@ -26,7 +27,7 @@ logger = logging.getLogger(__name__)
|
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
RESOURCE_TYPES = {
|
|
29
|
-
ResourceType.
|
|
30
|
+
ResourceType.db: AsyncSQLiteResource,
|
|
30
31
|
ResourceType.cdp: AsyncBrowserResource,
|
|
31
32
|
}
|
|
32
33
|
|
|
@@ -66,17 +67,30 @@ class AsyncEnvironment:
|
|
|
66
67
|
async def load(self) -> None:
|
|
67
68
|
await self._load_resources()
|
|
68
69
|
|
|
69
|
-
async def reset(
|
|
70
|
-
|
|
70
|
+
async def reset(
|
|
71
|
+
self, reset_request: Optional[ResetRequest] = None
|
|
72
|
+
) -> ResetResponse:
|
|
73
|
+
response = await self.client.request(
|
|
74
|
+
"POST", "/reset", json=reset_request.model_dump() if reset_request else None
|
|
75
|
+
)
|
|
71
76
|
return ResetResponse(**response.json())
|
|
72
77
|
|
|
73
78
|
def state(self, uri: str) -> Resource:
|
|
74
79
|
url = urlparse(uri)
|
|
75
80
|
return self._resources_state[url.scheme][url.netloc]
|
|
76
81
|
|
|
77
|
-
def
|
|
82
|
+
def db(self, name: str) -> AsyncSQLiteResource:
|
|
83
|
+
"""
|
|
84
|
+
Returns an AsyncSQLiteResource object for the given SQLite database name.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
name: The name of the SQLite database to return
|
|
88
|
+
|
|
89
|
+
Returns:
|
|
90
|
+
An AsyncSQLiteResource object for the given SQLite database name
|
|
91
|
+
"""
|
|
78
92
|
return AsyncSQLiteResource(
|
|
79
|
-
self._resources_state[ResourceType.
|
|
93
|
+
self._resources_state[ResourceType.db.value][name], self.client
|
|
80
94
|
)
|
|
81
95
|
|
|
82
96
|
def browser(self, name: str) -> AsyncBrowserResource:
|
|
@@ -84,9 +98,13 @@ class AsyncEnvironment:
|
|
|
84
98
|
self._resources_state[ResourceType.cdp.value][name], self.client
|
|
85
99
|
)
|
|
86
100
|
|
|
87
|
-
async def resources(self) -> List[
|
|
101
|
+
async def resources(self) -> List[Resource]:
|
|
88
102
|
await self._load_resources()
|
|
89
|
-
return
|
|
103
|
+
return [
|
|
104
|
+
resource
|
|
105
|
+
for resources_by_name in self._resources_state.values()
|
|
106
|
+
for resource in resources_by_name.values()
|
|
107
|
+
]
|
|
90
108
|
|
|
91
109
|
async def _load_resources(self) -> None:
|
|
92
110
|
if self._resources is None:
|
|
@@ -94,9 +112,9 @@ class AsyncEnvironment:
|
|
|
94
112
|
if response.status_code != 200:
|
|
95
113
|
self._resources = []
|
|
96
114
|
return
|
|
97
|
-
|
|
115
|
+
print(response.json())
|
|
98
116
|
self._resources = [
|
|
99
|
-
ResourceModel(**resource) for resource in
|
|
117
|
+
ResourceModel(**resource) for resource in response.json()
|
|
100
118
|
]
|
|
101
119
|
for resource in self._resources:
|
|
102
120
|
if resource.type not in self._resources_state:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# generated by datamodel-codegen:
|
|
2
|
-
# filename:
|
|
3
|
-
# timestamp: 2025-07-
|
|
2
|
+
# filename: openapi (2).json
|
|
3
|
+
# timestamp: 2025-07-09T20:11:31+00:00
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
|
|
@@ -10,22 +10,27 @@ from typing import Any, Dict, List, Optional, Union
|
|
|
10
10
|
from pydantic import BaseModel, Field
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
class
|
|
13
|
+
class CDPDescribeResponse(BaseModel):
|
|
14
14
|
success: bool = Field(..., title="Success")
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
cdp_page_url: str = Field(..., title="Url")
|
|
16
|
+
cdp_browser_url: str = Field(..., title="Browser Url")
|
|
17
|
+
cdp_devtools_url: str = Field(..., title="Devtools Url")
|
|
17
18
|
|
|
18
19
|
|
|
19
|
-
class
|
|
20
|
-
start_page: Optional[str] = Field("about:blank", title="Start Page")
|
|
20
|
+
class ChromeStartRequest(BaseModel):
|
|
21
21
|
resolution: Optional[str] = Field("1920x1080", title="Resolution")
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
class
|
|
24
|
+
class ChromeStartResponse(BaseModel):
|
|
25
25
|
success: bool = Field(..., title="Success")
|
|
26
26
|
message: str = Field(..., title="Message")
|
|
27
27
|
|
|
28
28
|
|
|
29
|
+
class ChromeStatusResponse(BaseModel):
|
|
30
|
+
running: bool = Field(..., title="Running")
|
|
31
|
+
message: str = Field(..., title="Message")
|
|
32
|
+
|
|
33
|
+
|
|
29
34
|
class CreateSnapshotsResponse(BaseModel):
|
|
30
35
|
success: bool = Field(..., title="Success")
|
|
31
36
|
initial_snapshot_path: Optional[str] = Field(None, title="Initial Snapshot Path")
|
|
@@ -69,8 +74,8 @@ class QueryResponse(BaseModel):
|
|
|
69
74
|
|
|
70
75
|
|
|
71
76
|
class ResetRequest(BaseModel):
|
|
77
|
+
timestamp: Optional[int] = Field(None, title="Timestamp")
|
|
72
78
|
seed: Optional[int] = Field(None, title="Seed")
|
|
73
|
-
timestamp: Optional[str] = Field(None, title="Timestamp")
|
|
74
79
|
|
|
75
80
|
|
|
76
81
|
class ResetResponse(BaseModel):
|
|
@@ -84,7 +89,7 @@ class ResourceMode(Enum):
|
|
|
84
89
|
|
|
85
90
|
|
|
86
91
|
class ResourceType(Enum):
|
|
87
|
-
|
|
92
|
+
db = "sqlite"
|
|
88
93
|
cdp = "cdp"
|
|
89
94
|
|
|
90
95
|
|
|
@@ -121,7 +126,3 @@ class Resource(BaseModel):
|
|
|
121
126
|
type: ResourceType
|
|
122
127
|
mode: ResourceMode
|
|
123
128
|
label: Optional[str] = Field(None, title="Label")
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
class ResourcesResponse(BaseModel):
|
|
127
|
-
resources: List[Resource] = Field(..., title="Resources")
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from ..env.models import (
|
|
4
|
+
Resource as ResourceModel,
|
|
5
|
+
CDPDescribeResponse,
|
|
6
|
+
ChromeStartRequest,
|
|
7
|
+
ChromeStartResponse,
|
|
8
|
+
)
|
|
9
|
+
from .base import Resource
|
|
10
|
+
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from ..env.base import AsyncWrapper
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AsyncBrowserResource(Resource):
|
|
18
|
+
def __init__(self, resource: ResourceModel, client: "AsyncWrapper"):
|
|
19
|
+
super().__init__(resource)
|
|
20
|
+
self.client = client
|
|
21
|
+
|
|
22
|
+
async def start(
|
|
23
|
+
self, start_request: Optional[ChromeStartRequest] = None
|
|
24
|
+
) -> ChromeStartResponse:
|
|
25
|
+
response = await self.client.request(
|
|
26
|
+
"POST",
|
|
27
|
+
"/resources/cdp/start",
|
|
28
|
+
json=start_request.model_dump() if start_request else None,
|
|
29
|
+
)
|
|
30
|
+
return ChromeStartResponse(**response.json())
|
|
31
|
+
|
|
32
|
+
async def describe(self) -> CDPDescribeResponse:
|
|
33
|
+
response = await self.client.request("GET", "/resources/cdp/describe")
|
|
34
|
+
return CDPDescribeResponse(**response.json())
|
|
@@ -17,7 +17,7 @@ class AsyncSQLiteResource(Resource):
|
|
|
17
17
|
async def describe(self) -> DescribeResponse:
|
|
18
18
|
"""Describe the SQLite database schema."""
|
|
19
19
|
response = await self.client.request(
|
|
20
|
-
"GET", f"/
|
|
20
|
+
"GET", f"/resources/sqlite/{self.resource.name}/describe"
|
|
21
21
|
)
|
|
22
22
|
return DescribeResponse(**response.json())
|
|
23
23
|
|
|
@@ -35,7 +35,7 @@ class AsyncSQLiteResource(Resource):
|
|
|
35
35
|
request = QueryRequest(query=query, args=args, read_only=read_only)
|
|
36
36
|
response = await self.client.request(
|
|
37
37
|
"POST",
|
|
38
|
-
f"/
|
|
38
|
+
f"/resources/sqlite/{self.resource.name}/query",
|
|
39
39
|
json=request.model_dump(),
|
|
40
40
|
)
|
|
41
41
|
return QueryResponse(**response.json())
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
from ..env.models import Resource as ResourceModel
|
|
2
|
-
from ..env.models import BrowserDescribeResponse
|
|
3
|
-
from .base import Resource
|
|
4
|
-
|
|
5
|
-
from typing import TYPE_CHECKING
|
|
6
|
-
|
|
7
|
-
if TYPE_CHECKING:
|
|
8
|
-
from ..env.base import AsyncWrapper
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class AsyncBrowserResource(Resource):
|
|
12
|
-
def __init__(self, resource: ResourceModel, client: "AsyncWrapper"):
|
|
13
|
-
super().__init__(resource)
|
|
14
|
-
self.client = client
|
|
15
|
-
|
|
16
|
-
async def describe(self) -> BrowserDescribeResponse:
|
|
17
|
-
response = await self.client.request("GET", "/resource/cdp/describe")
|
|
18
|
-
return BrowserDescribeResponse(**response.json())
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|