meshagent-computers 0.6.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of meshagent-computers might be problematic. Click here for more details.
- meshagent/computers/__init__.py +21 -0
- meshagent/computers/agent.py +229 -0
- meshagent/computers/base_playwright.py +173 -0
- meshagent/computers/browserbase.py +196 -0
- meshagent/computers/computer.py +36 -0
- meshagent/computers/docker.py +179 -0
- meshagent/computers/local_playwright.py +25 -0
- meshagent/computers/operator.py +79 -0
- meshagent/computers/scrapybara.py +212 -0
- meshagent/computers/utils.py +78 -0
- meshagent/computers/version.py +1 -0
- meshagent_computers-0.6.7.dist-info/METADATA +69 -0
- meshagent_computers-0.6.7.dist-info/RECORD +16 -0
- meshagent_computers-0.6.7.dist-info/WHEEL +5 -0
- meshagent_computers-0.6.7.dist-info/licenses/LICENSE +201 -0
- meshagent_computers-0.6.7.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
import time
|
|
3
|
+
import asyncio
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
async def _async_check_output(*args, **kwargs):
|
|
7
|
+
proc = await asyncio.create_subprocess_exec(
|
|
8
|
+
*args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, **kwargs
|
|
9
|
+
)
|
|
10
|
+
stdout, stderr = await proc.communicate()
|
|
11
|
+
if proc.returncode != 0:
|
|
12
|
+
raise subprocess.CalledProcessError(
|
|
13
|
+
proc.returncode, args, output=stdout, stderr=stderr
|
|
14
|
+
)
|
|
15
|
+
return stdout
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DockerComputer:
    """Control a Linux desktop inside an already-running Docker container.

    Every action is a shell command run in the container through
    ``docker exec`` (``xdotool`` for input, ImageMagick ``import`` for
    screenshots) against the X display named by ``display``.

    The container lifecycle is managed outside this class: entering the
    context only verifies that the container is running and reads the display
    geometry; leaving it does nothing.  ``image`` is stored but not used by
    the code in this class, and ``port_mapping`` only appears in the hint
    printed when the container is missing.
    """

    environment = "linux"
    dimensions = (1280, 720)  # Default fallback; will be updated in __aenter__.

    def __init__(
        self,
        container_name="cua-sample-app",
        image="ghcr.io/openai/openai-cua-sample-app:latest",
        display=":99",
        port_mapping="5900:5900",
    ):
        self.container_name = container_name
        self.image = image
        self.display = display
        self.port_mapping = port_mapping

    async def __aenter__(self):
        """Verify the container is running and pick up its display size.

        Raises:
            RuntimeError: If ``docker ps`` reports no running container with
                the configured name.
        """
        # `subprocess.run` blocks, so run the probe in a worker thread to keep
        # the event loop responsive.
        result = await asyncio.to_thread(
            subprocess.run,
            ["docker", "ps", "-q", "-f", f"name={self.container_name}"],
            capture_output=True,
            text=True,
        )

        if not result.stdout.strip():
            raise RuntimeError(
                f"Container {self.container_name} is not running. Build and run with:\n"
                f"docker build -t {self.container_name} .\n"
                f"docker run --rm -it --name {self.container_name} "
                f"-p {self.port_mapping} -e DISPLAY={self.display} {self.container_name}"
            )

        await self._update_dimensions()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Leave the context; the container is intentionally left running."""
        pass

    async def _update_dimensions(self) -> None:
        """Set ``self.dimensions`` from ``xdotool getdisplaygeometry``."""
        # The parentheses matter: `await self._exec(...).strip()` would call
        # .strip() on the coroutine object instead of on the awaited string.
        geometry = (
            await self._exec(f"DISPLAY={self.display} xdotool getdisplaygeometry")
        ).strip()
        if geometry:
            w, h = geometry.split()
            self.dimensions = (int(w), int(h))

    async def _exec(self, cmd: str) -> str:
        """Run ``cmd`` with ``sh -c`` inside the container and return stdout.

        The command is passed to ``docker exec`` as a single argv element, so
        no shell on the host ever parses it; quotes, ``$`` and backticks in
        ``cmd`` are interpreted only by the shell inside the container.

        Raises:
            subprocess.CalledProcessError: If the command exits non-zero
                (raised by ``_async_check_output``).
        """
        output = await _async_check_output(
            "docker", "exec", self.container_name, "sh", "-c", cmd
        )
        return output.decode("utf-8", errors="ignore")

    async def screenshot(self) -> str:
        """Capture the root window and return it as a base64-encoded PNG.

        Requires ImageMagick's ``import`` and ``base64`` in the container.
        """
        cmd = (
            f"export DISPLAY={self.display} && import -window root png:- | base64 -w 0"
        )
        return await self._exec(cmd)

    async def click(self, x: int, y: int, button: str = "left") -> None:
        """Move the pointer to ``(x, y)`` and click.

        Unrecognised ``button`` names fall back to the left button.
        """
        button_map = {"left": 1, "middle": 2, "right": 3}
        b = button_map.get(button, 1)
        await self._exec(f"DISPLAY={self.display} xdotool mousemove {x} {y} click {b}")

    async def double_click(self, x: int, y: int) -> None:
        """Move the pointer to ``(x, y)`` and left-click twice."""
        await self._exec(
            f"DISPLAY={self.display} xdotool mousemove {x} {y} click --repeat 2 1"
        )

    async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Scroll vertically at ``(x, y)``.

        Issues ``abs(scroll_y)`` clicks of xdotool button 4 (scroll up, for
        negative values) or 5 (scroll down).  ``scroll_x`` is accepted for
        interface compatibility but is not used.
        """
        await self._exec(f"DISPLAY={self.display} xdotool mousemove {x} {y}")
        clicks = abs(scroll_y)
        button = 4 if scroll_y < 0 else 5
        for _ in range(clicks):
            await self._exec(f"DISPLAY={self.display} xdotool click {button}")

    async def type(self, text: str) -> None:
        """Type ``text`` via xdotool, preserving spaces and quotes."""
        # Escape single quotes in the user text: ' -> '\''
        safe_text = text.replace("'", "'\\''")
        # Then wrap everything in single quotes for xdotool
        cmd = f"DISPLAY={self.display} xdotool type -- '{safe_text}'"
        await self._exec(cmd)

    async def wait(self, ms: int = 1000) -> None:
        """Pause for ``ms`` milliseconds without blocking the event loop."""
        await asyncio.sleep(ms / 1000)

    async def move(self, x: int, y: int) -> None:
        """Move the pointer to ``(x, y)``."""
        await self._exec(f"DISPLAY={self.display} xdotool mousemove {x} {y}")

    async def keypress(self, keys: list[str]) -> None:
        """Press ``keys`` together as one xdotool key combination.

        Names listed in the mapping below are translated to the key names
        passed to xdotool; any other name is passed through unchanged.
        """
        mapping = {
            "ENTER": "Return",
            "LEFT": "Left",
            "RIGHT": "Right",
            "UP": "Up",
            "DOWN": "Down",
            "ESC": "Escape",
            "SPACE": "space",
            "BACKSPACE": "BackSpace",
            "TAB": "Tab",
        }
        mapped_keys = [mapping.get(key, key) for key in keys]
        combo = "+".join(mapped_keys)
        await self._exec(f"DISPLAY={self.display} xdotool key {combo}")

    async def drag(self, path: list[dict[str, int]]) -> None:
        """Drag with the left button along ``path``.

        ``path`` is a list of ``{"x": ..., "y": ...}`` points; an empty path
        is a no-op.  The button is pressed at the first point and released
        after the last one.
        """
        if not path:
            return
        start_x = path[0]["x"]
        start_y = path[0]["y"]
        # This call must be awaited, otherwise the button is never pressed.
        await self._exec(
            f"DISPLAY={self.display} xdotool mousemove {start_x} {start_y} mousedown 1"
        )
        try:
            for point in path[1:]:
                await self._exec(
                    f"DISPLAY={self.display} xdotool mousemove {point['x']} {point['y']}"
                )
        finally:
            # Always release, so a failed move does not leave the button held.
            await self._exec(f"DISPLAY={self.display} xdotool mouseup 1")
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from playwright.async_api import Browser, Page
|
|
2
|
+
from .base_playwright import BasePlaywrightComputer
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class LocalPlaywrightComputer(BasePlaywrightComputer):
    """Playwright computer backed by a Chromium process launched locally."""

    def __init__(self, headless: bool = False):
        super().__init__()
        self.headless = headless

    async def _get_browser_and_page(self) -> tuple[Browser, Page]:
        """Launch Chromium, open one page sized to ``self.dimensions``.

        The page is pointed at https://google.com before being returned.
        ``self.dimensions`` and ``self._playwright`` are not set in this
        class; presumably the base class provides them (verify there).

        Returns:
            The ``(browser, page)`` pair.
        """
        viewport_w, viewport_h = self.dimensions
        chromium = self._playwright.chromium

        browser = await chromium.launch(
            chromium_sandbox=True,
            headless=self.headless,
            args=[
                f"--window-size={viewport_w},{viewport_h}",
                "--disable-extensions",
                "--disable-file-system",
            ],
            env={},  # Chromium is started with an empty environment.
        )

        page = await browser.new_page()
        viewport = {"width": viewport_w, "height": viewport_h}
        await page.set_viewport_size(viewport)
        await page.goto("https://google.com")
        return browser, page
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from .computer import Computer
|
|
2
|
+
from .utils import check_blocklisted_url
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Operator:
    """Apply model output items to a computer and build the reply items.

    Attributes:
        print_steps: When true, messages and actions are printed to stdout.
        show_images: When true, each screenshot is passed to ``show_image``.
    """

    def __init__(self):
        self.print_steps = False
        self.show_images = False

    async def acknowledge_safety_check_callback(self, data: dict):
        """Decide whether a pending safety check is acknowledged.

        ``play`` calls this with the check's ``"message"`` value.  This
        default implementation acknowledges everything; override to ask a
        user.
        """
        return True

    async def show_image(self, base_64: str):
        """Hook invoked with each screenshot when ``show_images`` is set.

        The default implementation does nothing.
        """
        pass

    async def play(self, *, computer: "Computer", item: dict) -> list:
        """Handle each item; may cause a computer action + screenshot.

        Args:
            computer: Object whose coroutine methods implement the actions
                (``click``, ``type``, ``screenshot``, ...).
            item: One output item; its ``"type"`` selects the handling.

        Returns:
            A list with one ``function_call_output`` or
            ``computer_call_output`` item, or an empty list for any other
            item type (including ``"message"``).

        Raises:
            ValueError: If a pending safety check is not acknowledged, or (in
                browser environments) ``check_blocklisted_url`` rejects the
                current URL.
        """
        item_type = item["type"]

        if item_type == "message":
            if self.print_steps:
                print(item["content"][0]["text"])

        if item_type == "function_call":
            name, args = item["name"], json.loads(item["arguments"])
            if self.print_steps:
                print(f"{name}({args})")

            if hasattr(computer, name):  # if function exists on computer, call it
                method = getattr(computer, name)
                await method(**args)
            return [
                {
                    "type": "function_call_output",
                    "call_id": item["call_id"],
                    "output": "success",  # hard-coded output for demo
                }
            ]

        if item_type == "computer_call":
            action = item["action"]
            action_type = action["type"]
            action_args = {k: v for k, v in action.items() if k != "type"}
            if self.print_steps:
                print(f"{action_type}({action_args})")

            method = getattr(computer, action_type)
            await method(**action_args)

            screenshot_base64 = await computer.screenshot()
            if self.show_images:
                # show_image is a coroutine function: without `await` the
                # hook (and any override) would never actually run.
                await self.show_image(screenshot_base64)

            # if user doesn't ack all safety checks exit with error
            pending_checks = item.get("pending_safety_checks", [])
            for check in pending_checks:
                message = check["message"]
                if not await self.acknowledge_safety_check_callback(message):
                    raise ValueError(
                        f"Safety check failed: {message}. Cannot continue with unacknowledged safety checks."
                    )

            call_output = {
                "type": "computer_call_output",
                "call_id": item["call_id"],
                "acknowledged_safety_checks": pending_checks,
                "output": {
                    "type": "input_image",
                    "image_url": f"data:image/png;base64,{screenshot_base64}",
                },
            }

            # additional URL safety checks for browser environments
            if computer.environment == "browser":
                current_url = await computer.get_current_url()
                check_blocklisted_url(current_url)
                call_output["output"]["current_url"] = current_url

            return [call_output]
        return []
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import time
|
|
3
|
+
from dotenv import load_dotenv
|
|
4
|
+
from scrapybara import AsyncScrapybara
|
|
5
|
+
from playwright.async_api import async_playwright, Browser, Page
|
|
6
|
+
from meshagent.computers.utils import BLOCKED_DOMAINS
|
|
7
|
+
|
|
8
|
+
load_dotenv()
|
|
9
|
+
|
|
10
|
+
# Translation table applied by the `keypress` methods below: incoming key
# names are lower-cased and looked up here; names without an entry are passed
# through lower-cased.
CUA_KEY_TO_SCRAPYBARA_KEY = {
    "/": "slash",
    "\\": "backslash",
    "arrowdown": "Down",
    "arrowleft": "Left",
    "arrowright": "Right",
    "arrowup": "Up",
    "backspace": "BackSpace",
    "capslock": "Caps_Lock",
    "cmd": "Meta_L",
    "delete": "Delete",
    "end": "End",
    "enter": "Return",
    "esc": "Escape",
    "home": "Home",
    "insert": "Insert",
    "option": "Alt_L",
    "pagedown": "Page_Down",
    "pageup": "Page_Up",
    "tab": "Tab",
    "win": "Meta_L",
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ScrapybaraBrowser:
    """
    Scrapybara provides virtual desktops and browsers in the cloud. https://scrapybara.com
    You can try OpenAI CUA for free at https://computer.new or read our CUA Quickstart at https://computer.new/cua.

    Entering the context starts a Scrapybara browser instance and attaches
    Playwright to it over CDP; leaving it closes Playwright and stops the
    instance.  Input actions and screenshots go through the instance API,
    while ``goto`` uses the attached Playwright page.
    """

    def __init__(self):
        self.client = AsyncScrapybara(api_key=os.getenv("SCRAPYBARA_API_KEY"))
        self.environment = "browser"
        self.dimensions = (1024, 768)
        self._playwright = None
        self._browser: Browser | None = None
        self._page: Page | None = None

    async def __aenter__(self):
        """Start the remote browser and connect Playwright to it."""
        print("Starting scrapybara browser")
        blocked_domains = [
            domain.replace("https://", "").replace("www.", "")
            for domain in BLOCKED_DOMAINS
        ]
        self.instance = await self.client.start_browser(blocked_domains=blocked_domains)
        try:
            print("Scrapybara browser started ₍ᐢ•(ܫ)•ᐢ₎")
            # Awaited like the other instance calls in this class
            # (get_cdp_url, get_current_url, screenshot).
            stream = await self.instance.get_stream_url()
            print(
                f"You can view and interact with the stream at {stream.stream_url}"
            )
            self._playwright_context = async_playwright()
            self._playwright = await self._playwright_context.__aenter__()
            self._browser = await self._playwright.chromium.connect_over_cdp(
                (await self.instance.get_cdp_url()).cdp_url
            )
            self._page = self._browser.contexts[0].pages[0]
        except BaseException:
            # __aexit__ is not called when __aenter__ raises, so release what
            # was acquired here instead of leaking the cloud instance.
            if self._playwright is not None:
                await self._playwright_context.__aexit__(None, None, None)
                self._playwright = None
            await self.instance.stop()
            raise
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close Playwright, then stop the instance even if that fails."""
        try:
            await self._playwright_context.__aexit__(exc_type, exc_val, exc_tb)
        finally:
            print("Stopping scrapybara browser")
            await self.instance.stop()
            print("Scrapybara browser stopped ₍ᐢ-(ェ)-ᐢ₎")

    async def goto(self, url: str) -> None:
        """Navigate the attached Playwright page to ``url``."""
        await self._page.goto(url)

    async def get_current_url(self) -> str:
        """Return the current URL as reported by the instance."""
        return (await self.instance.get_current_url()).current_url

    async def screenshot(self) -> str:
        """Return the instance's screenshot as a base64 string."""
        return (await self.instance.screenshot()).base_64_image

    async def click(self, x: int, y: int, button: str = "left") -> None:
        """Single-click at ``(x, y)``; ``"wheel"`` is sent as ``"middle"``."""
        button = "middle" if button == "wheel" else button
        await self.instance.computer(
            action="click_mouse",
            click_type="click",
            button=button,
            coordinates=[x, y],
            num_clicks=1,
        )

    async def double_click(self, x: int, y: int) -> None:
        """Double-click the left button at ``(x, y)``."""
        await self.instance.computer(
            action="click_mouse",
            click_type="click",
            button="left",
            coordinates=[x, y],
            num_clicks=2,
        )

    async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Scroll at ``(x, y)``; each delta is floor-divided by 20."""
        # NOTE(review): floor division rounds toward negative infinity, so
        # -5 // 20 == -1 while 5 // 20 == 0 - confirm this is intended.
        await self.instance.computer(
            action="scroll",
            coordinates=[x, y],
            delta_x=scroll_x // 20,
            delta_y=scroll_y // 20,
        )

    async def type(self, text: str) -> None:
        """Type ``text`` on the instance."""
        await self.instance.computer(action="type_text", text=text)

    async def wait(self, ms: int = 1000) -> None:
        """Pause for ``ms`` milliseconds without blocking the event loop."""
        # Local import: this module does not import asyncio at the top.
        import asyncio

        await asyncio.sleep(ms / 1000)
        # Scrapybara also has `self.instance.computer(action="wait", duration=ms / 1000)`

    async def move(self, x: int, y: int) -> None:
        """Move the pointer to ``(x, y)``."""
        await self.instance.computer(action="move_mouse", coordinates=[x, y])

    async def keypress(self, keys: list[str]) -> None:
        """Press ``keys`` after translating them with the key table."""
        mapped_keys = [
            CUA_KEY_TO_SCRAPYBARA_KEY.get(key.lower(), key.lower()) for key in keys
        ]
        await self.instance.computer(action="press_key", keys=mapped_keys)

    async def drag(self, path: list[dict[str, int]]) -> None:
        """Drag along ``path`` (``{"x", "y"}`` points); empty path is a no-op."""
        if not path:
            return
        path = [[point["x"], point["y"]] for point in path]
        await self.instance.computer(action="drag_mouse", path=path)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class ScrapybaraUbuntu:
    """
    Scrapybara provides virtual desktops and browsers in the cloud.
    You can try OpenAI CUA for free at https://computer.new or read our CUA Quickstart at https://computer.new/cua.

    Entering the context starts a Scrapybara Ubuntu instance; leaving it
    stops the instance.  All actions go through the instance API.
    """

    def __init__(self):
        self.client = AsyncScrapybara(api_key=os.getenv("SCRAPYBARA_API_KEY"))
        self.environment = "linux"  # "windows", "mac", "linux", or "browser"
        self.dimensions = (1024, 768)

    async def __aenter__(self):
        """Start the Ubuntu instance and print its stream URL."""
        print("Starting Scrapybara Ubuntu instance")
        blocked_domains = [
            domain.replace("https://", "").replace("www.", "")
            for domain in BLOCKED_DOMAINS
        ]
        self.instance = await self.client.start_ubuntu(blocked_domains=blocked_domains)
        print("Scrapybara Ubuntu instance started ₍ᐢ•(ܫ)•ᐢ₎")
        try:
            # Awaited like the other instance calls in this class.
            stream = await self.instance.get_stream_url()
        except BaseException:
            # __aexit__ is not called when __aenter__ raises; stop the
            # instance here so it is not leaked.
            await self.instance.stop()
            raise
        print(f"You can view and interact with the stream at {stream.stream_url}")
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Stop the Ubuntu instance."""
        print("Stopping Scrapybara Ubuntu instance")
        # Await the stop() coroutine itself; the instance object is not
        # awaitable.
        await self.instance.stop()
        print("Scrapybara Ubuntu instance stopped ₍ᐢ-(ェ)-ᐢ₎")

    async def screenshot(self) -> str:
        """Return the instance's screenshot as a base64 string."""
        return (await self.instance.screenshot()).base_64_image

    async def click(self, x: int, y: int, button: str = "left") -> None:
        """Single-click at ``(x, y)``; ``"wheel"`` is sent as ``"middle"``."""
        button = "middle" if button == "wheel" else button
        await self.instance.computer(
            action="click_mouse",
            click_type="click",
            button=button,
            coordinates=[x, y],
            num_clicks=1,
        )

    async def double_click(self, x: int, y: int) -> None:
        """Double-click the left button at ``(x, y)``."""
        await self.instance.computer(
            action="click_mouse",
            click_type="click",
            button="left",
            coordinates=[x, y],
            num_clicks=2,
        )

    async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Scroll at ``(x, y)``; each delta is floor-divided by 20."""
        # NOTE(review): floor division rounds toward negative infinity, so
        # -5 // 20 == -1 while 5 // 20 == 0 - confirm this is intended.
        await self.instance.computer(
            action="scroll",
            coordinates=[x, y],
            delta_x=scroll_x // 20,
            delta_y=scroll_y // 20,
        )

    async def type(self, text: str) -> None:
        """Type ``text`` on the instance."""
        await self.instance.computer(action="type_text", text=text)

    async def wait(self, ms: int = 1000) -> None:
        """Pause for ``ms`` milliseconds without blocking the event loop."""
        # Local import: this module does not import asyncio at the top.
        import asyncio

        await asyncio.sleep(ms / 1000)
        # Scrapybara also has `self.instance.computer(action="wait", duration=ms / 1000)`

    async def move(self, x: int, y: int) -> None:
        """Move the pointer to ``(x, y)``."""
        await self.instance.computer(action="move_mouse", coordinates=[x, y])

    async def keypress(self, keys: list[str]) -> None:
        """Press ``keys`` after translating them with the key table."""
        mapped_keys = [
            CUA_KEY_TO_SCRAPYBARA_KEY.get(key.lower(), key.lower()) for key in keys
        ]
        await self.instance.computer(action="press_key", keys=mapped_keys)

    async def drag(self, path: list[dict[str, int]]) -> None:
        """Drag along ``path`` (``{"x", "y"}`` points); empty path is a no-op."""
        if not path:
            return
        path = [[point["x"], point["y"]] for point in path]
        await self.instance.computer(action="drag_mouse", path=path)
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import requests
|
|
3
|
+
from dotenv import load_dotenv
|
|
4
|
+
import json
|
|
5
|
+
import base64
|
|
6
|
+
from PIL import Image
|
|
7
|
+
from io import BytesIO
|
|
8
|
+
import io
|
|
9
|
+
from urllib.parse import urlparse
|
|
10
|
+
|
|
11
|
+
load_dotenv(override=True)
|
|
12
|
+
|
|
13
|
+
# Hostnames rejected by check_blocklisted_url() below; that function also
# rejects any subdomain of an entry.
BLOCKED_DOMAINS = [
    "maliciousbook.com",
    "evilvideos.com",
    "darkwebforum.com",
    "shadytok.com",
    "suspiciouspins.com",
    "ilanbigio.com",
]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def pp(obj):
    """Pretty-print ``obj`` to stdout as JSON indented by four spaces."""
    rendered = json.dumps(obj, indent=4)
    print(rendered)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def show_image(base_64_image):
    """Decode a base64-encoded image and display it with PIL's ``Image.show``."""
    raw_bytes = base64.b64decode(base_64_image)
    Image.open(BytesIO(raw_bytes)).show()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def calculate_image_dimensions(base_64_image):
    """Return the ``size`` PIL reports for a base64-encoded image."""
    buffer = io.BytesIO(base64.b64decode(base_64_image))
    return Image.open(buffer).size
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def sanitize_message(msg: dict) -> dict:
    """Redact the screenshot from a ``computer_call_output`` message.

    For such a message whose ``"output"`` is a dict (or missing), a shallow
    copy is returned in which ``output["image_url"]`` is set to
    ``"[omitted]"``; the input is not modified.  Any other message is
    returned as-is (the same object).
    """
    if msg.get("type") != "computer_call_output":
        return msg

    payload = msg.get("output", {})
    if not isinstance(payload, dict):
        return msg

    redacted = msg.copy()
    redacted["output"] = {**payload, "image_url": "[omitted]"}
    return redacted
|
|
47
|
+
return msg
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def track_response(**kwargs):
    """POST ``kwargs`` as a JSON body to the OpenAI Responses endpoint.

    The bearer token is read from ``OPENAI_API_KEY``; when ``OPENAI_ORG`` is
    set it is sent as the ``Openai-Organization`` header.  A non-200 status
    is reported on stdout but not raised.

    Returns:
        The decoded JSON body of the response.
    """
    # NOTE(review): the request is sent without a timeout - confirm whether
    # an unbounded wait is acceptable here.
    request_headers = {
        "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}",
        "Content-Type": "application/json",
        # TODO: remove for launch
        "Openai-beta": "responses=v1",
    }

    organization = os.getenv("OPENAI_ORG")
    if organization:
        request_headers["Openai-Organization"] = organization

    reply = requests.post(
        "https://api.openai.com/v1/responses",
        headers=request_headers,
        json=kwargs,
    )

    if reply.status_code != 200:
        print(f"Error: {reply.status_code} {reply.text}")

    return reply.json()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def check_blocklisted_url(url: str) -> None:
|
|
72
|
+
"""Raise ValueError if the given URL (including subdomains) is in the blocklist."""
|
|
73
|
+
hostname = urlparse(url).hostname or ""
|
|
74
|
+
if any(
|
|
75
|
+
hostname == blocked or hostname.endswith(f".{blocked}")
|
|
76
|
+
for blocked in BLOCKED_DOMAINS
|
|
77
|
+
):
|
|
78
|
+
raise ValueError(f"Blocked URL: {url}")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.6.7"
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: meshagent-computers
|
|
3
|
+
Version: 0.6.7
|
|
4
|
+
Summary: Computer Building Blocks for Meshagent
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
Project-URL: Documentation, https://docs.meshagent.com
|
|
7
|
+
Project-URL: Website, https://www.meshagent.com
|
|
8
|
+
Project-URL: Source, https://www.meshagent.com
|
|
9
|
+
Requires-Python: >=3.13
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: pytest~=8.4
|
|
13
|
+
Requires-Dist: pytest-asyncio~=0.26
|
|
14
|
+
Requires-Dist: openai~=2.6.0
|
|
15
|
+
Requires-Dist: meshagent-api~=0.6.7
|
|
16
|
+
Requires-Dist: meshagent-agents~=0.6.7
|
|
17
|
+
Requires-Dist: meshagent-tools~=0.6.7
|
|
18
|
+
Requires-Dist: playwright~=1.51
|
|
19
|
+
Requires-Dist: browserbase~=1.2
|
|
20
|
+
Requires-Dist: scrapybara~=2.4
|
|
21
|
+
Requires-Dist: pillow~=11.3.0
|
|
22
|
+
Requires-Dist: python-dotenv~=1.1.1
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# [Meshagent](https://www.meshagent.com)
|
|
26
|
+
|
|
27
|
+
## MeshAgent Computers
|
|
28
|
+
|
|
29
|
+
The ``meshagent.computers`` package defines abstractions for controlling browsers and operating systems and providing these abilities to agents.
|
|
30
|
+
|
|
31
|
+
### ComputerAgent
|
|
32
|
+
The ComputerAgent in `meshagent-computers` extends the ``ChatBot`` with support for using browsers and computers. The computer agent will periodically send screenshots to participants on the thread using the MeshAgent messaging protocol, by sending a message of the type "computer_screen" and an attachment that contains a binary screenshot.
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
import asyncio
from meshagent.api import RequiredToolkit
|
|
36
|
+
from meshagent.openai import OpenAIResponsesAdapter
|
|
37
|
+
from meshagent.computers import ComputerAgent, BrowserbaseBrowser, Operator
|
|
38
|
+
from meshagent.api.services import ServiceHost
|
|
39
|
+
|
|
40
|
+
service = ServiceHost()
|
|
41
|
+
|
|
42
|
+
@service.path("/computeragent")
|
|
43
|
+
class BrowserbaseAgent(ComputerAgent):
|
|
44
|
+
def __init__(self):
|
|
45
|
+
super().__init__(
|
|
46
|
+
name="meshagent.browser",
|
|
47
|
+
title="browser agent",
|
|
48
|
+
description="a task runner that can use a browser",
|
|
49
|
+
requires=[RequiredToolkit(name="ui", tools=[])],
|
|
50
|
+
llm_adapter=OpenAIResponsesAdapter(
|
|
51
|
+
model="computer-use-preview",
|
|
52
|
+
response_options={"reasoning": {"generate_summary": "concise"}, "truncation": "auto"},
|
|
53
|
+
),
|
|
54
|
+
labels=["tasks", "computers"],
|
|
55
|
+
computer_cls=BrowserbaseBrowser,
|
|
56
|
+
operator_cls=Operator
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
asyncio.run(service.run())
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
### Learn more about MeshAgent on our website or check out the docs for additional examples!
|
|
64
|
+
|
|
65
|
+
**Website**: [www.meshagent.com](https://www.meshagent.com/)
|
|
66
|
+
|
|
67
|
+
**Documentation**: [docs.meshagent.com](https://docs.meshagent.com/)
|
|
68
|
+
|
|
69
|
+
---
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
meshagent/computers/__init__.py,sha256=DEm-FN2iAoC5AJmbIeZR0NrNUDGyh0hWqjdfBhv9GSg,519
|
|
2
|
+
meshagent/computers/agent.py,sha256=irojG8qXxfdJ2shs0nxdEUWLP6Wycpx3IcXKjr-Wz6g,8129
|
|
3
|
+
meshagent/computers/base_playwright.py,sha256=QOzopf6Drs0uP_itg9iusZ2Vn6BvRoV6iQtCRCmNwRA,5887
|
|
4
|
+
meshagent/computers/browserbase.py,sha256=VdvGsasnEMVndi4fSyF5EbXMIRPRMmVzCk603H_dyHQ,8104
|
|
5
|
+
meshagent/computers/computer.py,sha256=znXIMMNxbDBYH0aRfbTjQkVFuAmuS9rqoahBAx_lCrI,1068
|
|
6
|
+
meshagent/computers/docker.py,sha256=ZR9l8HOf9U8FL51oaM9t73emJQbOHjQ7-aC2UzHhDMU,6332
|
|
7
|
+
meshagent/computers/local_playwright.py,sha256=r28blerW1BxiVL0B4Pnkv9Qz5-LtU83rQOrHhXbjeXQ,930
|
|
8
|
+
meshagent/computers/operator.py,sha256=29LDS6Jmg5xxIWD_mpXsA0eP0ZjwNYkwZQFChZzJuNs,2914
|
|
9
|
+
meshagent/computers/scrapybara.py,sha256=IoeV02QcBDkpW05A1NH-lc4jnBEmZ4SPsRo3nnXC3UE,7502
|
|
10
|
+
meshagent/computers/utils.py,sha256=N7Ltjx3HfhJ-CYectNodGsBtddk6mSTHkCH0meqHT70,2080
|
|
11
|
+
meshagent/computers/version.py,sha256=F6dMrYXtnIxoVQIhQnq6i1IIuwiHqeZxZMHThUjr2vM,22
|
|
12
|
+
meshagent_computers-0.6.7.dist-info/licenses/LICENSE,sha256=eTt0SPW-sVNdkZe9PS_S8WfCIyLjRXRl7sUBWdlteFg,10254
|
|
13
|
+
meshagent_computers-0.6.7.dist-info/METADATA,sha256=1wY8SwHQL0h4CWH5XUxN16bnbIzEj4fTuCSymA586sE,2532
|
|
14
|
+
meshagent_computers-0.6.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
15
|
+
meshagent_computers-0.6.7.dist-info/top_level.txt,sha256=GlcXnHtRP6m7zlG3Df04M35OsHtNXy_DY09oFwWrH74,10
|
|
16
|
+
meshagent_computers-0.6.7.dist-info/RECORD,,
|