orgo 0.0.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orgo/__init__.py +7 -0
- orgo/api/__init__.py +6 -0
- orgo/api/client.py +183 -0
- orgo/computer.py +326 -0
- orgo/project.py +87 -0
- orgo/prompt.py +452 -0
- orgo/utils/__init__.py +6 -0
- orgo/utils/auth.py +17 -0
- orgo-0.0.32.dist-info/METADATA +45 -0
- orgo-0.0.32.dist-info/RECORD +12 -0
- orgo-0.0.32.dist-info/WHEEL +5 -0
- orgo-0.0.32.dist-info/top_level.txt +1 -0
orgo/__init__.py
ADDED
orgo/api/__init__.py
ADDED
orgo/api/client.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""API client for Orgo service"""
|
|
2
|
+
|
|
3
|
+
import requests
|
|
4
|
+
from typing import Dict, Any, Optional, List
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
from orgo.utils.auth import get_api_key
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
class ApiClient:
    """Thin HTTP client for the Orgo REST API.

    Every public method maps to one endpoint and returns the decoded JSON
    body. Failures are surfaced as plain ``Exception`` instances carrying a
    user-facing message (callers in this package catch ``Exception``).
    """

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
        """Create a client with an authenticated ``requests`` session.

        Args:
            api_key: Orgo API key; resolved through ``get_api_key``.
            base_url: API root; defaults to ``https://www.orgo.ai/api``.
        """
        self.api_key = get_api_key(api_key)
        self.base_url = base_url or "https://www.orgo.ai/api"
        self.session = requests.Session()
        self.session.headers.update({
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json"
        })

    def _request(self, method: str, endpoint: str, data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Send one request and return the decoded JSON body.

        Args:
            method: HTTP verb. ``GET`` sends ``data`` as query parameters,
                every other verb sends it as a JSON body.
            endpoint: Path relative to ``base_url`` (no leading slash).
            data: Optional payload.

        Returns:
            The parsed JSON body, or ``{}`` when the server answered
            successfully with an empty body.

        Raises:
            Exception: With the server-provided ``error`` text when available,
                otherwise with the HTTP status, a generic connection message,
                or an invalid-response message.
        """
        url = f"{self.base_url}/{endpoint}"

        # Only transport/HTTP-status problems are handled in this try block;
        # body decoding happens afterwards so that a malformed or empty body
        # is not misreported as a connection failure.
        try:
            if method.upper() == "GET":
                response = self.session.get(url, params=data)
            else:
                response = self.session.request(method, url, json=data)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            # Log the full error for debugging
            logger.debug(f"API request failed: {method} {url}", exc_info=True)

            failed = getattr(e, "response", None)
            if failed is not None:
                try:
                    error_data = failed.json()
                except ValueError:
                    error_data = None
                # The error payload is only trusted when it is a JSON object.
                if isinstance(error_data, dict) and "error" in error_data:
                    raise Exception(error_data["error"]) from None
                raise Exception(f"Request failed with status {failed.status_code}") from None

            # Generic error message without exposing internal details
            raise Exception("Failed to connect to Orgo service. Please check your connection and try again.") from None

        # A successful response without a body (e.g. 204) carries no data.
        if not response.content:
            return {}
        try:
            return response.json()
        except ValueError:
            raise Exception("Orgo service returned a response that is not valid JSON.") from None

    def _click(self, computer_id: str, x: int, y: int, button: str, double: bool = False) -> Dict[str, Any]:
        """Post a click event; ``double`` is only sent when it is set."""
        payload: Dict[str, Any] = {"button": button, "x": x, "y": y}
        if double:
            payload["double"] = True
        return self._request("POST", f"computers/{computer_id}/click", payload)

    # Project methods
    def create_project(self, name: str) -> Dict[str, Any]:
        """Create a new named project"""
        return self._request("POST", "projects", {"name": name})

    def get_project_by_name(self, name: str) -> Dict[str, Any]:
        """Get project details by name.

        Raises:
            Exception: If no listed project has the given name.
        """
        for project in self.list_projects():
            if project.get("name") == name:
                return project
        raise Exception(f"Project '{name}' not found") from None

    def get_project(self, project_id: str) -> Dict[str, Any]:
        """Get project details by ID"""
        return self._request("GET", f"projects/{project_id}")

    def list_projects(self) -> List[Dict[str, Any]]:
        """List all projects"""
        response = self._request("GET", "projects")
        return response.get("projects", [])

    def delete_project(self, project_id: str) -> Dict[str, Any]:
        """Delete a project and all its computers"""
        return self._request("DELETE", f"projects/{project_id}")

    # Computer methods
    def create_computer(self, project_id: str, computer_name: str,
                        os: str = "linux", ram: int = 2, cpu: int = 2,
                        gpu: str = "none") -> Dict[str, Any]:
        """Create a new computer within a project"""
        return self._request("POST", "computers", {
            "project_id": project_id,
            "name": computer_name,
            "os": os,
            "ram": ram,
            "cpu": cpu,
            "gpu": gpu
        })

    def list_computers(self, project_id: str) -> List[Dict[str, Any]]:
        """List all computers in a project (the project's ``desktops`` field)"""
        project = self.get_project(project_id)
        return project.get("desktops", [])

    def get_computer(self, computer_id: str) -> Dict[str, Any]:
        """Get computer details"""
        return self._request("GET", f"computers/{computer_id}")

    def delete_computer(self, computer_id: str) -> Dict[str, Any]:
        """Delete a computer"""
        return self._request("DELETE", f"computers/{computer_id}")

    def restart_computer(self, computer_id: str) -> Dict[str, Any]:
        """Restart a computer"""
        return self._request("POST", f"computers/{computer_id}/restart")

    # Computer control methods
    def left_click(self, computer_id: str, x: int, y: int) -> Dict[str, Any]:
        """Left-click at the given coordinates"""
        return self._click(computer_id, x, y, "left")

    def right_click(self, computer_id: str, x: int, y: int) -> Dict[str, Any]:
        """Right-click at the given coordinates"""
        return self._click(computer_id, x, y, "right")

    def double_click(self, computer_id: str, x: int, y: int) -> Dict[str, Any]:
        """Double-click (left button) at the given coordinates"""
        return self._click(computer_id, x, y, "left", double=True)

    def drag(self, computer_id: str, start_x: int, start_y: int,
             end_x: int, end_y: int, button: str = "left",
             duration: float = 0.5) -> Dict[str, Any]:
        """Perform a drag operation from start to end coordinates"""
        return self._request("POST", f"computers/{computer_id}/drag", {
            "start_x": start_x,
            "start_y": start_y,
            "end_x": end_x,
            "end_y": end_y,
            "button": button,
            "duration": duration
        })

    def scroll(self, computer_id: str, direction: str, amount: int = 3) -> Dict[str, Any]:
        """Scroll in the given direction by ``amount``"""
        return self._request("POST", f"computers/{computer_id}/scroll", {
            "direction": direction, "amount": amount
        })

    def type_text(self, computer_id: str, text: str) -> Dict[str, Any]:
        """Type the given text on the computer"""
        return self._request("POST", f"computers/{computer_id}/type", {
            "text": text
        })

    def key_press(self, computer_id: str, key: str) -> Dict[str, Any]:
        """Press a key on the computer"""
        return self._request("POST", f"computers/{computer_id}/key", {
            "key": key
        })

    def get_screenshot(self, computer_id: str) -> Dict[str, Any]:
        """Request a screenshot of the computer"""
        return self._request("GET", f"computers/{computer_id}/screenshot")

    def execute_bash(self, computer_id: str, command: str) -> Dict[str, Any]:
        """Execute a bash command on the computer"""
        return self._request("POST", f"computers/{computer_id}/bash", {
            "command": command
        })

    def execute_python(self, computer_id: str, code: str, timeout: int = 10) -> Dict[str, Any]:
        """Execute Python code on the computer"""
        return self._request("POST", f"computers/{computer_id}/exec", {
            "code": code,
            "timeout": timeout
        })

    def wait(self, computer_id: str, duration: float) -> Dict[str, Any]:
        """Ask the computer to wait for ``duration``"""
        return self._request("POST", f"computers/{computer_id}/wait", {
            "duration": duration
        })

    # Streaming methods
    def start_stream(self, computer_id: str, connection_name: str) -> Dict[str, Any]:
        """Start streaming to a configured RTMP connection"""
        return self._request("POST", f"computers/{computer_id}/stream/start", {
            "connection_name": connection_name
        })

    def stop_stream(self, computer_id: str) -> Dict[str, Any]:
        """Stop the active stream"""
        return self._request("POST", f"computers/{computer_id}/stream/stop")

    def get_stream_status(self, computer_id: str) -> Dict[str, Any]:
        """Get current stream status"""
        return self._request("GET", f"computers/{computer_id}/stream/status")
|
orgo/computer.py
ADDED
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
"""Computer class for interacting with Orgo virtual environments"""
|
|
2
|
+
import os as operating_system
|
|
3
|
+
import base64
|
|
4
|
+
import logging
|
|
5
|
+
import uuid
|
|
6
|
+
import io
|
|
7
|
+
from typing import Dict, List, Any, Optional, Callable, Literal, Union
|
|
8
|
+
from PIL import Image
|
|
9
|
+
import requests
|
|
10
|
+
from requests.exceptions import RequestException
|
|
11
|
+
|
|
12
|
+
from .api.client import ApiClient
|
|
13
|
+
from .prompt import get_provider
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
class Computer:
    """Handle to one Orgo virtual computer, with control and AI-prompt helpers."""

    def __init__(self,
                 project: Optional[Union[str, 'Project']] = None,
                 name: Optional[str] = None,
                 computer_id: Optional[str] = None,
                 api_key: Optional[str] = None,
                 base_api_url: Optional[str] = None,
                 ram: Optional[Literal[1, 2, 4, 8, 16, 32, 64]] = None,
                 memory: Optional[Literal[1, 2, 4, 8, 16, 32, 64]] = None,
                 cpu: Optional[Literal[1, 2, 4, 8, 16]] = None,
                 os: Optional[Literal["linux", "windows"]] = None,
                 gpu: Optional[Literal["none", "a10", "l40s", "a100-40gb", "a100-80gb"]] = None):
        """
        Initialize an Orgo virtual computer.

        Args:
            project: Project name (str) or Project instance. If not provided, creates a new project.
            name: Computer name within the project (optional, auto-generated if not provided)
            computer_id: Existing computer ID to connect to (optional)
            api_key: Orgo API key (defaults to ORGO_API_KEY env var)
            base_api_url: Custom API URL (optional)
            ram/memory: RAM in GB (1, 2, 4, 8, 16, 32, or 64) - only used when creating
            cpu: CPU cores (1, 2, 4, 8, or 16) - only used when creating
            os: Operating system ("linux" or "windows") - only used when creating
            gpu: GPU type - only used when creating

        Raises:
            ValueError: If ``project`` is neither a string nor a Project
                instance, or if a new computer is created with an invalid spec.

        Examples:
            # Create computer in new project
            computer = Computer(ram=4, cpu=2)

            # Create computer in existing project
            computer = Computer(project="manus", ram=4, cpu=2)

            # Connect to existing computer by ID
            computer = Computer(computer_id="11c4fd46-e069-4c32-be65-f82d9f87b9b8")
        """
        self.api_key = api_key or operating_system.environ.get("ORGO_API_KEY")
        self.base_api_url = base_api_url
        self.api = ApiClient(self.api_key, self.base_api_url)

        # Handle memory parameter as an alias for ram
        if ram is None and memory is not None:
            ram = memory

        # Store configuration
        self.os = os or "linux"
        self.ram = ram or 2
        self.cpu = cpu or 2
        self.gpu = gpu or "none"

        if computer_id:
            # Just store the computer ID, no API call needed
            self.computer_id = computer_id
            self.name = name
            self.project_id = None
            self.project_name = None
            logger.info(f"Connected to computer ID: {self.computer_id}")
        elif project:
            # Work with specified project
            if isinstance(project, str):
                # Project name provided
                self.project_name = project
                self._initialize_with_project_name(project, name)
            else:
                # Project instance provided (imported here, not at module top)
                from .project import Project as ProjectClass
                if isinstance(project, ProjectClass):
                    self.project_name = project.name
                    self.project_id = project.id
                    self._initialize_with_project_instance(project, name)
                else:
                    raise ValueError("project must be a string (project name) or Project instance")
        else:
            # No project specified, create a new one
            self._create_new_project_and_computer(name)

    def _attach_or_create(self, project_id: str, computers: List[Dict[str, Any]],
                          computer_name: Optional[str]):
        """Connect to a matching computer in ``computers`` or create a new one.

        With a name, the computer of that name is used if present. Without a
        name, the first listed computer is used. Otherwise a computer is
        created in ``project_id``.
        """
        existing = None
        if computer_name:
            existing = next((c for c in computers if c.get("name") == computer_name), None)
        elif computers:
            existing = computers[0]

        if existing is not None:
            self._connect_to_existing_computer(existing)
        else:
            self._create_computer(project_id, computer_name)

    def _initialize_with_project_name(self, project_name: str, computer_name: Optional[str]):
        """Initialize with a project name (create project if needed)"""
        # Only the lookup is guarded: a failure while listing or creating
        # computers must propagate instead of being mistaken for a missing
        # project (which would create a second project).
        try:
            project = self.api.get_project_by_name(project_name)
        except Exception:
            # Project doesn't exist, create it
            logger.info(f"Project {project_name} not found, creating new project")
            project = self.api.create_project(project_name)
            self.project_id = project.get("id")
            self._create_computer(self.project_id, computer_name)
            return

        self.project_id = project.get("id")
        computers = self.api.list_computers(self.project_id)
        self._attach_or_create(self.project_id, computers, computer_name)

    def _initialize_with_project_instance(self, project: 'Project', computer_name: Optional[str]):
        """Initialize with a Project instance"""
        self._attach_or_create(project.id, project.list_computers(), computer_name)

    def _create_new_project_and_computer(self, computer_name: Optional[str]):
        """Create a new project and computer"""
        # Generate a unique project name
        project_name = f"project-{uuid.uuid4().hex[:8]}"

        # Create the project
        project = self.api.create_project(project_name)
        self.project_id = project.get("id")
        self.project_name = project_name

        # Create a computer in the new project
        self._create_computer(self.project_id, computer_name)

    def _connect_to_existing_computer(self, computer_info: Dict[str, Any]):
        """Connect to an existing computer"""
        self.computer_id = computer_info.get("id")
        self.name = computer_info.get("name")
        logger.info(f"Connected to existing computer {self.name} (ID: {self.computer_id})")

    def _create_computer(self, project_id: str, computer_name: Optional[str]):
        """Create a new computer in the project.

        Raises:
            ValueError: If the stored ram/cpu/os/gpu is not an allowed value.
        """
        # Generate name if not provided
        if not computer_name:
            computer_name = f"desktop-{uuid.uuid4().hex[:8]}"

        self.name = computer_name

        # Validate parameters before any API call is made
        if self.ram not in [1, 2, 4, 8, 16, 32, 64]:
            raise ValueError("ram must be one of: 1, 2, 4, 8, 16, 32, 64 GB")
        if self.cpu not in [1, 2, 4, 8, 16]:
            raise ValueError("cpu must be one of: 1, 2, 4, 8, 16 cores")
        if self.os not in ["linux", "windows"]:
            raise ValueError("os must be either 'linux' or 'windows'")
        if self.gpu not in ["none", "a10", "l40s", "a100-40gb", "a100-80gb"]:
            raise ValueError("gpu must be one of: 'none', 'a10', 'l40s', 'a100-40gb', 'a100-80gb'")

        computer = self.api.create_computer(
            project_id=project_id,
            computer_name=computer_name,
            os=self.os,
            ram=self.ram,
            cpu=self.cpu,
            gpu=self.gpu
        )
        self.computer_id = computer.get("id")
        logger.info(f"Created new computer {self.name} (ID: {self.computer_id})")

    def status(self) -> Dict[str, Any]:
        """Get current computer status"""
        return self.api.get_computer(self.computer_id)

    def restart(self) -> Dict[str, Any]:
        """Restart the computer"""
        return self.api.restart_computer(self.computer_id)

    def destroy(self) -> Dict[str, Any]:
        """Terminate and delete the computer instance"""
        return self.api.delete_computer(self.computer_id)

    # Navigation methods
    def left_click(self, x: int, y: int) -> Dict[str, Any]:
        """Perform left mouse click at specified coordinates"""
        return self.api.left_click(self.computer_id, x, y)

    def right_click(self, x: int, y: int) -> Dict[str, Any]:
        """Perform right mouse click at specified coordinates"""
        return self.api.right_click(self.computer_id, x, y)

    def double_click(self, x: int, y: int) -> Dict[str, Any]:
        """Perform double click at specified coordinates"""
        return self.api.double_click(self.computer_id, x, y)

    def drag(self, start_x: int, start_y: int, end_x: int, end_y: int,
             button: str = "left", duration: float = 0.5) -> Dict[str, Any]:
        """Perform a smooth drag operation from start to end coordinates"""
        return self.api.drag(self.computer_id, start_x, start_y, end_x, end_y, button, duration)

    def scroll(self, direction: str = "down", amount: int = 3) -> Dict[str, Any]:
        """Scroll in specified direction and amount"""
        return self.api.scroll(self.computer_id, direction, amount)

    # Input methods
    def type(self, text: str) -> Dict[str, Any]:
        """Type the specified text"""
        return self.api.type_text(self.computer_id, text)

    def key(self, key: str) -> Dict[str, Any]:
        """Press a key or key combination (e.g., "Enter", "ctrl+c")"""
        return self.api.key_press(self.computer_id, key)

    # View methods
    def _screenshot_payload(self) -> str:
        """Return the ``image`` field of a fresh screenshot response ("" if absent)."""
        response = self.api.get_screenshot(self.computer_id)
        return response.get("image", "")

    @staticmethod
    def _download_image(url: str) -> bytes:
        """Download the raw image bytes behind ``url``; raises on HTTP errors."""
        img_response = requests.get(url)
        img_response.raise_for_status()
        return img_response.content

    def screenshot(self) -> "Image.Image":
        """Capture screenshot and return as PIL Image"""
        image_data = self._screenshot_payload()

        # The API answers either with a URL to fetch or with inline base64.
        if image_data.startswith(('http://', 'https://')):
            raw = self._download_image(image_data)
        else:
            raw = base64.b64decode(image_data)
        return Image.open(io.BytesIO(raw))

    def screenshot_base64(self) -> str:
        """Capture screenshot and return as base64 string"""
        image_data = self._screenshot_payload()

        if image_data.startswith(('http://', 'https://')):
            return base64.b64encode(self._download_image(image_data)).decode('utf-8')
        # Inline payloads are returned as received.
        return image_data

    # Execution methods
    def bash(self, command: str) -> str:
        """Execute a bash command and return output"""
        response = self.api.execute_bash(self.computer_id, command)
        return response.get("output", "")

    def exec(self, code: str, timeout: int = 10) -> Dict[str, Any]:
        """Execute Python code on the remote computer"""
        return self.api.execute_python(self.computer_id, code, timeout)

    def wait(self, seconds: float) -> Dict[str, Any]:
        """Wait for specified number of seconds"""
        return self.api.wait(self.computer_id, seconds)

    # Streaming methods
    def start_stream(self, connection: str) -> Dict[str, Any]:
        """Start streaming the computer screen to an RTMP server"""
        return self.api.start_stream(self.computer_id, connection)

    def stop_stream(self) -> Dict[str, Any]:
        """Stop the active stream"""
        return self.api.stop_stream(self.computer_id)

    def stream_status(self) -> Dict[str, Any]:
        """Get the current streaming status"""
        return self.api.get_stream_status(self.computer_id)

    # AI control method
    def prompt(self,
               instruction: str,
               provider: str = "anthropic",
               model: str = "claude-3-7-sonnet-20250219",
               display_width: int = 1024,
               display_height: int = 768,
               callback: Optional[Callable[[str, Any], None]] = None,
               thinking_enabled: bool = False,
               thinking_budget: int = 1024,
               max_tokens: int = 4096,
               max_iterations: int = 20,
               max_saved_screenshots: int = 5,
               api_key: Optional[str] = None) -> List[Dict[str, Any]]:
        """Control the computer with natural language instructions using an AI assistant.

        All arguments are forwarded to the selected provider's ``execute``
        together with this computer's ID and Orgo credentials.
        """
        provider_instance = get_provider(provider)

        return provider_instance.execute(
            computer_id=self.computer_id,
            instruction=instruction,
            callback=callback,
            api_key=api_key,
            model=model,
            display_width=display_width,
            display_height=display_height,
            thinking_enabled=thinking_enabled,
            thinking_budget=thinking_budget,
            max_tokens=max_tokens,
            max_iterations=max_iterations,
            max_saved_screenshots=max_saved_screenshots,
            orgo_api_key=self.api_key,
            orgo_base_url=self.base_api_url
        )

    def __repr__(self):
        # Attributes may be missing if __init__ failed part-way; be defensive.
        project_name = getattr(self, 'project_name', None)
        name = getattr(self, 'name', None)
        project_str = f", project='{project_name}'" if project_name else ""
        name_str = f"name='{name}'" if name else f"id='{self.computer_id}'"
        return f"Computer({name_str}{project_str})"
|
orgo/project.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Project class for managing Orgo projects"""
|
|
2
|
+
import os as operating_system # Renamed to avoid any potential conflicts
|
|
3
|
+
import uuid
|
|
4
|
+
from typing import Dict, List, Any, Optional
|
|
5
|
+
|
|
6
|
+
from .api.client import ApiClient
|
|
7
|
+
|
|
8
|
+
class Project:
    """Handle to an Orgo project: a named container of computers."""

    def __init__(self,
                 name: Optional[str] = None,
                 api_key: Optional[str] = None,
                 base_api_url: Optional[str] = None):
        """
        Initialize an Orgo project.

        Args:
            name: Project name. If exists, connects to it. If not, creates it.
                A random ``project-<hex>`` name is generated when omitted.
            api_key: Orgo API key (defaults to ORGO_API_KEY env var)
            base_api_url: Custom API URL (optional)
        """
        self.api_key = api_key or operating_system.environ.get("ORGO_API_KEY")
        self.base_api_url = base_api_url
        self.api = ApiClient(self.api_key, self.base_api_url)

        # Generate a unique name if not provided
        self.name = name if name else f"project-{uuid.uuid4().hex[:8]}"

        # Try to get existing project or create new one
        self._initialize_project()

    def _initialize_project(self):
        """Get existing project or create new one"""
        try:
            project = self.api.get_project_by_name(self.name)
        except Exception:
            # Lookup failed (the client raises a plain Exception when the
            # project is not found), so create it.
            project = self.api.create_project(self.name)
        self.id = project.get("id")
        self._info = project

    def status(self) -> Dict[str, Any]:
        """Get project status"""
        return self.api.get_project(self.id)

    # NOTE(review): start_project / stop_project / restart_project are not
    # defined on the ApiClient shipped in this package version, so the three
    # methods below raise AttributeError unless a client that provides them
    # is assigned to ``self.api`` - confirm the intended endpoints.
    def start(self) -> Dict[str, Any]:
        """Start all computers in the project"""
        return self.api.start_project(self.id)

    def stop(self) -> Dict[str, Any]:
        """Stop all computers in the project"""
        return self.api.stop_project(self.id)

    def restart(self) -> Dict[str, Any]:
        """Restart all computers in the project"""
        return self.api.restart_project(self.id)

    def destroy(self) -> Dict[str, Any]:
        """Delete the project and all its computers"""
        return self.api.delete_project(self.id)

    def list_computers(self) -> List[Dict[str, Any]]:
        """List all computers in this project"""
        # The client looks the project up by ID (projects/{project_id}),
        # so the ID - not the display name - must be passed.
        return self.api.list_computers(self.id)

    def get_computer(self, computer_name: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """Get a specific computer in this project by name, or the first one if no name specified.

        Returns:
            The matching computer dict, or ``None`` when the project has no
            computers or none carries the requested name.
        """
        computers = self.list_computers()
        if not computers:
            return None

        if computer_name:
            return next((c for c in computers if c.get("name") == computer_name), None)

        # Return first computer if no name specified
        return computers[0]

    def __repr__(self):
        return f"Project(name='{self.name}', id='{self.id}')"
|
orgo/prompt.py
ADDED
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
# src/orgo/prompt.py
|
|
2
|
+
"""
|
|
3
|
+
Prompt module for interacting with virtual computers using AI models.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import base64
|
|
8
|
+
from typing import Dict, List, Any, Optional, Callable, Union, Protocol
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PromptProvider(Protocol):
    """Structural interface that every prompt provider implements."""

    def execute(
        self,
        computer_id: str,
        instruction: str,
        callback: Optional[Callable[[str, Any], None]] = None,
        **kwargs,
    ) -> List[Dict[str, Any]]:
        """
        Drive a computer from a natural-language instruction.

        Args:
            computer_id: ID of the computer to control
            instruction: The user's instruction
            callback: Optional function invoked with progress updates
            **kwargs: Extra provider-specific options

        Returns:
            The list of messages exchanged during the conversation
        """
        ...
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class AnthropicProvider:
|
|
35
|
+
"""Anthropic Claude-based prompt provider."""
|
|
36
|
+
|
|
37
|
+
def __init__(self):
|
|
38
|
+
"""Initialize the Anthropic provider."""
|
|
39
|
+
try:
|
|
40
|
+
import anthropic
|
|
41
|
+
self.anthropic = anthropic
|
|
42
|
+
except ImportError:
|
|
43
|
+
raise ImportError(
|
|
44
|
+
"Anthropic SDK not installed. Please install with 'pip install anthropic'"
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
def execute(self,
|
|
48
|
+
computer_id: str,
|
|
49
|
+
instruction: str,
|
|
50
|
+
callback: Optional[Callable[[str, Any], None]] = None,
|
|
51
|
+
api_key: Optional[str] = None,
|
|
52
|
+
model: str = "claude-3-7-sonnet-20250219",
|
|
53
|
+
display_width: int = 1024,
|
|
54
|
+
display_height: int = 768,
|
|
55
|
+
orgo_api_key: Optional[str] = None,
|
|
56
|
+
orgo_base_url: Optional[str] = None,
|
|
57
|
+
max_saved_screenshots: int = 2,
|
|
58
|
+
**kwargs) -> List[Dict[str, Any]]:
|
|
59
|
+
"""
|
|
60
|
+
Execute a prompt using Anthropic's Claude.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
computer_id: ID of the computer to control
|
|
64
|
+
instruction: User instruction
|
|
65
|
+
callback: Optional progress callback
|
|
66
|
+
api_key: Anthropic API key
|
|
67
|
+
model: Model to use
|
|
68
|
+
display_width: Display width in pixels
|
|
69
|
+
display_height: Display height in pixels
|
|
70
|
+
orgo_api_key: API key for Orgo (passed to ApiClient)
|
|
71
|
+
orgo_base_url: Base URL for Orgo API (passed to ApiClient)
|
|
72
|
+
max_saved_screenshots: Maximum number of screenshots to maintain in conversation history
|
|
73
|
+
**kwargs: Additional parameters to pass to the Anthropic API
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
List of messages from the conversation
|
|
77
|
+
"""
|
|
78
|
+
# Get API key from kwargs, env var, or raise error
|
|
79
|
+
api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
|
|
80
|
+
if not api_key:
|
|
81
|
+
raise ValueError("No Anthropic API key provided. Set ANTHROPIC_API_KEY environment variable or pass api_key.")
|
|
82
|
+
|
|
83
|
+
# Initialize the client
|
|
84
|
+
client = self.anthropic.Anthropic(api_key=api_key)
|
|
85
|
+
|
|
86
|
+
# Prepare the messages
|
|
87
|
+
messages = [{"role": "user", "content": instruction}]
|
|
88
|
+
|
|
89
|
+
# Set up the system prompt
|
|
90
|
+
system_prompt = f"""You are Claude, an AI assistant that controls a virtual Ubuntu computer with internet access.
|
|
91
|
+
|
|
92
|
+
<SYSTEM_CAPABILITY>
|
|
93
|
+
* You are utilising an Ubuntu virtual machine with a display resolution of {display_width}x{display_height}.
|
|
94
|
+
* You can take screenshots to see the current state and control the computer by clicking, typing, pressing keys, and scrolling.
|
|
95
|
+
* The virtual environment is an Ubuntu system with standard applications.
|
|
96
|
+
* Always start by taking a screenshot to see the current state before performing any actions.
|
|
97
|
+
</SYSTEM_CAPABILITY>
|
|
98
|
+
|
|
99
|
+
<UBUNTU_DESKTOP_GUIDELINES>
|
|
100
|
+
* CRITICAL INSTRUCTION: When opening applications or files on the Ubuntu desktop, you MUST USE DOUBLE-CLICK rather than single-click.
|
|
101
|
+
* Single-click only selects desktop icons but DOES NOT open them. To open desktop icons, you MUST use double-click.
|
|
102
|
+
* Common desktop interactions:
|
|
103
|
+
- Desktop icons: DOUBLE-CLICK to open applications and folders
|
|
104
|
+
- Menu items: SINGLE-CLICK to select options
|
|
105
|
+
- Taskbar icons: SINGLE-CLICK to open applications
|
|
106
|
+
- Window buttons: SINGLE-CLICK to use close, minimize, maximize buttons
|
|
107
|
+
- File browser items: DOUBLE-CLICK to open folders and files
|
|
108
|
+
- When submitting, use the 'Enter' key, not the 'Return' key.
|
|
109
|
+
* If you see an icon on the desktop that you need to open, ALWAYS use the double_click action, never use left_click.
|
|
110
|
+
</UBUNTU_DESKTOP_GUIDELINES>
|
|
111
|
+
|
|
112
|
+
<SCREENSHOT_GUIDELINES>
|
|
113
|
+
* Be mindful of how many screenshots you take - they consume significant memory.
|
|
114
|
+
* Only take screenshots when you need to see the current state of the screen.
|
|
115
|
+
* Try to batch multiple actions before taking another screenshot.
|
|
116
|
+
* For better performance, limit the number of screenshots you take.
|
|
117
|
+
</SCREENSHOT_GUIDELINES>"""
|
|
118
|
+
|
|
119
|
+
try:
|
|
120
|
+
# Define the computer tool per Anthropic's documentation
|
|
121
|
+
tools = [
|
|
122
|
+
{
|
|
123
|
+
"type": "computer_20250124",
|
|
124
|
+
"name": "computer",
|
|
125
|
+
"display_width_px": display_width,
|
|
126
|
+
"display_height_px": display_height,
|
|
127
|
+
"display_number": 1
|
|
128
|
+
}
|
|
129
|
+
]
|
|
130
|
+
|
|
131
|
+
# Start the conversation with Claude
|
|
132
|
+
if callback:
|
|
133
|
+
callback("status", "Starting conversation with Claude")
|
|
134
|
+
|
|
135
|
+
# Track whether we're in the agent loop
|
|
136
|
+
iteration = 0
|
|
137
|
+
max_iterations = kwargs.get("max_iterations", 20) # Default to 20 iterations max
|
|
138
|
+
|
|
139
|
+
# Create an API client with the proper settings
|
|
140
|
+
from .api.client import ApiClient
|
|
141
|
+
api_client = ApiClient(orgo_api_key, orgo_base_url)
|
|
142
|
+
|
|
143
|
+
# Track how many screenshots we've seen so we can prune when needed
|
|
144
|
+
screenshot_count = 0
|
|
145
|
+
|
|
146
|
+
# Start the agent loop
|
|
147
|
+
while iteration < max_iterations:
|
|
148
|
+
iteration += 1
|
|
149
|
+
|
|
150
|
+
# Filter to keep only the N most recent screenshots
|
|
151
|
+
if screenshot_count > max_saved_screenshots:
|
|
152
|
+
self._filter_to_n_most_recent_images(messages, max_saved_screenshots)
|
|
153
|
+
screenshot_count = max_saved_screenshots
|
|
154
|
+
|
|
155
|
+
# Create the request parameters
|
|
156
|
+
request_params = {
|
|
157
|
+
"model": model,
|
|
158
|
+
"max_tokens": kwargs.get("max_tokens", 4096),
|
|
159
|
+
"system": system_prompt,
|
|
160
|
+
"messages": messages,
|
|
161
|
+
"tools": tools,
|
|
162
|
+
"betas": ["computer-use-2025-01-24"],
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
# Add thinking parameter only if explicitly enabled
|
|
166
|
+
if kwargs.get("thinking_enabled"):
|
|
167
|
+
request_params["thinking"] = {
|
|
168
|
+
"type": "enabled",
|
|
169
|
+
"budget_tokens": kwargs.get("thinking_budget", 1024)
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
# Create message request to Claude
|
|
173
|
+
try:
|
|
174
|
+
response = client.beta.messages.create(**request_params)
|
|
175
|
+
except Exception as e:
|
|
176
|
+
if "base64" in str(e).lower():
|
|
177
|
+
# If we get a base64 error, try again after more aggressively filtering images
|
|
178
|
+
if callback:
|
|
179
|
+
callback("error", f"Base64 error detected. Attempting recovery...")
|
|
180
|
+
|
|
181
|
+
# Remove all but the most recent image and try again
|
|
182
|
+
self._filter_to_n_most_recent_images(messages, 1)
|
|
183
|
+
response = client.beta.messages.create(**request_params)
|
|
184
|
+
else:
|
|
185
|
+
# Not a base64 error, re-raise
|
|
186
|
+
raise
|
|
187
|
+
|
|
188
|
+
# Extract the content from the response
|
|
189
|
+
response_content = response.content
|
|
190
|
+
|
|
191
|
+
# Add Claude's response to the conversation history
|
|
192
|
+
assistant_message = {"role": "assistant", "content": response_content}
|
|
193
|
+
messages.append(assistant_message)
|
|
194
|
+
|
|
195
|
+
# Notify callback of any text content
|
|
196
|
+
for block in response_content:
|
|
197
|
+
if block.type == "text" and callback:
|
|
198
|
+
callback("text", block.text)
|
|
199
|
+
elif block.type == "thinking" and callback:
|
|
200
|
+
callback("thinking", block.thinking)
|
|
201
|
+
elif block.type == "tool_use" and callback:
|
|
202
|
+
tool_params = {
|
|
203
|
+
"action": block.name.split(".")[-1],
|
|
204
|
+
**block.input
|
|
205
|
+
}
|
|
206
|
+
callback("tool_use", tool_params)
|
|
207
|
+
|
|
208
|
+
# Check if Claude requested any tool actions
|
|
209
|
+
tool_results = []
|
|
210
|
+
for block in response_content:
|
|
211
|
+
if block.type == "tool_use":
|
|
212
|
+
# Execute the tool action
|
|
213
|
+
result = self._execute_tool(computer_id, block.input, callback, api_client)
|
|
214
|
+
|
|
215
|
+
# Format the result for Claude
|
|
216
|
+
tool_result = {
|
|
217
|
+
"type": "tool_result",
|
|
218
|
+
"tool_use_id": block.id
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
# Handle image vs text results
|
|
222
|
+
if isinstance(result, dict) and "type" in result and result["type"] == "image":
|
|
223
|
+
tool_result["content"] = [result]
|
|
224
|
+
# Increment screenshot count when we add a new screenshot
|
|
225
|
+
if block.input.get("action") == "screenshot":
|
|
226
|
+
screenshot_count += 1
|
|
227
|
+
else:
|
|
228
|
+
tool_result["content"] = [{"type": "text", "text": str(result)}]
|
|
229
|
+
|
|
230
|
+
tool_results.append(tool_result)
|
|
231
|
+
|
|
232
|
+
# If no tools were used, Claude is done - return the messages
|
|
233
|
+
if not tool_results:
|
|
234
|
+
if callback:
|
|
235
|
+
callback("status", "Task completed")
|
|
236
|
+
return messages
|
|
237
|
+
|
|
238
|
+
# Add tool results to messages for the next iteration
|
|
239
|
+
messages.append({"role": "user", "content": tool_results})
|
|
240
|
+
|
|
241
|
+
# We've reached the maximum iteration limit
|
|
242
|
+
if callback:
|
|
243
|
+
callback("status", f"Reached maximum iterations ({max_iterations})")
|
|
244
|
+
|
|
245
|
+
return messages
|
|
246
|
+
|
|
247
|
+
except Exception as e:
|
|
248
|
+
if callback:
|
|
249
|
+
callback("error", str(e))
|
|
250
|
+
raise
|
|
251
|
+
|
|
252
|
+
def _filter_to_n_most_recent_images(self, messages: List[Dict[str, Any]], max_images: int):
|
|
253
|
+
"""
|
|
254
|
+
Keep only the N most recent images in the conversation history.
|
|
255
|
+
|
|
256
|
+
Args:
|
|
257
|
+
messages: The conversation history
|
|
258
|
+
max_images: Maximum number of images to keep
|
|
259
|
+
"""
|
|
260
|
+
# Find all the image blocks in the conversation history
|
|
261
|
+
image_blocks = []
|
|
262
|
+
|
|
263
|
+
for msg_idx, msg in enumerate(messages):
|
|
264
|
+
if msg["role"] != "user":
|
|
265
|
+
continue
|
|
266
|
+
|
|
267
|
+
content = msg.get("content", [])
|
|
268
|
+
if not isinstance(content, list):
|
|
269
|
+
continue
|
|
270
|
+
|
|
271
|
+
for content_idx, block in enumerate(content):
|
|
272
|
+
if not isinstance(block, dict):
|
|
273
|
+
continue
|
|
274
|
+
|
|
275
|
+
if block.get("type") != "tool_result":
|
|
276
|
+
continue
|
|
277
|
+
|
|
278
|
+
block_content = block.get("content", [])
|
|
279
|
+
for content_item_idx, content_item in enumerate(block_content):
|
|
280
|
+
if not isinstance(content_item, dict):
|
|
281
|
+
continue
|
|
282
|
+
|
|
283
|
+
if content_item.get("type") == "image" and "source" in content_item:
|
|
284
|
+
image_blocks.append({
|
|
285
|
+
"msg_idx": msg_idx,
|
|
286
|
+
"content_idx": content_idx,
|
|
287
|
+
"block": block,
|
|
288
|
+
"content_item_idx": content_item_idx,
|
|
289
|
+
"content_item": content_item
|
|
290
|
+
})
|
|
291
|
+
|
|
292
|
+
# If we have more images than our limit, remove the oldest ones
|
|
293
|
+
if len(image_blocks) > max_images:
|
|
294
|
+
# Keep only the most recent ones (which are at the end of the list)
|
|
295
|
+
images_to_remove = image_blocks[:-max_images]
|
|
296
|
+
|
|
297
|
+
for img_block in images_to_remove:
|
|
298
|
+
content_item = img_block["content_item"]
|
|
299
|
+
if "source" in content_item and "data" in content_item["source"]:
|
|
300
|
+
# Replace with a minimal valid base64 image (1x1 transparent PNG)
|
|
301
|
+
content_item["source"]["data"] = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="
|
|
302
|
+
content_item["source"]["media_type"] = "image/png"
|
|
303
|
+
|
|
304
|
+
def _execute_tool(self,
|
|
305
|
+
computer_id: str,
|
|
306
|
+
params: Dict[str, Any],
|
|
307
|
+
callback: Optional[Callable[[str, Any], None]] = None,
|
|
308
|
+
api_client = None) -> Union[str, Dict[str, Any]]:
|
|
309
|
+
"""Execute a tool action via the API client."""
|
|
310
|
+
action = params.get("action")
|
|
311
|
+
|
|
312
|
+
if callback:
|
|
313
|
+
callback("tool_executing", {"action": action, "params": params})
|
|
314
|
+
|
|
315
|
+
try:
|
|
316
|
+
# Use the provided API client or create a new one
|
|
317
|
+
if api_client is None:
|
|
318
|
+
# Import here to avoid circular imports
|
|
319
|
+
from .api.client import ApiClient
|
|
320
|
+
api_client = ApiClient()
|
|
321
|
+
|
|
322
|
+
# Map actions to API methods
|
|
323
|
+
if action == "screenshot":
|
|
324
|
+
response = api_client.get_screenshot(computer_id)
|
|
325
|
+
if callback:
|
|
326
|
+
callback("tool_result", {"type": "image", "action": "screenshot"})
|
|
327
|
+
|
|
328
|
+
# The API now returns a URL instead of base64 data
|
|
329
|
+
# We need to fetch the image from the URL and convert it to base64
|
|
330
|
+
image_url = response.get("image", "")
|
|
331
|
+
|
|
332
|
+
if not image_url:
|
|
333
|
+
raise ValueError("No image URL received from API")
|
|
334
|
+
|
|
335
|
+
# Fetch the image from the URL
|
|
336
|
+
import requests
|
|
337
|
+
img_response = requests.get(image_url)
|
|
338
|
+
img_response.raise_for_status()
|
|
339
|
+
|
|
340
|
+
# Convert to base64
|
|
341
|
+
image_base64 = base64.b64encode(img_response.content).decode('utf-8')
|
|
342
|
+
|
|
343
|
+
return {
|
|
344
|
+
"type": "image",
|
|
345
|
+
"source": {
|
|
346
|
+
"type": "base64",
|
|
347
|
+
"media_type": "image/jpeg",
|
|
348
|
+
"data": image_base64
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
elif action == "left_click":
|
|
353
|
+
if not params.get("coordinate"):
|
|
354
|
+
raise ValueError("Coordinates required for left click")
|
|
355
|
+
x, y = params["coordinate"]
|
|
356
|
+
api_client.left_click(computer_id, x, y)
|
|
357
|
+
if callback:
|
|
358
|
+
callback("tool_result", {"action": "left_click", "x": x, "y": y})
|
|
359
|
+
return f"Left-clicked at ({x}, {y})"
|
|
360
|
+
|
|
361
|
+
elif action == "right_click":
|
|
362
|
+
if not params.get("coordinate"):
|
|
363
|
+
raise ValueError("Coordinates required for right click")
|
|
364
|
+
x, y = params["coordinate"]
|
|
365
|
+
api_client.right_click(computer_id, x, y)
|
|
366
|
+
if callback:
|
|
367
|
+
callback("tool_result", {"action": "right_click", "x": x, "y": y})
|
|
368
|
+
return f"Right-clicked at ({x}, {y})"
|
|
369
|
+
|
|
370
|
+
elif action == "double_click":
|
|
371
|
+
if not params.get("coordinate"):
|
|
372
|
+
raise ValueError("Coordinates required for double click")
|
|
373
|
+
x, y = params["coordinate"]
|
|
374
|
+
api_client.double_click(computer_id, x, y)
|
|
375
|
+
if callback:
|
|
376
|
+
callback("tool_result", {"action": "double_click", "x": x, "y": y})
|
|
377
|
+
return f"Double-clicked at ({x}, {y})"
|
|
378
|
+
|
|
379
|
+
elif action == "type":
|
|
380
|
+
if not params.get("text"):
|
|
381
|
+
raise ValueError("Text required for typing")
|
|
382
|
+
text = params["text"]
|
|
383
|
+
api_client.type_text(computer_id, text)
|
|
384
|
+
if callback:
|
|
385
|
+
callback("tool_result", {"action": "type", "text": text})
|
|
386
|
+
return f"Typed: \"{text}\""
|
|
387
|
+
|
|
388
|
+
elif action == "key":
|
|
389
|
+
if not params.get("text"):
|
|
390
|
+
raise ValueError("Key required for key press")
|
|
391
|
+
key = params["text"]
|
|
392
|
+
# Handle the 'return' key as 'enter' when needed
|
|
393
|
+
if key.lower() == "return":
|
|
394
|
+
key = "enter"
|
|
395
|
+
api_client.key_press(computer_id, key)
|
|
396
|
+
if callback:
|
|
397
|
+
callback("tool_result", {"action": "key", "key": key})
|
|
398
|
+
return f"Pressed key: {key}"
|
|
399
|
+
|
|
400
|
+
elif action == "scroll":
|
|
401
|
+
if not params.get("scroll_direction") or params.get("scroll_amount") is None:
|
|
402
|
+
raise ValueError("Direction and amount required for scrolling")
|
|
403
|
+
direction = params["scroll_direction"]
|
|
404
|
+
amount = params["scroll_amount"]
|
|
405
|
+
api_client.scroll(computer_id, direction, amount)
|
|
406
|
+
if callback:
|
|
407
|
+
callback("tool_result", {"action": "scroll", "direction": direction, "amount": amount})
|
|
408
|
+
return f"Scrolled {direction} by {amount}"
|
|
409
|
+
|
|
410
|
+
elif action == "wait":
|
|
411
|
+
duration = params.get("duration", 1)
|
|
412
|
+
api_client.wait(computer_id, duration)
|
|
413
|
+
if callback:
|
|
414
|
+
callback("tool_result", {"action": "wait", "duration": duration})
|
|
415
|
+
return f"Waited for {duration} second(s)"
|
|
416
|
+
|
|
417
|
+
else:
|
|
418
|
+
error_msg = f"Unsupported action: {action}"
|
|
419
|
+
if callback:
|
|
420
|
+
callback("error", error_msg)
|
|
421
|
+
raise ValueError(error_msg)
|
|
422
|
+
|
|
423
|
+
except Exception as e:
|
|
424
|
+
error_msg = f"Error executing {action}: {str(e)}"
|
|
425
|
+
if callback:
|
|
426
|
+
callback("error", error_msg)
|
|
427
|
+
return f"Error: {error_msg}"
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
# Registry of prompt providers, keyed by the name accepted by get_provider().
# Register further backends by adding entries, e.g.:
#   "openai": OpenAIProvider,
#   "fireworks": FireworksProvider,
PROVIDER_MAPPING = {"anthropic": AnthropicProvider}
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def get_provider(provider_name: str = "anthropic") -> PromptProvider:
    """
    Look up a prompt provider by name and return a new instance of it.

    Args:
        provider_name: Key into PROVIDER_MAPPING

    Returns:
        The result of calling the registered provider class with no arguments

    Raises:
        ValueError: If provider_name is not a key of PROVIDER_MAPPING
    """
    if provider_name in PROVIDER_MAPPING:
        provider_cls = PROVIDER_MAPPING[provider_name]
        return provider_cls()

    # Unknown name: list the registered names so the caller can correct it
    available = ', '.join(PROVIDER_MAPPING.keys())
    raise ValueError(f"Unknown provider: {provider_name}. Available providers: {available}")
|
orgo/utils/__init__.py
ADDED
orgo/utils/auth.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# src/orgo/utils/auth.py
|
|
2
|
+
"""Authentication utilities for Orgo SDK"""
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
def get_api_key(api_key: Optional[str] = None) -> str:
    """Resolve the Orgo API key, preferring the argument over the environment.

    Args:
        api_key: Explicit key; used whenever it is non-empty

    Returns:
        The explicit key, otherwise the value of the ORGO_API_KEY
        environment variable

    Raises:
        ValueError: If neither source provides a non-empty key
    """
    # An explicit, non-empty key wins and the environment is not consulted
    if api_key:
        return api_key

    env_key = os.environ.get("ORGO_API_KEY")
    if env_key:
        return env_key

    raise ValueError(
        "API key required. Set ORGO_API_KEY environment variable or pass api_key parameter. "
        "Get a key at https://www.orgo.ai/start"
    )
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: orgo
|
|
3
|
+
Version: 0.0.32
|
|
4
|
+
Summary: Computers for AI agents
|
|
5
|
+
Author: Orgo Team
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://www.orgo.ai
|
|
8
|
+
Project-URL: Documentation, https://docs.orgo.ai
|
|
9
|
+
Requires-Python: >=3.7
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: requests>=2.25.0
|
|
12
|
+
Requires-Dist: pillow>=8.0.0
|
|
13
|
+
|
|
14
|
+
# Orgo SDK
|
|
15
|
+
|
|
16
|
+
Desktop infrastructure for AI agents.
|
|
17
|
+
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install orgo
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Usage
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from orgo import Computer
|
|
28
|
+
|
|
29
|
+
# Create computer
|
|
30
|
+
computer = Computer()
|
|
31
|
+
|
|
32
|
+
# Control
|
|
33
|
+
computer.left_click(100, 200)
|
|
34
|
+
computer.type("Hello world")
|
|
35
|
+
computer.key("Enter")
|
|
36
|
+
computer.screenshot() # Returns PIL Image
|
|
37
|
+
|
|
38
|
+
# Execute Python code
|
|
39
|
+
computer.exec("import pyautogui; pyautogui.click(512, 384)")
|
|
40
|
+
|
|
41
|
+
# Cleanup
|
|
42
|
+
computer.shutdown()
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Full documentation: [docs.orgo.ai](https://docs.orgo.ai)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
orgo/__init__.py,sha256=aw3BM7-Wy8jk-mvIWRG2gC4-nsc74s6ZFm1U21NyGeM,171
|
|
2
|
+
orgo/computer.py,sha256=np8g67-ASCci3EmtQweCpOjDzeUMkXfAY9wve9s7tCc,14268
|
|
3
|
+
orgo/project.py,sha256=uVDFa8iyn5OaHzTzjGQhxnF_nVzwkqkqUShiV3M0AWU,3150
|
|
4
|
+
orgo/prompt.py,sha256=ynblwXPTDp_aF1MbGBsY0PIEr9naklDaKFcfSE_EZ6E,19781
|
|
5
|
+
orgo/api/__init__.py,sha256=9Tzb_OPJ5DH7Cg7OrHzpZZUT4ip05alpa9RLDYmnId8,113
|
|
6
|
+
orgo/api/client.py,sha256=apny7V3IYJTyDwn5utukzyECLWT65oo-1EmFRwHL--E,7544
|
|
7
|
+
orgo/utils/__init__.py,sha256=W4G_nwGBf_7jy0w_mfcrkllurYHSRU4B5cMTVYH_uCc,123
|
|
8
|
+
orgo/utils/auth.py,sha256=tPLBJY-6gdBQWLUjUbwIwxHphC3KoRT_XgP3Iykw3Mw,509
|
|
9
|
+
orgo-0.0.32.dist-info/METADATA,sha256=w3RNXo5BW2YlXUgUb1TzkXaJ50RpkPd0Zd-vOllwx_A,822
|
|
10
|
+
orgo-0.0.32.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
+
orgo-0.0.32.dist-info/top_level.txt,sha256=q0rYtFji8GbYuhFW8A5Ab9e0j27761IKPhnL0E9xow4,5
|
|
12
|
+
orgo-0.0.32.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
orgo
|