orgo 0.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
orgo/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ # src/orgo/__init__.py
2
+ """Orgo SDK: Desktop infrastructure for AI agents"""
3
+
4
+ from .project import Project
5
+ from .computer import Computer
6
+
7
+ __all__ = ["Project", "Computer"]
orgo/api/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ # src/orgo/api/__init__.py
2
+ """API package for Orgo SDK"""
3
+
4
+ from .client import ApiClient
5
+
6
+ __all__ = ["ApiClient"]
orgo/api/client.py ADDED
@@ -0,0 +1,183 @@
1
+ """API client for Orgo service"""
2
+
3
+ import requests
4
+ from typing import Dict, Any, Optional, List
5
+ import logging
6
+
7
+ from orgo.utils.auth import get_api_key
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
class ApiClient:
    """HTTP client for the Orgo REST API.

    Holds a ``requests.Session`` preconfigured with the bearer token and JSON
    headers, and exposes one thin method per API endpoint. All endpoint
    methods funnel through :meth:`_request`, which converts transport and
    HTTP failures into a plain ``Exception`` carrying a short message.
    """

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
        """Create a client.

        Args:
            api_key: Orgo API key; the value is resolved through
                ``get_api_key`` before being used as the bearer token.
            base_url: Root URL of the API. Defaults to
                ``https://www.orgo.ai/api`` when omitted or empty.
        """
        self.api_key = get_api_key(api_key)
        self.base_url = base_url or "https://www.orgo.ai/api"
        self.session = requests.Session()
        self.session.headers.update({
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json"
        })

    def _request(self, method: str, endpoint: str, data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Send one request and return the decoded JSON body.

        Args:
            method: HTTP verb. ``GET`` sends ``data`` as query parameters,
                every other verb sends it as a JSON body.
            endpoint: Path relative to ``base_url`` (no leading slash).
            data: Optional payload.

        Returns:
            The decoded JSON response, or ``{}`` when the server answered
            successfully with an empty body.

        Raises:
            Exception: With the server-provided ``error`` message when the
                error body is a JSON object containing one; with the HTTP
                status code for other HTTP errors; with a generic connection
                message when no response was received; or with an
                invalid-response message when a successful body is not JSON.
        """
        url = f"{self.base_url}/{endpoint}"

        # Only the network round-trip is guarded here, so that a body-decoding
        # problem on a *successful* response is not misreported as a
        # connection failure.
        try:
            if method.upper() == "GET":
                response = self.session.get(url, params=data)
            else:
                response = self.session.request(method, url, json=data)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            # Log the full error for debugging
            logger.debug("API request failed: %s %s", method, url, exc_info=True)

            # Compare against None explicitly: a requests Response is falsy
            # for 4xx/5xx statuses.
            failed = getattr(e, "response", None)
            if failed is None:
                # Generic error message without exposing internal details
                raise Exception(
                    "Failed to connect to Orgo service. Please check your connection and try again."
                ) from None

            try:
                error_data = failed.json()
            except ValueError:
                error_data = None

            # The error body may be any JSON value; only a JSON object can
            # carry an "error" field.
            if isinstance(error_data, dict) and "error" in error_data:
                raise Exception(error_data["error"]) from None
            raise Exception(f"Request failed with status {failed.status_code}") from None

        # A successful response without a body has nothing to decode.
        if not response.content:
            return {}

        try:
            return response.json()
        except ValueError:
            logger.debug("API response was not valid JSON: %s %s", method, url, exc_info=True)
            raise Exception("Orgo service returned a response that is not valid JSON.") from None

    # Project methods
    def create_project(self, name: str) -> Dict[str, Any]:
        """Create a new named project"""
        return self._request("POST", "projects", {"name": name})

    def get_project_by_name(self, name: str) -> Dict[str, Any]:
        """Get project details by name.

        Lists all projects and returns the first whose ``name`` matches.

        Raises:
            Exception: If no project has that name (or the listing fails).
        """
        for project in self.list_projects():
            if project.get("name") == name:
                return project
        raise Exception(f"Project '{name}' not found") from None

    def get_project(self, project_id: str) -> Dict[str, Any]:
        """Get project details by ID"""
        return self._request("GET", f"projects/{project_id}")

    def list_projects(self) -> List[Dict[str, Any]]:
        """List all projects (the ``projects`` field of the response)."""
        response = self._request("GET", "projects")
        return response.get("projects", [])

    def delete_project(self, project_id: str) -> Dict[str, Any]:
        """Delete a project and all its computers"""
        return self._request("DELETE", f"projects/{project_id}")

    # Computer methods
    def create_computer(self, project_id: str, computer_name: str,
                        os: str = "linux", ram: int = 2, cpu: int = 2,
                        gpu: str = "none") -> Dict[str, Any]:
        """Create a new computer within a project"""
        return self._request("POST", "computers", {
            "project_id": project_id,
            "name": computer_name,
            "os": os,
            "ram": ram,
            "cpu": cpu,
            "gpu": gpu
        })

    def list_computers(self, project_id: str) -> List[Dict[str, Any]]:
        """List all computers in a project (the project's ``desktops`` field)."""
        project = self.get_project(project_id)
        return project.get("desktops", [])

    def get_computer(self, computer_id: str) -> Dict[str, Any]:
        """Get computer details"""
        return self._request("GET", f"computers/{computer_id}")

    def delete_computer(self, computer_id: str) -> Dict[str, Any]:
        """Delete a computer"""
        return self._request("DELETE", f"computers/{computer_id}")

    def restart_computer(self, computer_id: str) -> Dict[str, Any]:
        """Restart a computer"""
        return self._request("POST", f"computers/{computer_id}/restart")

    # Computer control methods
    def left_click(self, computer_id: str, x: int, y: int) -> Dict[str, Any]:
        """Left-click at ``(x, y)``."""
        return self._request("POST", f"computers/{computer_id}/click", {
            "button": "left", "x": x, "y": y
        })

    def right_click(self, computer_id: str, x: int, y: int) -> Dict[str, Any]:
        """Right-click at ``(x, y)``."""
        return self._request("POST", f"computers/{computer_id}/click", {
            "button": "right", "x": x, "y": y
        })

    def double_click(self, computer_id: str, x: int, y: int) -> Dict[str, Any]:
        """Double left-click at ``(x, y)``."""
        return self._request("POST", f"computers/{computer_id}/click", {
            "button": "left", "x": x, "y": y, "double": True
        })

    def drag(self, computer_id: str, start_x: int, start_y: int,
             end_x: int, end_y: int, button: str = "left",
             duration: float = 0.5) -> Dict[str, Any]:
        """Perform a drag operation from start to end coordinates"""
        return self._request("POST", f"computers/{computer_id}/drag", {
            "start_x": start_x,
            "start_y": start_y,
            "end_x": end_x,
            "end_y": end_y,
            "button": button,
            "duration": duration
        })

    def scroll(self, computer_id: str, direction: str, amount: int = 3) -> Dict[str, Any]:
        """Scroll in ``direction`` by ``amount``."""
        return self._request("POST", f"computers/{computer_id}/scroll", {
            "direction": direction, "amount": amount
        })

    def type_text(self, computer_id: str, text: str) -> Dict[str, Any]:
        """Type ``text`` on the computer."""
        return self._request("POST", f"computers/{computer_id}/type", {
            "text": text
        })

    def key_press(self, computer_id: str, key: str) -> Dict[str, Any]:
        """Press ``key`` on the computer."""
        return self._request("POST", f"computers/{computer_id}/key", {
            "key": key
        })

    def get_screenshot(self, computer_id: str) -> Dict[str, Any]:
        """Request a screenshot of the computer."""
        return self._request("GET", f"computers/{computer_id}/screenshot")

    def execute_bash(self, computer_id: str, command: str) -> Dict[str, Any]:
        """Run a bash command on the computer."""
        return self._request("POST", f"computers/{computer_id}/bash", {
            "command": command
        })

    def execute_python(self, computer_id: str, code: str, timeout: int = 10) -> Dict[str, Any]:
        """Execute Python code on the computer"""
        return self._request("POST", f"computers/{computer_id}/exec", {
            "code": code,
            "timeout": timeout
        })

    def wait(self, computer_id: str, duration: float) -> Dict[str, Any]:
        """Ask the computer to wait for ``duration``."""
        return self._request("POST", f"computers/{computer_id}/wait", {
            "duration": duration
        })

    # Streaming methods
    def start_stream(self, computer_id: str, connection_name: str) -> Dict[str, Any]:
        """Start streaming to a configured RTMP connection"""
        return self._request("POST", f"computers/{computer_id}/stream/start", {
            "connection_name": connection_name
        })

    def stop_stream(self, computer_id: str) -> Dict[str, Any]:
        """Stop the active stream"""
        return self._request("POST", f"computers/{computer_id}/stream/stop")

    def get_stream_status(self, computer_id: str) -> Dict[str, Any]:
        """Get current stream status"""
        return self._request("GET", f"computers/{computer_id}/stream/status")
orgo/computer.py ADDED
@@ -0,0 +1,326 @@
1
+ """Computer class for interacting with Orgo virtual environments"""
2
+ import os as operating_system
3
+ import base64
4
+ import logging
5
+ import uuid
6
+ import io
7
+ from typing import Dict, List, Any, Optional, Callable, Literal, Union
8
+ from PIL import Image
9
+ import requests
10
+ from requests.exceptions import RequestException
11
+
12
+ from .api.client import ApiClient
13
+ from .prompt import get_provider
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
class Computer:
    """Handle to an Orgo virtual computer.

    Construction either attaches to an existing computer (by ID, or by name
    inside a project) or creates a new one, creating the project as well when
    needed. All control methods delegate to ``self.api`` using
    ``self.computer_id``.
    """

    def __init__(self,
                 project: Optional[Union[str, 'Project']] = None,
                 name: Optional[str] = None,
                 computer_id: Optional[str] = None,
                 api_key: Optional[str] = None,
                 base_api_url: Optional[str] = None,
                 ram: Optional[Literal[1, 2, 4, 8, 16, 32, 64]] = None,
                 memory: Optional[Literal[1, 2, 4, 8, 16, 32, 64]] = None,
                 cpu: Optional[Literal[1, 2, 4, 8, 16]] = None,
                 os: Optional[Literal["linux", "windows"]] = None,
                 gpu: Optional[Literal["none", "a10", "l40s", "a100-40gb", "a100-80gb"]] = None):
        """
        Initialize an Orgo virtual computer.

        Args:
            project: Project name (str) or Project instance. If not provided, creates a new project.
            name: Computer name within the project (optional, auto-generated if not provided)
            computer_id: Existing computer ID to connect to (optional)
            api_key: Orgo API key (defaults to ORGO_API_KEY env var)
            base_api_url: Custom API URL (optional)
            ram/memory: RAM in GB (1, 2, 4, 8, 16, 32, or 64) - only used when creating
            cpu: CPU cores (1, 2, 4, 8, or 16) - only used when creating
            os: Operating system ("linux" or "windows") - only used when creating
            gpu: GPU type - only used when creating

        Raises:
            ValueError: If ``project`` is neither a string nor a Project
                instance, or if a new computer must be created with an
                unsupported ram/cpu/os/gpu value.

        Examples:
            # Create computer in new project
            computer = Computer(ram=4, cpu=2)

            # Create computer in existing project
            computer = Computer(project="manus", ram=4, cpu=2)

            # Connect to existing computer by ID
            computer = Computer(computer_id="11c4fd46-e069-4c32-be65-f82d9f87b9b8")
        """
        self.api_key = api_key or operating_system.environ.get("ORGO_API_KEY")
        self.base_api_url = base_api_url
        self.api = ApiClient(self.api_key, self.base_api_url)

        # Handle memory parameter as an alias for ram (ram wins if both given)
        if ram is None and memory is not None:
            ram = memory

        # Store configuration, falling back to the default machine spec
        self.os = os or "linux"
        self.ram = ram or 2
        self.cpu = cpu or 2
        self.gpu = gpu or "none"

        if computer_id:
            # Just store the computer ID, no API call needed
            self.computer_id = computer_id
            self.name = name
            self.project_id = None
            self.project_name = None
            logger.info(f"Connected to computer ID: {self.computer_id}")
        elif project:
            # Work with specified project
            if isinstance(project, str):
                # Project name provided
                self.project_name = project
                self._initialize_with_project_name(project, name)
            else:
                # Project instance provided; imported here rather than at
                # module level.
                from .project import Project as ProjectClass
                if isinstance(project, ProjectClass):
                    self.project_name = project.name
                    self.project_id = project.id
                    self._initialize_with_project_instance(project, name)
                else:
                    raise ValueError("project must be a string (project name) or Project instance")
        else:
            # No project specified, create a new one
            self._create_new_project_and_computer(name)

    def _connect_or_create(self, project_id: str, computers: List[Dict[str, Any]],
                           computer_name: Optional[str]):
        """Attach to a matching computer in ``computers`` or create a new one.

        With a name: connect to the computer of that name if present,
        otherwise create it. Without a name: connect to the first listed
        computer, or create an auto-named one when the list is empty.
        """
        if computer_name:
            # Look for specific computer
            existing = next((c for c in computers if c.get("name") == computer_name), None)
            if existing:
                self._connect_to_existing_computer(existing)
                return
        elif computers:
            # No name specified, use first available computer
            self._connect_to_existing_computer(computers[0])
            return

        # Nothing suitable exists yet
        self._create_computer(project_id, computer_name)

    def _initialize_with_project_name(self, project_name: str, computer_name: Optional[str]):
        """Initialize with a project name (create project if needed)"""
        # Only the lookup is guarded: a failure while listing or creating
        # computers in an *existing* project must surface to the caller
        # instead of being mistaken for "project not found" and triggering a
        # second project creation.
        try:
            project = self.api.get_project_by_name(project_name)
        except Exception:
            # Project doesn't exist, create it
            logger.info(f"Project {project_name} not found, creating new project")
            project = self.api.create_project(project_name)
            self.project_id = project.get("id")
            self._create_computer(self.project_id, computer_name)
            return

        self.project_id = project.get("id")
        computers = self.api.list_computers(self.project_id)
        self._connect_or_create(self.project_id, computers, computer_name)

    def _initialize_with_project_instance(self, project: 'Project', computer_name: Optional[str]):
        """Initialize with a Project instance"""
        computers = project.list_computers()
        self._connect_or_create(project.id, computers, computer_name)

    def _create_new_project_and_computer(self, computer_name: Optional[str]):
        """Create a new project and computer"""
        # Generate a unique project name
        project_name = f"project-{uuid.uuid4().hex[:8]}"

        # Create the project
        project = self.api.create_project(project_name)
        self.project_id = project.get("id")
        self.project_name = project_name

        # Create a computer in the new project
        self._create_computer(self.project_id, computer_name)

    def _connect_to_existing_computer(self, computer_info: Dict[str, Any]):
        """Connect to an existing computer described by ``computer_info``."""
        self.computer_id = computer_info.get("id")
        self.name = computer_info.get("name")
        logger.info(f"Connected to existing computer {self.name} (ID: {self.computer_id})")

    def _create_computer(self, project_id: str, computer_name: Optional[str]):
        """Create a new computer in the project.

        Raises:
            ValueError: If the stored ram/cpu/os/gpu value is not one of the
                supported options. Validation runs before any API call.
        """
        # Generate name if not provided
        if not computer_name:
            computer_name = f"desktop-{uuid.uuid4().hex[:8]}"

        self.name = computer_name

        # Validate parameters
        if self.ram not in [1, 2, 4, 8, 16, 32, 64]:
            raise ValueError("ram must be one of: 1, 2, 4, 8, 16, 32, 64 GB")
        if self.cpu not in [1, 2, 4, 8, 16]:
            raise ValueError("cpu must be one of: 1, 2, 4, 8, 16 cores")
        if self.os not in ["linux", "windows"]:
            raise ValueError("os must be either 'linux' or 'windows'")
        if self.gpu not in ["none", "a10", "l40s", "a100-40gb", "a100-80gb"]:
            raise ValueError("gpu must be one of: 'none', 'a10', 'l40s', 'a100-40gb', 'a100-80gb'")

        computer = self.api.create_computer(
            project_id=project_id,
            computer_name=computer_name,
            os=self.os,
            ram=self.ram,
            cpu=self.cpu,
            gpu=self.gpu
        )
        self.computer_id = computer.get("id")
        logger.info(f"Created new computer {self.name} (ID: {self.computer_id})")

    def status(self) -> Dict[str, Any]:
        """Get current computer status"""
        return self.api.get_computer(self.computer_id)

    def restart(self) -> Dict[str, Any]:
        """Restart the computer"""
        return self.api.restart_computer(self.computer_id)

    def destroy(self) -> Dict[str, Any]:
        """Terminate and delete the computer instance"""
        return self.api.delete_computer(self.computer_id)

    # Navigation methods
    def left_click(self, x: int, y: int) -> Dict[str, Any]:
        """Perform left mouse click at specified coordinates"""
        return self.api.left_click(self.computer_id, x, y)

    def right_click(self, x: int, y: int) -> Dict[str, Any]:
        """Perform right mouse click at specified coordinates"""
        return self.api.right_click(self.computer_id, x, y)

    def double_click(self, x: int, y: int) -> Dict[str, Any]:
        """Perform double click at specified coordinates"""
        return self.api.double_click(self.computer_id, x, y)

    def drag(self, start_x: int, start_y: int, end_x: int, end_y: int,
             button: str = "left", duration: float = 0.5) -> Dict[str, Any]:
        """Perform a smooth drag operation from start to end coordinates"""
        return self.api.drag(self.computer_id, start_x, start_y, end_x, end_y, button, duration)

    def scroll(self, direction: str = "down", amount: int = 3) -> Dict[str, Any]:
        """Scroll in specified direction and amount"""
        return self.api.scroll(self.computer_id, direction, amount)

    # Input methods
    def type(self, text: str) -> Dict[str, Any]:
        """Type the specified text"""
        return self.api.type_text(self.computer_id, text)

    def key(self, key: str) -> Dict[str, Any]:
        """Press a key or key combination (e.g., "Enter", "ctrl+c")"""
        return self.api.key_press(self.computer_id, key)

    # View methods
    def screenshot(self) -> Image.Image:
        """Capture screenshot and return as PIL Image.

        The API's ``image`` field is either a URL, which is downloaded, or a
        base64 string, which is decoded locally.
        """
        response = self.api.get_screenshot(self.computer_id)
        image_data = response.get("image", "")

        if image_data.startswith(('http://', 'https://')):
            img_response = requests.get(image_data)
            img_response.raise_for_status()
            return Image.open(io.BytesIO(img_response.content))
        else:
            img_data = base64.b64decode(image_data)
            return Image.open(io.BytesIO(img_data))

    def screenshot_base64(self) -> str:
        """Capture screenshot and return as base64 string.

        A URL in the ``image`` field is downloaded and encoded; any other
        value is returned unchanged.
        """
        response = self.api.get_screenshot(self.computer_id)
        image_data = response.get("image", "")

        if image_data.startswith(('http://', 'https://')):
            img_response = requests.get(image_data)
            img_response.raise_for_status()
            return base64.b64encode(img_response.content).decode('utf-8')
        else:
            return image_data

    # Execution methods
    def bash(self, command: str) -> str:
        """Execute a bash command and return the response's ``output`` field"""
        response = self.api.execute_bash(self.computer_id, command)
        return response.get("output", "")

    def exec(self, code: str, timeout: int = 10) -> Dict[str, Any]:
        """Execute Python code on the remote computer"""
        response = self.api.execute_python(self.computer_id, code, timeout)
        return response

    def wait(self, seconds: float) -> Dict[str, Any]:
        """Wait for specified number of seconds"""
        return self.api.wait(self.computer_id, seconds)

    # Streaming methods
    def start_stream(self, connection: str) -> Dict[str, Any]:
        """Start streaming the computer screen to an RTMP server"""
        return self.api.start_stream(self.computer_id, connection)

    def stop_stream(self) -> Dict[str, Any]:
        """Stop the active stream"""
        return self.api.stop_stream(self.computer_id)

    def stream_status(self) -> Dict[str, Any]:
        """Get the current streaming status"""
        return self.api.get_stream_status(self.computer_id)

    # AI control method
    def prompt(self,
               instruction: str,
               provider: str = "anthropic",
               model: str = "claude-3-7-sonnet-20250219",
               display_width: int = 1024,
               display_height: int = 768,
               callback: Optional[Callable[[str, Any], None]] = None,
               thinking_enabled: bool = False,
               thinking_budget: int = 1024,
               max_tokens: int = 4096,
               max_iterations: int = 20,
               max_saved_screenshots: int = 5,
               api_key: Optional[str] = None) -> List[Dict[str, Any]]:
        """Control the computer with natural language instructions using an AI assistant.

        Looks up the provider via ``get_provider(provider)`` and forwards all
        arguments to its ``execute`` method, together with this computer's ID
        and the Orgo API key / base URL this instance was built with.

        Returns:
            Whatever the provider's ``execute`` returns (annotated as the
            list of conversation messages).
        """
        provider_instance = get_provider(provider)

        return provider_instance.execute(
            computer_id=self.computer_id,
            instruction=instruction,
            callback=callback,
            api_key=api_key,
            model=model,
            display_width=display_width,
            display_height=display_height,
            thinking_enabled=thinking_enabled,
            thinking_budget=thinking_budget,
            max_tokens=max_tokens,
            max_iterations=max_iterations,
            max_saved_screenshots=max_saved_screenshots,
            orgo_api_key=self.api_key,
            orgo_base_url=self.base_api_url
        )

    def __repr__(self):
        # hasattr guards keep repr usable on a partially initialised instance
        project_str = f", project='{self.project_name}'" if hasattr(self, 'project_name') and self.project_name else ""
        name_str = f"name='{self.name}'" if hasattr(self, 'name') and self.name else f"id='{self.computer_id}'"
        return f"Computer({name_str}{project_str})"
orgo/project.py ADDED
@@ -0,0 +1,87 @@
1
+ """Project class for managing Orgo projects"""
2
+ import os as operating_system # Renamed to avoid any potential conflicts
3
+ import uuid
4
+ from typing import Dict, List, Any, Optional
5
+
6
+ from .api.client import ApiClient
7
+
8
class Project:
    """Handle to an Orgo project (a named group of computers).

    Construction looks the project up by name and creates it when the lookup
    fails. The project's ID is stored on ``self.id`` and the raw API record
    on ``self._info``.
    """

    def __init__(self,
                 name: Optional[str] = None,
                 api_key: Optional[str] = None,
                 base_api_url: Optional[str] = None):
        """
        Initialize an Orgo project.

        Args:
            name: Project name. If exists, connects to it. If not, creates it.
                A random ``project-<8 hex chars>`` name is generated when omitted.
            api_key: Orgo API key (defaults to ORGO_API_KEY env var)
            base_api_url: Custom API URL (optional)
        """
        self.api_key = api_key or operating_system.environ.get("ORGO_API_KEY")
        self.base_api_url = base_api_url
        self.api = ApiClient(self.api_key, self.base_api_url)

        # Generate a unique name if not provided
        self.name = name if name else f"project-{uuid.uuid4().hex[:8]}"

        # Try to get existing project or create new one
        self._initialize_project()

    def _initialize_project(self):
        """Get existing project or create new one"""
        try:
            # Try to get existing project
            project = self.api.get_project_by_name(self.name)
        except Exception:
            # Lookup failed (the client raises a plain Exception when the
            # project is not found), so create it
            project = self.api.create_project(self.name)

        self.id = project.get("id")
        self._info = project

    def status(self) -> Dict[str, Any]:
        """Get project status"""
        return self.api.get_project(self.id)

    def start(self) -> Dict[str, Any]:
        """Start all computers in the project"""
        # NOTE(review): the ApiClient shipped in this package version does not
        # appear to define start_project - confirm before relying on this.
        return self.api.start_project(self.id)

    def stop(self) -> Dict[str, Any]:
        """Stop all computers in the project"""
        # NOTE(review): stop_project is not visible on ApiClient - confirm.
        return self.api.stop_project(self.id)

    def restart(self) -> Dict[str, Any]:
        """Restart all computers in the project"""
        # NOTE(review): restart_project is not visible on ApiClient - confirm.
        return self.api.restart_project(self.id)

    def destroy(self) -> Dict[str, Any]:
        """Delete the project and all its computers"""
        return self.api.delete_project(self.id)

    def list_computers(self) -> List[Dict[str, Any]]:
        """List all computers in this project"""
        # The client's list_computers takes a project ID, not a project name.
        return self.api.list_computers(self.id)

    def get_computer(self, computer_name: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """Get a specific computer in this project by name, or the first one if no name specified.

        Returns:
            The matching computer record, or ``None`` when the project has no
            computers or none carries the requested name.
        """
        computers = self.list_computers()
        if not computers:
            return None

        if not computer_name:
            # Return first computer if no name specified
            return computers[0]

        return next((c for c in computers if c.get("name") == computer_name), None)

    def __repr__(self):
        return f"Project(name='{self.name}', id='{self.id}')"
orgo/prompt.py ADDED
@@ -0,0 +1,452 @@
1
+ # src/orgo/prompt.py
2
+ """
3
+ Prompt module for interacting with virtual computers using AI models.
4
+ """
5
+
6
+ import os
7
+ import base64
8
+ from typing import Dict, List, Any, Optional, Callable, Union, Protocol
9
+
10
+
11
class PromptProvider(Protocol):
    """Structural interface that every prompt provider must satisfy."""

    def execute(self,
                computer_id: str,
                instruction: str,
                callback: Optional[Callable[[str, Any], None]] = None,
                **kwargs) -> List[Dict[str, Any]]:
        """
        Drive a computer from a natural-language instruction.

        Args:
            computer_id: Identifier of the computer being controlled.
            instruction: The instruction supplied by the user.
            callback: Optional function invoked to report progress.
            **kwargs: Extra parameters understood by the concrete provider.

        Returns:
            The messages exchanged during the conversation, as a list.
        """
        ...
32
+
33
+
34
+ class AnthropicProvider:
35
+ """Anthropic Claude-based prompt provider."""
36
+
37
+ def __init__(self):
38
+ """Initialize the Anthropic provider."""
39
+ try:
40
+ import anthropic
41
+ self.anthropic = anthropic
42
+ except ImportError:
43
+ raise ImportError(
44
+ "Anthropic SDK not installed. Please install with 'pip install anthropic'"
45
+ )
46
+
47
+ def execute(self,
48
+ computer_id: str,
49
+ instruction: str,
50
+ callback: Optional[Callable[[str, Any], None]] = None,
51
+ api_key: Optional[str] = None,
52
+ model: str = "claude-3-7-sonnet-20250219",
53
+ display_width: int = 1024,
54
+ display_height: int = 768,
55
+ orgo_api_key: Optional[str] = None,
56
+ orgo_base_url: Optional[str] = None,
57
+ max_saved_screenshots: int = 2,
58
+ **kwargs) -> List[Dict[str, Any]]:
59
+ """
60
+ Execute a prompt using Anthropic's Claude.
61
+
62
+ Args:
63
+ computer_id: ID of the computer to control
64
+ instruction: User instruction
65
+ callback: Optional progress callback
66
+ api_key: Anthropic API key
67
+ model: Model to use
68
+ display_width: Display width in pixels
69
+ display_height: Display height in pixels
70
+ orgo_api_key: API key for Orgo (passed to ApiClient)
71
+ orgo_base_url: Base URL for Orgo API (passed to ApiClient)
72
+ max_saved_screenshots: Maximum number of screenshots to maintain in conversation history
73
+ **kwargs: Additional parameters to pass to the Anthropic API
74
+
75
+ Returns:
76
+ List of messages from the conversation
77
+ """
78
+ # Get API key from kwargs, env var, or raise error
79
+ api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
80
+ if not api_key:
81
+ raise ValueError("No Anthropic API key provided. Set ANTHROPIC_API_KEY environment variable or pass api_key.")
82
+
83
+ # Initialize the client
84
+ client = self.anthropic.Anthropic(api_key=api_key)
85
+
86
+ # Prepare the messages
87
+ messages = [{"role": "user", "content": instruction}]
88
+
89
+ # Set up the system prompt
90
+ system_prompt = f"""You are Claude, an AI assistant that controls a virtual Ubuntu computer with internet access.
91
+
92
+ <SYSTEM_CAPABILITY>
93
+ * You are utilising an Ubuntu virtual machine with a display resolution of {display_width}x{display_height}.
94
+ * You can take screenshots to see the current state and control the computer by clicking, typing, pressing keys, and scrolling.
95
+ * The virtual environment is an Ubuntu system with standard applications.
96
+ * Always start by taking a screenshot to see the current state before performing any actions.
97
+ </SYSTEM_CAPABILITY>
98
+
99
+ <UBUNTU_DESKTOP_GUIDELINES>
100
+ * CRITICAL INSTRUCTION: When opening applications or files on the Ubuntu desktop, you MUST USE DOUBLE-CLICK rather than single-click.
101
+ * Single-click only selects desktop icons but DOES NOT open them. To open desktop icons, you MUST use double-click.
102
+ * Common desktop interactions:
103
+ - Desktop icons: DOUBLE-CLICK to open applications and folders
104
+ - Menu items: SINGLE-CLICK to select options
105
+ - Taskbar icons: SINGLE-CLICK to open applications
106
+ - Window buttons: SINGLE-CLICK to use close, minimize, maximize buttons
107
+ - File browser items: DOUBLE-CLICK to open folders and files
108
+ - When submitting, use the 'Enter' key, not the 'Return' key.
109
+ * If you see an icon on the desktop that you need to open, ALWAYS use the double_click action, never use left_click.
110
+ </UBUNTU_DESKTOP_GUIDELINES>
111
+
112
+ <SCREENSHOT_GUIDELINES>
113
+ * Be mindful of how many screenshots you take - they consume significant memory.
114
+ * Only take screenshots when you need to see the current state of the screen.
115
+ * Try to batch multiple actions before taking another screenshot.
116
+ * For better performance, limit the number of screenshots you take.
117
+ </SCREENSHOT_GUIDELINES>"""
118
+
119
+ try:
120
+ # Define the computer tool per Anthropic's documentation
121
+ tools = [
122
+ {
123
+ "type": "computer_20250124",
124
+ "name": "computer",
125
+ "display_width_px": display_width,
126
+ "display_height_px": display_height,
127
+ "display_number": 1
128
+ }
129
+ ]
130
+
131
+ # Start the conversation with Claude
132
+ if callback:
133
+ callback("status", "Starting conversation with Claude")
134
+
135
+ # Track whether we're in the agent loop
136
+ iteration = 0
137
+ max_iterations = kwargs.get("max_iterations", 20) # Default to 20 iterations max
138
+
139
+ # Create an API client with the proper settings
140
+ from .api.client import ApiClient
141
+ api_client = ApiClient(orgo_api_key, orgo_base_url)
142
+
143
+ # Track how many screenshots we've seen so we can prune when needed
144
+ screenshot_count = 0
145
+
146
+ # Start the agent loop
147
+ while iteration < max_iterations:
148
+ iteration += 1
149
+
150
+ # Filter to keep only the N most recent screenshots
151
+ if screenshot_count > max_saved_screenshots:
152
+ self._filter_to_n_most_recent_images(messages, max_saved_screenshots)
153
+ screenshot_count = max_saved_screenshots
154
+
155
+ # Create the request parameters
156
+ request_params = {
157
+ "model": model,
158
+ "max_tokens": kwargs.get("max_tokens", 4096),
159
+ "system": system_prompt,
160
+ "messages": messages,
161
+ "tools": tools,
162
+ "betas": ["computer-use-2025-01-24"],
163
+ }
164
+
165
+ # Add thinking parameter only if explicitly enabled
166
+ if kwargs.get("thinking_enabled"):
167
+ request_params["thinking"] = {
168
+ "type": "enabled",
169
+ "budget_tokens": kwargs.get("thinking_budget", 1024)
170
+ }
171
+
172
+ # Create message request to Claude
173
+ try:
174
+ response = client.beta.messages.create(**request_params)
175
+ except Exception as e:
176
+ if "base64" in str(e).lower():
177
+ # If we get a base64 error, try again after more aggressively filtering images
178
+ if callback:
179
+ callback("error", f"Base64 error detected. Attempting recovery...")
180
+
181
+ # Remove all but the most recent image and try again
182
+ self._filter_to_n_most_recent_images(messages, 1)
183
+ response = client.beta.messages.create(**request_params)
184
+ else:
185
+ # Not a base64 error, re-raise
186
+ raise
187
+
188
+ # Extract the content from the response
189
+ response_content = response.content
190
+
191
+ # Add Claude's response to the conversation history
192
+ assistant_message = {"role": "assistant", "content": response_content}
193
+ messages.append(assistant_message)
194
+
195
+ # Notify callback of any text content
196
+ for block in response_content:
197
+ if block.type == "text" and callback:
198
+ callback("text", block.text)
199
+ elif block.type == "thinking" and callback:
200
+ callback("thinking", block.thinking)
201
+ elif block.type == "tool_use" and callback:
202
+ tool_params = {
203
+ "action": block.name.split(".")[-1],
204
+ **block.input
205
+ }
206
+ callback("tool_use", tool_params)
207
+
208
+ # Check if Claude requested any tool actions
209
+ tool_results = []
210
+ for block in response_content:
211
+ if block.type == "tool_use":
212
+ # Execute the tool action
213
+ result = self._execute_tool(computer_id, block.input, callback, api_client)
214
+
215
+ # Format the result for Claude
216
+ tool_result = {
217
+ "type": "tool_result",
218
+ "tool_use_id": block.id
219
+ }
220
+
221
+ # Handle image vs text results
222
+ if isinstance(result, dict) and "type" in result and result["type"] == "image":
223
+ tool_result["content"] = [result]
224
+ # Increment screenshot count when we add a new screenshot
225
+ if block.input.get("action") == "screenshot":
226
+ screenshot_count += 1
227
+ else:
228
+ tool_result["content"] = [{"type": "text", "text": str(result)}]
229
+
230
+ tool_results.append(tool_result)
231
+
232
+ # If no tools were used, Claude is done - return the messages
233
+ if not tool_results:
234
+ if callback:
235
+ callback("status", "Task completed")
236
+ return messages
237
+
238
+ # Add tool results to messages for the next iteration
239
+ messages.append({"role": "user", "content": tool_results})
240
+
241
+ # We've reached the maximum iteration limit
242
+ if callback:
243
+ callback("status", f"Reached maximum iterations ({max_iterations})")
244
+
245
+ return messages
246
+
247
+ except Exception as e:
248
+ if callback:
249
+ callback("error", str(e))
250
+ raise
251
+
252
def _filter_to_n_most_recent_images(self, messages: List[Dict[str, Any]], max_images: int):
    """
    Keep only the N most recent images in the conversation history.

    Walks every user message, collects the image items found inside
    ``tool_result`` blocks in conversation order, and overwrites the payload
    of all but the last ``max_images`` of them with a 1x1 transparent PNG.
    The message structure itself is left intact; only the base64 data and
    media type of the older images are replaced.

    Args:
        messages: The conversation history (mutated in place)
        max_images: Maximum number of images to keep; zero or a negative
            value replaces every image
    """
    # Minimal valid base64 image (1x1 transparent PNG) used as the stand-in payload
    placeholder_png = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="

    # Image items from tool results, oldest first
    image_items = []

    for msg in messages:
        if msg["role"] != "user":
            continue

        content = msg.get("content", [])
        if not isinstance(content, list):
            continue

        for block in content:
            if not isinstance(block, dict) or block.get("type") != "tool_result":
                continue

            block_content = block.get("content", [])
            # Tool results may carry a plain string (or nothing) instead of a list of items
            if not isinstance(block_content, (list, tuple)):
                continue

            for content_item in block_content:
                if not isinstance(content_item, dict):
                    continue
                if content_item.get("type") == "image" and "source" in content_item:
                    image_items.append(content_item)

    # Compute the count explicitly: slicing with [:-max_images] selects nothing
    # when max_images is 0 and the wrong end of the list when it is negative.
    excess = len(image_items) - max(max_images, 0)
    if excess <= 0:
        return

    for content_item in image_items[:excess]:
        source = content_item["source"]
        # Only inline (base64) sources carry a "data" payload to shrink
        if isinstance(source, dict) and "data" in source:
            source["data"] = placeholder_png
            source["media_type"] = "image/png"
303
+
304
def _execute_tool(self,
                  computer_id: str,
                  params: Dict[str, Any],
                  callback: Optional[Callable[[str, Any], None]] = None,
                  api_client = None) -> Union[str, Dict[str, Any]]:
    """
    Execute a tool action via the API client.

    Args:
        computer_id: Identifier passed to every API client call
        params: Tool input; "action" selects the operation and the other keys
            ("coordinate", "text", "scroll_direction", "scroll_amount",
            "duration") carry its arguments
        callback: Optional function called with (event_type, event_data) for
            "tool_executing", "tool_result" and "error" events
        api_client: API client to use; a new ApiClient is created when omitted

    Returns:
        For "screenshot", an image content block with a base64 source.
        For every other action, a text description of what was done.
        Failures are never raised: they are reported once through the
        callback and returned as an "Error: ..." string.
    """
    # action -> (label used in the missing-coordinate error, label used in the result text)
    click_actions = {
        "left_click": ("left click", "Left-clicked"),
        "right_click": ("right click", "Right-clicked"),
        "double_click": ("double click", "Double-clicked"),
    }

    action = params.get("action")

    if callback:
        callback("tool_executing", {"action": action, "params": params})

    try:
        # Use the provided API client or create a new one
        if api_client is None:
            # Import here to avoid circular imports
            from .api.client import ApiClient
            api_client = ApiClient()

        if action == "screenshot":
            response = api_client.get_screenshot(computer_id)
            if callback:
                callback("tool_result", {"type": "image", "action": "screenshot"})

            # The API returns a URL rather than base64 data, so the image
            # has to be downloaded and encoded here.
            image_url = response.get("image", "")
            if not image_url:
                raise ValueError("No image URL received from API")

            import requests
            # Bound the download so a stalled connection cannot hang the agent loop
            img_response = requests.get(image_url, timeout=30)
            img_response.raise_for_status()

            image_base64 = base64.b64encode(img_response.content).decode('utf-8')

            return {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/jpeg",
                    "data": image_base64
                }
            }

        if action in click_actions:
            missing_label, done_label = click_actions[action]
            if not params.get("coordinate"):
                raise ValueError(f"Coordinates required for {missing_label}")
            x, y = params["coordinate"]
            # The API client method has the same name as the action
            getattr(api_client, action)(computer_id, x, y)
            if callback:
                callback("tool_result", {"action": action, "x": x, "y": y})
            return f"{done_label} at ({x}, {y})"

        if action == "type":
            if not params.get("text"):
                raise ValueError("Text required for typing")
            text = params["text"]
            api_client.type_text(computer_id, text)
            if callback:
                callback("tool_result", {"action": "type", "text": text})
            return f"Typed: \"{text}\""

        if action == "key":
            if not params.get("text"):
                raise ValueError("Key required for key press")
            key = params["text"]
            # Handle the 'return' key as 'enter' when needed
            if key.lower() == "return":
                key = "enter"
            api_client.key_press(computer_id, key)
            if callback:
                callback("tool_result", {"action": "key", "key": key})
            return f"Pressed key: {key}"

        if action == "scroll":
            if not params.get("scroll_direction") or params.get("scroll_amount") is None:
                raise ValueError("Direction and amount required for scrolling")
            direction = params["scroll_direction"]
            amount = params["scroll_amount"]
            api_client.scroll(computer_id, direction, amount)
            if callback:
                callback("tool_result", {"action": "scroll", "direction": direction, "amount": amount})
            return f"Scrolled {direction} by {amount}"

        if action == "wait":
            duration = params.get("duration", 1)
            api_client.wait(computer_id, duration)
            if callback:
                callback("tool_result", {"action": "wait", "duration": duration})
            return f"Waited for {duration} second(s)"

        # The handler below reports the error; notifying here as well would
        # deliver two "error" events for a single failure.
        raise ValueError(f"Unsupported action: {action}")

    except Exception as e:
        error_msg = f"Error executing {action}: {str(e)}"
        if callback:
            callback("error", error_msg)
        return f"Error: {error_msg}"
428
+
429
+
430
# Registry of prompt providers, keyed by the name accepted by get_provider().
# Register further backends here as they are implemented, e.g.:
#   openai=OpenAIProvider,
#   fireworks=FireworksProvider,
PROVIDER_MAPPING = dict(
    anthropic=AnthropicProvider,
)
437
+
438
+
439
def get_provider(provider_name: str = "anthropic") -> PromptProvider:
    """
    Look up a prompt provider by name and instantiate it.

    Args:
        provider_name: Key into PROVIDER_MAPPING

    Returns:
        A new instance of the provider class registered under that name

    Raises:
        ValueError: If no provider is registered under provider_name
    """
    if provider_name in PROVIDER_MAPPING:
        provider_cls = PROVIDER_MAPPING[provider_name]
        return provider_cls()

    available = ', '.join(PROVIDER_MAPPING.keys())
    raise ValueError(f"Unknown provider: {provider_name}. Available providers: {available}")
orgo/utils/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ # src/orgo/utils/__init__.py
2
+ """Utility functions for Orgo SDK"""
3
+
4
+ from .auth import get_api_key
5
+
6
+ __all__ = ["get_api_key"]
orgo/utils/auth.py ADDED
@@ -0,0 +1,17 @@
1
+ # src/orgo/utils/auth.py
2
+ """Authentication utilities for Orgo SDK"""
3
+
4
+ import os
5
+ from typing import Optional
6
+
7
def get_api_key(api_key: Optional[str] = None) -> str:
    """Resolve the Orgo API key.

    An explicitly passed, non-empty ``api_key`` wins; otherwise the
    ORGO_API_KEY environment variable is used.

    Raises:
        ValueError: If neither source yields a non-empty key
    """
    if api_key:
        return api_key

    env_key = os.environ.get("ORGO_API_KEY")
    if env_key:
        return env_key

    raise ValueError(
        "API key required. Set ORGO_API_KEY environment variable or pass api_key parameter. "
        "Get a key at https://www.orgo.ai/start"
    )
@@ -0,0 +1,45 @@
1
+ Metadata-Version: 2.4
2
+ Name: orgo
3
+ Version: 0.0.32
4
+ Summary: Computers for AI agents
5
+ Author: Orgo Team
6
+ License: MIT
7
+ Project-URL: Homepage, https://www.orgo.ai
8
+ Project-URL: Documentation, https://docs.orgo.ai
9
+ Requires-Python: >=3.7
10
+ Description-Content-Type: text/markdown
11
+ Requires-Dist: requests>=2.25.0
12
+ Requires-Dist: pillow>=8.0.0
13
+
14
+ # Orgo SDK
15
+
16
+ Desktop infrastructure for AI agents.
17
+
18
+ ## Install
19
+
20
+ ```bash
21
+ pip install orgo
22
+ ```
23
+
24
+ ## Usage
25
+
26
+ ```python
27
+ from orgo import Computer
28
+
29
+ # Create computer
30
+ computer = Computer()
31
+
32
+ # Control
33
+ computer.left_click(100, 200)
34
+ computer.type("Hello world")
35
+ computer.key("Enter")
36
+ computer.screenshot() # Returns PIL Image
37
+
38
+ # Execute Python code
39
+ computer.exec("import pyautogui; pyautogui.click(512, 384)")
40
+
41
+ # Cleanup
42
+ computer.shutdown()
43
+ ```
44
+
45
+ Full documentation: [docs.orgo.ai](https://docs.orgo.ai)
@@ -0,0 +1,12 @@
1
+ orgo/__init__.py,sha256=aw3BM7-Wy8jk-mvIWRG2gC4-nsc74s6ZFm1U21NyGeM,171
2
+ orgo/computer.py,sha256=np8g67-ASCci3EmtQweCpOjDzeUMkXfAY9wve9s7tCc,14268
3
+ orgo/project.py,sha256=uVDFa8iyn5OaHzTzjGQhxnF_nVzwkqkqUShiV3M0AWU,3150
4
+ orgo/prompt.py,sha256=ynblwXPTDp_aF1MbGBsY0PIEr9naklDaKFcfSE_EZ6E,19781
5
+ orgo/api/__init__.py,sha256=9Tzb_OPJ5DH7Cg7OrHzpZZUT4ip05alpa9RLDYmnId8,113
6
+ orgo/api/client.py,sha256=apny7V3IYJTyDwn5utukzyECLWT65oo-1EmFRwHL--E,7544
7
+ orgo/utils/__init__.py,sha256=W4G_nwGBf_7jy0w_mfcrkllurYHSRU4B5cMTVYH_uCc,123
8
+ orgo/utils/auth.py,sha256=tPLBJY-6gdBQWLUjUbwIwxHphC3KoRT_XgP3Iykw3Mw,509
9
+ orgo-0.0.32.dist-info/METADATA,sha256=w3RNXo5BW2YlXUgUb1TzkXaJ50RpkPd0Zd-vOllwx_A,822
10
+ orgo-0.0.32.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
+ orgo-0.0.32.dist-info/top_level.txt,sha256=q0rYtFji8GbYuhFW8A5Ab9e0j27761IKPhnL0E9xow4,5
12
+ orgo-0.0.32.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ orgo