orgo 0.0.10__tar.gz → 0.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: orgo
3
- Version: 0.0.10
4
- Summary: Desktop infrastructure for AI agents
3
+ Version: 0.0.12
4
+ Summary: Computers for AI agents
5
5
  Author: Orgo Team
6
6
  License: MIT
7
7
  Project-URL: Homepage, https://www.orgo.ai
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "orgo"
7
- version = "0.0.10"
8
- description = "Desktop infrastructure for AI agents"
7
+ version = "0.0.12"
8
+ description = "Computers for AI agents"
9
9
  authors = [{name = "Orgo Team"}]
10
10
  license = {text = "MIT"}
11
11
  readme = "README.md"
@@ -1,4 +1,5 @@
1
1
  """API client for Orgo service"""
2
+ # src/orgo/api/client.py
2
3
 
3
4
  import requests
4
5
  from typing import Dict, Any, Optional
@@ -0,0 +1,176 @@
1
+ """Computer class for interacting with Orgo virtual environments"""
2
+ # src/orgo/computer.py
3
+
4
+ import os
5
+ import io
6
+ import base64
7
+ from typing import Dict, List, Any, Optional, Callable, Union
8
+ from PIL import Image
9
+
10
+ from .api.client import ApiClient
11
+ from .prompt import get_provider
12
+
13
+
14
class Computer:
    """Handle to one Orgo virtual computer.

    Each public method forwards to the underlying ``ApiClient`` using the
    project ID resolved in ``__init__``.
    """

    def __init__(self, project_id=None, api_key=None, config=None, base_api_url=None):
        """
        Attach to an existing computer, or create a new one.

        Args:
            project_id: ID of an existing project to connect to. When falsy,
                a new computer is created instead.
            api_key: Orgo API key (falls back to the ORGO_API_KEY env var).
            config: Configuration forwarded to ``create_computer`` when a
                new computer is created.
            base_api_url: Custom API URL forwarded to ``ApiClient``.

        Raises:
            ValueError: If the create response carries no "name" to use as
                the project ID.
        """
        self.api_key = api_key or os.environ.get("ORGO_API_KEY")
        self.base_api_url = base_api_url
        self.api = ApiClient(self.api_key, self.base_api_url)

        # Existing computer: just connect and keep whatever the API returns.
        if project_id:
            self.project_id = project_id
            self._info = self.api.connect_computer(project_id)
            return

        # New computer: the "name" field of the response is the project ID.
        created = self.api.create_computer(config)
        self.project_id = created.get("name")
        self._info = created
        if not self.project_id:
            raise ValueError("Failed to initialize computer: No project ID returned")

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    def status(self) -> Dict[str, Any]:
        """Return the API's status response for this computer."""
        pid = self.project_id
        return self.api.get_status(pid)

    def restart(self) -> Dict[str, Any]:
        """Ask the API to restart this computer."""
        pid = self.project_id
        return self.api.restart_computer(pid)

    def shutdown(self) -> Dict[str, Any]:
        """Ask the API to shut this computer down."""
        pid = self.project_id
        return self.api.shutdown_computer(pid)

    # ------------------------------------------------------------------
    # Mouse / navigation
    # ------------------------------------------------------------------
    def left_click(self, x: int, y: int) -> Dict[str, Any]:
        """Left-click at ``(x, y)``."""
        pid = self.project_id
        return self.api.left_click(pid, x, y)

    def right_click(self, x: int, y: int) -> Dict[str, Any]:
        """Right-click at ``(x, y)``."""
        pid = self.project_id
        return self.api.right_click(pid, x, y)

    def double_click(self, x: int, y: int) -> Dict[str, Any]:
        """Double-click at ``(x, y)``."""
        pid = self.project_id
        return self.api.double_click(pid, x, y)

    def scroll(self, direction: str = "down", amount: int = 1) -> Dict[str, Any]:
        """Scroll ``amount`` steps in ``direction``."""
        pid = self.project_id
        return self.api.scroll(pid, direction, amount)

    # ------------------------------------------------------------------
    # Keyboard input
    # ------------------------------------------------------------------
    def type(self, text: str) -> Dict[str, Any]:
        """Type ``text`` on the computer."""
        pid = self.project_id
        return self.api.type_text(pid, text)

    def key(self, key: str) -> Dict[str, Any]:
        """Press a key or key combination (e.g., "Enter", "ctrl+c")."""
        pid = self.project_id
        return self.api.key_press(pid, key)

    # ------------------------------------------------------------------
    # Screen capture
    # ------------------------------------------------------------------
    def screenshot(self) -> Image.Image:
        """Capture the screen and return it as a PIL image.

        The "image" field of the API response is base64-decoded and opened
        with ``Image.open``.
        """
        payload = self.api.get_screenshot(self.project_id)
        encoded = payload.get("image", "")
        raw_bytes = base64.b64decode(encoded)
        return Image.open(io.BytesIO(raw_bytes))

    def screenshot_base64(self) -> str:
        """Capture the screen and return the response's base64 "image" field.

        Returns an empty string when the response has no "image" field.
        """
        payload = self.api.get_screenshot(self.project_id)
        return payload.get("image", "")

    # ------------------------------------------------------------------
    # Execution
    # ------------------------------------------------------------------
    def bash(self, command: str) -> str:
        """Run ``command`` through the API and return the "output" field.

        Returns an empty string when the response has no "output" field.
        """
        payload = self.api.execute_bash(self.project_id, command)
        return payload.get("output", "")

    def wait(self, seconds: float) -> Dict[str, Any]:
        """Ask the API to wait for ``seconds`` seconds."""
        pid = self.project_id
        return self.api.wait(pid, seconds)

    # ------------------------------------------------------------------
    # AI control
    # ------------------------------------------------------------------
    def prompt(self,
               instruction: str,
               provider: str = "anthropic",
               model: str = "claude-3-7-sonnet-20250219",
               display_width: int = 1024,
               display_height: int = 768,
               callback: Optional[Callable[[str, Any], None]] = None,
               thinking_enabled: bool = False,
               thinking_budget: int = 1024,
               max_tokens: int = 4096,
               max_iterations: int = 20,
               max_saved_screenshots: int = 5,
               api_key: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Drive the computer from a natural-language instruction via an AI provider.

        The provider is looked up with ``get_provider(provider)`` and every
        argument below is forwarded to its ``execute`` method, together with
        this computer's Orgo API key and base URL.

        Args:
            instruction: What you want the AI to do with the computer
            provider: AI provider to use (default: "anthropic")
            model: Model to use (default: "claude-3-7-sonnet-20250219")
            display_width: Screen width in pixels
            display_height: Screen height in pixels
            callback: Optional ``callback(event, payload)`` for progress updates
            thinking_enabled: Enable the model's thinking capability (default: False)
            thinking_budget: Token budget for thinking (default: 1024)
            max_tokens: Maximum tokens for the model response
            max_iterations: Maximum number of agent loop iterations
            max_saved_screenshots: Maximum number of screenshots kept in history (default: 5)
            api_key: API key for the AI provider (defaults to env var)

        Returns:
            List of messages from the conversation

        Examples:
            # Simple usage with environment variables
            computer.prompt("Open Firefox and search for Python tutorials")

            # With explicit API key
            computer.prompt("Open Terminal and list files", api_key="your-anthropic-key")

            # With callback for progress updates
            computer.prompt("Create a new text file", callback=my_callback_function)

            # With thinking enabled
            computer.prompt("Analyze a complex webpage", thinking_enabled=True)

            # Keep more screenshots for complex tasks
            computer.prompt("Perform a complex multi-step task", max_saved_screenshots=10)
        """
        provider_instance = get_provider(provider)

        execute_options = dict(
            computer_id=self.project_id,
            instruction=instruction,
            callback=callback,
            api_key=api_key,
            model=model,
            display_width=display_width,
            display_height=display_height,
            thinking_enabled=thinking_enabled,
            thinking_budget=thinking_budget,
            max_tokens=max_tokens,
            max_iterations=max_iterations,
            max_saved_screenshots=max_saved_screenshots,
            # Let the provider build its own ApiClient with our Orgo settings.
            orgo_api_key=self.api_key,
            orgo_base_url=self.base_api_url,
        )
        return provider_instance.execute(**execute_options)
@@ -0,0 +1,434 @@
1
+ """
2
+ Prompt module for interacting with virtual computers using AI models.
3
+ """
4
+
5
+ import os
6
+ import base64
7
+ from typing import Dict, List, Any, Optional, Callable, Union, Protocol
8
+
9
+
10
class PromptProvider(Protocol):
    """Structural interface that every prompt provider must satisfy."""

    def execute(
        self,
        computer_id: str,
        instruction: str,
        callback: Optional[Callable[[str, Any], None]] = None,
        **kwargs
    ) -> List[Dict[str, Any]]:
        """
        Run an instruction against a computer.

        Args:
            computer_id: ID of the computer to control
            instruction: The user's instruction
            callback: Optional ``callback(event, payload)`` progress hook
            **kwargs: Provider-specific options

        Returns:
            List of messages from the conversation
        """
        ...
31
+
32
+
33
class AnthropicProvider:
    """Anthropic Claude-based prompt provider.

    Runs an agent loop: send the conversation to Claude with the computer-use
    tool enabled, execute every tool call Claude requests against the Orgo
    API, append the results, and repeat until Claude stops requesting tools
    or the iteration limit is reached.
    """

    # action name -> (ApiClient method name, label for errors, label for results)
    _CLICK_ACTIONS = {
        "left_click": ("left_click", "left click", "Left-clicked"),
        "right_click": ("right_click", "right click", "Right-clicked"),
        "double_click": ("double_click", "double click", "Double-clicked"),
    }

    # Text that replaces screenshots pruned from the conversation history.
    _IMAGE_PLACEHOLDER = "[IMAGE DATA REMOVED]"

    def __init__(self):
        """Import the Anthropic SDK lazily so it stays an optional dependency.

        Raises:
            ImportError: If the ``anthropic`` package is not installed.
        """
        try:
            import anthropic
        except ImportError as err:
            raise ImportError(
                "Anthropic SDK not installed. Please install with 'pip install anthropic'"
            ) from err
        self.anthropic = anthropic

    @staticmethod
    def _system_prompt(display_width: int, display_height: int) -> str:
        """Build the system prompt describing the VM and how to interact with it."""
        return f"""You are Claude, an AI assistant that controls a virtual Ubuntu computer with internet access.

<SYSTEM_CAPABILITY>
* You are utilising an Ubuntu virtual machine with a display resolution of {display_width}x{display_height}.
* You can take screenshots to see the current state and control the computer by clicking, typing, pressing keys, and scrolling.
* The virtual environment is an Ubuntu system with standard applications.
* Always start by taking a screenshot to see the current state before performing any actions.
</SYSTEM_CAPABILITY>

<UBUNTU_DESKTOP_GUIDELINES>
* CRITICAL INSTRUCTION: When opening applications or files on the Ubuntu desktop, you MUST USE DOUBLE-CLICK rather than single-click.
* Single-click only selects desktop icons but DOES NOT open them. To open desktop icons, you MUST use double-click.
* Common desktop interactions:
- Desktop icons: DOUBLE-CLICK to open applications and folders
- Menu items: SINGLE-CLICK to select options
- Taskbar icons: SINGLE-CLICK to open applications
- Window buttons: SINGLE-CLICK to use close, minimize, maximize buttons
- File browser items: DOUBLE-CLICK to open folders and files
- When submitting, use the 'Enter' key, not the 'Return' key.
* If you see an icon on the desktop that you need to open, ALWAYS use the double_click action, never use left_click.
</UBUNTU_DESKTOP_GUIDELINES>

<SCREENSHOT_GUIDELINES>
* Be mindful of how many screenshots you take - they consume significant memory.
* Only take screenshots when you need to see the current state of the screen.
* Try to batch multiple actions before taking another screenshot.
* For better performance, limit the number of screenshots you take.
</SCREENSHOT_GUIDELINES>"""

    def execute(self,
                computer_id: str,
                instruction: str,
                callback: Optional[Callable[[str, Any], None]] = None,
                api_key: Optional[str] = None,
                model: str = "claude-3-7-sonnet-20250219",
                display_width: int = 1024,
                display_height: int = 768,
                orgo_api_key: Optional[str] = None,
                orgo_base_url: Optional[str] = None,
                max_saved_screenshots: int = 2,
                **kwargs) -> List[Dict[str, Any]]:
        """
        Execute a prompt using Anthropic's Claude.

        Args:
            computer_id: ID of the computer to control
            instruction: User instruction
            callback: Optional ``callback(event, payload)`` progress hook.
                Events emitted here: "status", "text", "thinking",
                "tool_use", "error" (plus those emitted by tool execution).
            api_key: Anthropic API key (falls back to ANTHROPIC_API_KEY)
            model: Model to use
            display_width: Display width in pixels
            display_height: Display height in pixels
            orgo_api_key: API key for Orgo (passed to ApiClient)
            orgo_base_url: Base URL for Orgo API (passed to ApiClient)
            max_saved_screenshots: Maximum number of screenshots kept in the
                conversation history; older ones become text placeholders
            **kwargs: Optional ``max_iterations`` (default 20), ``max_tokens``
                (default 4096), ``thinking_enabled`` and ``thinking_budget``
                (default 1024)

        Returns:
            List of messages from the conversation

        Raises:
            ValueError: If no Anthropic API key is available.
            Exception: Any error raised while talking to Claude is reported
                to ``callback`` as "error" and then re-raised.
        """
        api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        if not api_key:
            raise ValueError("No Anthropic API key provided. Set ANTHROPIC_API_KEY environment variable or pass api_key.")

        client = self.anthropic.Anthropic(api_key=api_key)
        messages = [{"role": "user", "content": instruction}]
        system_prompt = self._system_prompt(display_width, display_height)

        try:
            # Computer-use tool definition expected by the beta Messages API.
            tools = [
                {
                    "type": "computer_20250124",
                    "name": "computer",
                    "display_width_px": display_width,
                    "display_height_px": display_height,
                    "display_number": 1
                }
            ]

            if callback:
                callback("status", "Starting conversation with Claude")

            iteration = 0
            max_iterations = kwargs.get("max_iterations", 20)

            # Imported here (not at module level) to avoid circular imports.
            from .api.client import ApiClient
            api_client = ApiClient(orgo_api_key, orgo_base_url)

            while iteration < max_iterations:
                iteration += 1

                # Pruning is a no-op while the history is within the limit,
                # so it can run unconditionally before every request.
                self._filter_to_n_most_recent_images(messages, max_saved_screenshots)

                request_params = {
                    "model": model,
                    "max_tokens": kwargs.get("max_tokens", 4096),
                    "system": system_prompt,
                    "messages": messages,
                    "tools": tools,
                    "betas": ["computer-use-2025-01-24"],
                }

                # Only send the thinking parameter when explicitly enabled.
                if kwargs.get("thinking_enabled"):
                    request_params["thinking"] = {
                        "type": "enabled",
                        "budget_tokens": kwargs.get("thinking_budget", 1024)
                    }

                try:
                    response = client.beta.messages.create(**request_params)
                except Exception as e:
                    if "base64" not in str(e).lower():
                        raise
                    if callback:
                        callback("error", "Base64 error detected. Attempting recovery...")
                    # `messages` is mutated in place, so request_params sees
                    # the pruned history on the retry.
                    self._filter_to_n_most_recent_images(messages, 1)
                    response = client.beta.messages.create(**request_params)

                response_content = response.content
                messages.append({"role": "assistant", "content": response_content})

                # Report everything Claude said before executing any tool.
                if callback:
                    for block in response_content:
                        if block.type == "text":
                            callback("text", block.text)
                        elif block.type == "thinking":
                            callback("thinking", block.thinking)
                        elif block.type == "tool_use":
                            callback("tool_use", {
                                "action": block.name.split(".")[-1],
                                **block.input
                            })

                tool_results = []
                for block in response_content:
                    if block.type != "tool_use":
                        continue

                    result = self._execute_tool(computer_id, block.input, callback, api_client)

                    # Images go back as-is; anything else is sent as text.
                    if isinstance(result, dict) and result.get("type") == "image":
                        result_content = [result]
                    else:
                        result_content = [{"type": "text", "text": str(result)}]

                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": result_content,
                    })

                # No tool calls means Claude considers the task finished.
                if not tool_results:
                    if callback:
                        callback("status", "Task completed")
                    return messages

                messages.append({"role": "user", "content": tool_results})

            if callback:
                callback("status", f"Reached maximum iterations ({max_iterations})")

            return messages

        except Exception as e:
            if callback:
                callback("error", str(e))
            raise

    def _filter_to_n_most_recent_images(self, messages: List[Dict[str, Any]], max_images: int):
        """
        Keep only the N most recent images in the conversation history.

        Only images inside ``tool_result`` blocks of user messages are
        considered. Each pruned image item is replaced, in place, by a text
        item holding a placeholder. The previous approach overwrote the
        image's base64 ``data`` with the placeholder string, which left an
        image block whose payload is not valid base64 in the next request.

        Args:
            messages: The conversation history (mutated in place)
            max_images: Maximum number of images to keep; values below zero
                are treated as zero
        """
        keep = max(max_images, 0)

        # (containing list, index) for every image, oldest first.
        image_slots = []
        for msg in messages:
            if msg["role"] != "user":
                continue

            content = msg.get("content", [])
            if not isinstance(content, list):
                continue

            for block in content:
                if not isinstance(block, dict) or block.get("type") != "tool_result":
                    continue

                block_content = block.get("content", [])
                if not isinstance(block_content, list):
                    continue

                for item_idx, item in enumerate(block_content):
                    if (isinstance(item, dict)
                            and item.get("type") == "image"
                            and "source" in item):
                        image_slots.append((block_content, item_idx))

        # Count from the front: `slots[:-keep]` would be empty when keep == 0.
        surplus = len(image_slots) - keep
        if surplus <= 0:
            return

        for block_content, item_idx in image_slots[:surplus]:
            block_content[item_idx] = {"type": "text", "text": self._IMAGE_PLACEHOLDER}

    def _execute_tool(self,
                      computer_id: str,
                      params: Dict[str, Any],
                      callback: Optional[Callable[[str, Any], None]] = None,
                      api_client=None) -> Union[str, Dict[str, Any]]:
        """
        Execute one tool action via the API client.

        Args:
            computer_id: ID of the computer to act on
            params: Tool input from Claude; ``params["action"]`` selects the
                operation and the remaining keys are its arguments
            callback: Optional hook; receives "tool_executing" before the
                action, then "tool_result" on success or "error" on failure
            api_client: Client to use; a default ``ApiClient()`` is created
                when omitted

        Returns:
            For "screenshot", a base64 image content block. For every other
            action, a short description of what was done. Failures are never
            raised: they are returned as an ``"Error: ..."`` string so they
            can be sent back to the model as the tool result.
        """
        action = params.get("action")

        if callback:
            callback("tool_executing", {"action": action, "params": params})

        try:
            if api_client is None:
                # Import here to avoid circular imports
                from .api.client import ApiClient
                api_client = ApiClient()

            if action == "screenshot":
                response = api_client.get_screenshot(computer_id)
                if callback:
                    callback("tool_result", {"type": "image", "action": "screenshot"})
                return {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        # NOTE(review): assumes the Orgo API returns JPEG data - confirm.
                        "media_type": "image/jpeg",
                        "data": response.get("image", "")
                    }
                }

            if action in self._CLICK_ACTIONS:
                method_name, label, done_label = self._CLICK_ACTIONS[action]
                if not params.get("coordinate"):
                    raise ValueError(f"Coordinates required for {label}")
                x, y = params["coordinate"]
                getattr(api_client, method_name)(computer_id, x, y)
                if callback:
                    callback("tool_result", {"action": action, "x": x, "y": y})
                return f"{done_label} at ({x}, {y})"

            if action == "type":
                if not params.get("text"):
                    raise ValueError("Text required for typing")
                text = params["text"]
                api_client.type_text(computer_id, text)
                if callback:
                    callback("tool_result", {"action": "type", "text": text})
                return f"Typed: \"{text}\""

            if action == "key":
                if not params.get("text"):
                    raise ValueError("Key required for key press")
                key = params["text"]
                # The system prompt asks for 'Enter'; map a bare 'Return' too.
                if key.lower() == "return":
                    key = "enter"
                api_client.key_press(computer_id, key)
                if callback:
                    callback("tool_result", {"action": "key", "key": key})
                return f"Pressed key: {key}"

            if action == "scroll":
                if not params.get("scroll_direction") or params.get("scroll_amount") is None:
                    raise ValueError("Direction and amount required for scrolling")
                direction = params["scroll_direction"]
                amount = params["scroll_amount"]
                api_client.scroll(computer_id, direction, amount)
                if callback:
                    callback("tool_result", {"action": "scroll", "direction": direction, "amount": amount})
                return f"Scrolled {direction} by {amount}"

            if action == "wait":
                duration = params.get("duration", 1)
                api_client.wait(computer_id, duration)
                if callback:
                    callback("tool_result", {"action": "wait", "duration": duration})
                return f"Waited for {duration} second(s)"

            error_msg = f"Unsupported action: {action}"
            if callback:
                callback("error", error_msg)
            raise ValueError(error_msg)

        except Exception as e:
            # Deliberately broad: any failure is reported back to the model
            # as the tool result rather than aborting the agent loop.
            error_msg = f"Error executing {action}: {str(e)}"
            if callback:
                callback("error", error_msg)
            return f"Error: {error_msg}"
410
+
411
+
412
# Registry of provider names accepted by get_provider(); each value is the
# class that gets instantiated for that name.
PROVIDER_MAPPING = {
    "anthropic": AnthropicProvider,
    # Add more providers here as needed, e.g.:
    # "openai": OpenAIProvider,
    # "fireworks": FireworksProvider,
}


def get_provider(provider_name: str = "anthropic") -> PromptProvider:
    """
    Instantiate the prompt provider registered under ``provider_name``.

    Args:
        provider_name: Key into ``PROVIDER_MAPPING``

    Returns:
        A new instance of the registered provider class

    Raises:
        ValueError: If ``provider_name`` is not registered.
    """
    if provider_name in PROVIDER_MAPPING:
        provider_cls = PROVIDER_MAPPING[provider_name]
        return provider_cls()

    raise ValueError(f"Unknown provider: {provider_name}. Available providers: {', '.join(PROVIDER_MAPPING.keys())}")
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: orgo
3
- Version: 0.0.10
4
- Summary: Desktop infrastructure for AI agents
3
+ Version: 0.0.12
4
+ Summary: Computers for AI agents
5
5
  Author: Orgo Team
6
6
  License: MIT
7
7
  Project-URL: Homepage, https://www.orgo.ai
@@ -2,6 +2,7 @@ README.md
2
2
  pyproject.toml
3
3
  src/orgo/__init__.py
4
4
  src/orgo/computer.py
5
+ src/orgo/prompt.py
5
6
  src/orgo.egg-info/PKG-INFO
6
7
  src/orgo.egg-info/SOURCES.txt
7
8
  src/orgo.egg-info/dependency_links.txt
@@ -1,84 +0,0 @@
1
- """Computer class for interacting with Orgo virtual environments"""
2
-
3
- import os
4
- import io
5
- import base64
6
- from typing import Dict, Any, Optional
7
- from PIL import Image
8
-
9
- from .api.client import ApiClient
10
-
11
class Computer:
    """Handle to one Orgo virtual computer (0.0.10 implementation).

    Each public method forwards to the underlying ``ApiClient`` using the
    project ID resolved in ``__init__``.
    """

    def __init__(self, project_id=None, api_key=None, config=None, base_api_url=None):
        """
        Attach to an existing computer, or create a new one.

        Args:
            project_id: ID of an existing project to connect to. When falsy,
                a new computer is created instead.
            api_key: Orgo API key (falls back to the ORGO_API_KEY env var).
            config: Configuration forwarded to ``create_computer``.
            base_api_url: Custom API URL forwarded to ``ApiClient``.

        Raises:
            ValueError: If the create response carries no "name" to use as
                the project ID.
        """
        resolved_key = api_key or os.environ.get("ORGO_API_KEY")
        self.api = ApiClient(resolved_key, base_api_url)

        # Existing computer: just connect and keep whatever the API returns.
        if project_id:
            self.project_id = project_id
            self._info = self.api.connect_computer(project_id)
            return

        # New computer: the "name" field of the response is the project ID.
        created = self.api.create_computer(config)
        self.project_id = created.get("name")
        self._info = created
        if not self.project_id:
            raise ValueError("Failed to initialize computer: No project ID returned")

    # Lifecycle
    def status(self) -> Dict[str, Any]:
        """Return the API's status response for this computer."""
        pid = self.project_id
        return self.api.get_status(pid)

    def restart(self) -> Dict[str, Any]:
        """Ask the API to restart this computer."""
        pid = self.project_id
        return self.api.restart_computer(pid)

    def shutdown(self) -> Dict[str, Any]:
        """Ask the API to shut this computer down."""
        pid = self.project_id
        return self.api.shutdown_computer(pid)

    # Mouse / navigation
    def left_click(self, x: int, y: int) -> Dict[str, Any]:
        """Left-click at ``(x, y)``."""
        pid = self.project_id
        return self.api.left_click(pid, x, y)

    def right_click(self, x: int, y: int) -> Dict[str, Any]:
        """Right-click at ``(x, y)``."""
        pid = self.project_id
        return self.api.right_click(pid, x, y)

    def double_click(self, x: int, y: int) -> Dict[str, Any]:
        """Double-click at ``(x, y)``."""
        pid = self.project_id
        return self.api.double_click(pid, x, y)

    def scroll(self, direction: str = "down", amount: int = 1) -> Dict[str, Any]:
        """Scroll ``amount`` steps in ``direction``."""
        pid = self.project_id
        return self.api.scroll(pid, direction, amount)

    # Keyboard input
    def type(self, text: str) -> Dict[str, Any]:
        """Type ``text`` on the computer."""
        pid = self.project_id
        return self.api.type_text(pid, text)

    def key(self, key: str) -> Dict[str, Any]:
        """Press a key or key combination (e.g., "Enter", "ctrl+c")."""
        pid = self.project_id
        return self.api.key_press(pid, key)

    # Screen capture
    def screenshot(self) -> Image.Image:
        """Capture the screen and return it as a PIL image.

        The "image" field of the API response is base64-decoded and opened
        with ``Image.open``.
        """
        payload = self.api.get_screenshot(self.project_id)
        encoded = payload.get("image", "")
        raw_bytes = base64.b64decode(encoded)
        return Image.open(io.BytesIO(raw_bytes))

    def screenshot_base64(self) -> str:
        """Capture the screen and return the response's base64 "image" field.

        Returns an empty string when the response has no "image" field.
        """
        payload = self.api.get_screenshot(self.project_id)
        return payload.get("image", "")

    # Execution
    def bash(self, command: str) -> str:
        """Run ``command`` through the API and return the "output" field.

        Returns an empty string when the response has no "output" field.
        """
        payload = self.api.execute_bash(self.project_id, command)
        return payload.get("output", "")

    def wait(self, seconds: float) -> Dict[str, Any]:
        """Ask the API to wait for ``seconds`` seconds."""
        pid = self.project_id
        return self.api.wait(pid, seconds)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes