orgo 0.0.9__tar.gz → 0.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: orgo
3
- Version: 0.0.9
4
- Summary: Desktop infrastructure for AI agents
3
+ Version: 0.0.11
4
+ Summary: Computers for AI agents
5
5
  Author: Orgo Team
6
6
  License: MIT
7
7
  Project-URL: Homepage, https://www.orgo.ai
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "orgo"
7
- version = "0.0.9"
8
- description = "Desktop infrastructure for AI agents"
7
+ version = "0.0.11"
8
+ description = "Computers for AI agents"
9
9
  authors = [{name = "Orgo Team"}]
10
10
  license = {text = "MIT"}
11
11
  readme = "README.md"
@@ -1,4 +1,5 @@
1
1
  """API client for Orgo service"""
2
+ # src/orgo/api/client.py
2
3
 
3
4
  import requests
4
5
  from typing import Dict, Any, Optional
@@ -6,10 +7,9 @@ from typing import Dict, Any, Optional
6
7
  from orgo.utils.auth import get_api_key
7
8
 
8
9
  class ApiClient:
9
- BASE_URL = "https://www.orgo.ai/api"
10
-
11
- def __init__(self, api_key: Optional[str] = None):
10
+ def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
12
11
  self.api_key = get_api_key(api_key)
12
+ self.base_url = base_url or "https://www.orgo.ai/api"
13
13
  self.session = requests.Session()
14
14
  self.session.headers.update({
15
15
  "Authorization": f"Bearer {self.api_key}",
@@ -18,7 +18,7 @@ class ApiClient:
18
18
  })
19
19
 
20
20
  def _request(self, method: str, endpoint: str, data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
21
- url = f"{self.BASE_URL}/{endpoint}"
21
+ url = f"{self.base_url}/{endpoint}"
22
22
 
23
23
  try:
24
24
  if method.upper() == "GET":
@@ -0,0 +1,176 @@
1
+ """Computer class for interacting with Orgo virtual environments"""
2
+ # src/orgo/computer.py
3
+
4
+ import os
5
+ import io
6
+ import base64
7
+ from typing import Dict, List, Any, Optional, Callable, Union
8
+ from PIL import Image
9
+
10
+ from .api.client import ApiClient
11
+ from .prompt import get_provider
12
+
13
+
14
class Computer:
    """Handle on one Orgo virtual computer, addressed by its project ID.

    Each action method forwards to the matching ``ApiClient`` call, passing
    this computer's ``project_id`` as the first argument.
    """

    def __init__(self, project_id=None, api_key=None, config=None, base_api_url=None):
        """
        Connect to an existing computer or create a new one.

        Args:
            project_id: Existing project ID to connect to (optional). When
                falsy, a new computer is created instead.
            api_key: Orgo API key (defaults to the ORGO_API_KEY env var)
            config: Configuration forwarded to ``create_computer`` (optional)
            base_api_url: Custom API URL handed to ``ApiClient`` (optional)

        Raises:
            ValueError: If no project ID is available after initialization.
        """
        resolved_key = api_key or os.environ.get("ORGO_API_KEY")
        self.api_key = resolved_key
        self.base_api_url = base_api_url
        self.api = ApiClient(resolved_key, base_api_url)

        if project_id:
            self.project_id = project_id
            self._info = self.api.connect_computer(project_id)
        else:
            # A freshly created computer reports its ID under the "name" key.
            created = self.api.create_computer(config)
            self.project_id = created.get("name")
            self._info = created

        if not self.project_id:
            raise ValueError("Failed to initialize computer: No project ID returned")

    # Lifecycle methods
    def status(self) -> Dict[str, Any]:
        """Return the API's status response for this computer."""
        current = self.api.get_status(self.project_id)
        return current

    def restart(self) -> Dict[str, Any]:
        """Ask the API to restart this computer and return its response."""
        outcome = self.api.restart_computer(self.project_id)
        return outcome

    def shutdown(self) -> Dict[str, Any]:
        """Ask the API to shut this computer down and return its response."""
        outcome = self.api.shutdown_computer(self.project_id)
        return outcome

    # Navigation methods
    def left_click(self, x: int, y: int) -> Dict[str, Any]:
        """Left-click at the given screen coordinates."""
        target = self.project_id
        return self.api.left_click(target, x, y)

    def right_click(self, x: int, y: int) -> Dict[str, Any]:
        """Right-click at the given screen coordinates."""
        target = self.project_id
        return self.api.right_click(target, x, y)

    def double_click(self, x: int, y: int) -> Dict[str, Any]:
        """Double-click at the given screen coordinates."""
        target = self.project_id
        return self.api.double_click(target, x, y)

    def scroll(self, direction: str = "down", amount: int = 1) -> Dict[str, Any]:
        """Scroll by ``amount`` in ``direction`` (defaults: "down", 1)."""
        target = self.project_id
        return self.api.scroll(target, direction, amount)

    # Input methods
    def type(self, text: str) -> Dict[str, Any]:
        """Send ``text`` to the computer as typed input."""
        target = self.project_id
        return self.api.type_text(target, text)

    def key(self, key: str) -> Dict[str, Any]:
        """Press a key or key combination (e.g., "Enter", "ctrl+c")."""
        target = self.project_id
        return self.api.key_press(target, key)

    # View methods
    def screenshot(self) -> "Image.Image":
        """Capture a screenshot and return it as a PIL Image.

        The API response's "image" field is base64-decoded and opened with
        ``Image.open``.
        """
        payload = self.api.get_screenshot(self.project_id)
        encoded = payload.get("image", "")
        raw_bytes = base64.b64decode(encoded)
        return Image.open(io.BytesIO(raw_bytes))

    def screenshot_base64(self) -> str:
        """Capture a screenshot and return the response's "image" field as-is."""
        payload = self.api.get_screenshot(self.project_id)
        return payload.get("image", "")

    # Execution methods
    def bash(self, command: str) -> str:
        """Run ``command`` through the API and return the response's "output" field."""
        payload = self.api.execute_bash(self.project_id, command)
        return payload.get("output", "")

    def wait(self, seconds: float) -> Dict[str, Any]:
        """Ask the API to wait for ``seconds`` seconds."""
        target = self.project_id
        return self.api.wait(target, seconds)

    # AI control method
    def prompt(self,
               instruction: str,
               provider: str = "anthropic",
               model: str = "claude-3-7-sonnet-20250219",
               display_width: int = 1024,
               display_height: int = 768,
               callback: Optional[Callable[[str, Any], None]] = None,
               thinking_enabled: bool = False,
               thinking_budget: int = 1024,
               max_tokens: int = 4096,
               max_iterations: int = 20,
               max_saved_screenshots: int = 5,
               api_key: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Let an AI provider drive this computer from a natural language instruction.

        The provider named by ``provider`` is looked up with ``get_provider``
        and its ``execute`` method is called with the arguments below plus
        this computer's Orgo API key and base URL.

        Args:
            instruction: What you want the AI to do with the computer
            provider: AI provider to use (default: "anthropic")
            model: Model to use (default: "claude-3-7-sonnet-20250219")
            display_width: Screen width in pixels
            display_height: Screen height in pixels
            callback: Optional callback function for progress updates
            thinking_enabled: Enable the model's thinking capability (default: False)
            thinking_budget: Token budget for thinking (default: 1024)
            max_tokens: Maximum tokens for model response
            max_iterations: Maximum number of agent loop iterations
            max_saved_screenshots: Maximum number of screenshots to keep in history (default: 5)
            api_key: API key for the AI provider (defaults to env var)

        Returns:
            List of messages from the conversation

        Examples:
            # Simple usage with environment variables
            computer.prompt("Open Firefox and search for Python tutorials")

            # With explicit API key
            computer.prompt("Open Terminal and list files", api_key="your-anthropic-key")

            # With callback for progress updates
            computer.prompt("Create a new text file", callback=my_callback_function)

            # With thinking enabled
            computer.prompt("Analyze a complex webpage", thinking_enabled=True)

            # Keeping more screenshots for a long multi-step task
            computer.prompt("Perform a complex multi-step task", max_saved_screenshots=10)
        """
        execute_options = {
            "computer_id": self.project_id,
            "instruction": instruction,
            "callback": callback,
            "api_key": api_key,
            "model": model,
            "display_width": display_width,
            "display_height": display_height,
            "thinking_enabled": thinking_enabled,
            "thinking_budget": thinking_budget,
            "max_tokens": max_tokens,
            "max_iterations": max_iterations,
            "max_saved_screenshots": max_saved_screenshots,
            # Pass through the Orgo API client configuration
            "orgo_api_key": self.api_key,
            "orgo_base_url": self.base_api_url,
        }
        chosen_provider = get_provider(provider)
        return chosen_provider.execute(**execute_options)
@@ -0,0 +1,405 @@
1
+ """
2
+ Prompt module for interacting with virtual computers using AI models.
3
+ """
4
+
5
+ import os
6
+ from typing import Dict, List, Any, Optional, Callable, Union, Protocol
7
+
8
+
9
class PromptProvider(Protocol):
    """Structural interface every prompt provider must satisfy.

    A provider is any object exposing an ``execute`` method with the
    signature below; no inheritance from this class is required.
    """

    def execute(
        self,
        computer_id: str,
        instruction: str,
        callback: Optional[Callable[[str, Any], None]] = None,
        **kwargs
    ) -> List[Dict[str, Any]]:
        """
        Run an instruction against a computer.

        Args:
            computer_id: ID of the computer to control
            instruction: User instruction
            callback: Optional progress callback, called with two positional
                arguments typed ``(str, Any)``
            **kwargs: Additional provider-specific parameters

        Returns:
            List of messages from the conversation
        """
        ...
30
+
31
+
32
class AnthropicProvider:
    """Anthropic Claude-based prompt provider.

    Runs an agent loop: Claude is given the computer-use tool, each tool
    request it makes is executed through the Orgo ``ApiClient``, and the
    results are fed back until Claude stops requesting tools or the
    iteration cap is reached.
    """

    # Text left in place of a screenshot that has been pruned from history.
    _PRUNED_SCREENSHOT_TEXT = "[IMAGE DATA REMOVED]"

    # Click actions share one code path: action -> (label used in the
    # validation error, verb used in the result text). The action name is
    # also the name of the ApiClient method that performs it.
    _CLICK_ACTIONS = {
        "left_click": ("left click", "Left-clicked"),
        "right_click": ("right click", "Right-clicked"),
        "double_click": ("double click", "Double-clicked"),
    }

    def __init__(self):
        """Initialize the Anthropic provider.

        Raises:
            ImportError: If the ``anthropic`` SDK is not installed.
        """
        # Imported lazily so the SDK is only required when this provider is used.
        try:
            import anthropic
        except ImportError as err:
            raise ImportError(
                "Anthropic SDK not installed. Please install with 'pip install anthropic'"
            ) from err
        self.anthropic = anthropic

    def execute(self,
                computer_id: str,
                instruction: str,
                callback: Optional[Callable[[str, Any], None]] = None,
                api_key: Optional[str] = None,
                model: str = "claude-3-7-sonnet-20250219",
                display_width: int = 1024,
                display_height: int = 768,
                orgo_api_key: Optional[str] = None,
                orgo_base_url: Optional[str] = None,
                max_saved_screenshots: int = 2,
                **kwargs) -> List[Dict[str, Any]]:
        """
        Execute a prompt using Anthropic's Claude.

        Args:
            computer_id: ID of the computer to control
            instruction: User instruction
            callback: Optional progress callback, called as
                ``callback(event, payload)`` with events "status", "text",
                "thinking", "tool_use", "tool_executing", "tool_result"
                and "error"
            api_key: Anthropic API key (falls back to ANTHROPIC_API_KEY)
            model: Model to use
            display_width: Display width in pixels
            display_height: Display height in pixels
            orgo_api_key: API key for Orgo (passed to ApiClient)
            orgo_base_url: Base URL for Orgo API (passed to ApiClient)
            max_saved_screenshots: Maximum number of screenshots to maintain in conversation history
            **kwargs: Only these keys are read: ``max_iterations``
                (default 20), ``max_tokens`` (default 4096),
                ``thinking_enabled`` and ``thinking_budget`` (default 1024).
                Other keys are ignored.

        Returns:
            List of messages from the conversation

        Raises:
            ValueError: If no Anthropic API key is available.
            Exception: Any error raised inside the agent loop is reported
                to ``callback`` as an "error" event and then re-raised.
        """
        # Explicit argument wins; otherwise fall back to the environment.
        api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        if not api_key:
            raise ValueError("No Anthropic API key provided. Set ANTHROPIC_API_KEY environment variable or pass api_key.")

        client = self.anthropic.Anthropic(api_key=api_key)

        # The conversation starts with the user's instruction only.
        messages = [{"role": "user", "content": instruction}]

        system_prompt = f"""You are Claude, an AI assistant that controls a virtual Ubuntu computer with internet access.

<SYSTEM_CAPABILITY>
* You are utilising an Ubuntu virtual machine with a display resolution of {display_width}x{display_height}.
* You can take screenshots to see the current state and control the computer by clicking, typing, pressing keys, and scrolling.
* The virtual environment is an Ubuntu system with standard applications.
* Always start by taking a screenshot to see the current state before performing any actions.
</SYSTEM_CAPABILITY>

<UBUNTU_DESKTOP_GUIDELINES>
* CRITICAL INSTRUCTION: When opening applications or files on the Ubuntu desktop, you MUST USE DOUBLE-CLICK rather than single-click.
* Single-click only selects desktop icons but DOES NOT open them. To open desktop icons, you MUST use double-click.
* Common desktop interactions:
- Desktop icons: DOUBLE-CLICK to open applications and folders
- Menu items: SINGLE-CLICK to select options
- Taskbar icons: SINGLE-CLICK to open applications
- Window buttons: SINGLE-CLICK to use close, minimize, maximize buttons
- File browser items: DOUBLE-CLICK to open folders and files
- When submitting, use the 'Enter' key, not the 'Return' key.
* If you see an icon on the desktop that you need to open, ALWAYS use the double_click action, never use left_click.
</UBUNTU_DESKTOP_GUIDELINES>"""

        try:
            # Computer-use tool definition sent with every request.
            tools = [
                {
                    "type": "computer_20250124",
                    "name": "computer",
                    "display_width_px": display_width,
                    "display_height_px": display_height,
                    "display_number": 1
                }
            ]

            if callback:
                callback("status", "Starting conversation with Claude")

            iteration = 0
            max_iterations = kwargs.get("max_iterations", 20)  # Default to 20 iterations max

            # Imported here rather than at module level, matching _execute_tool.
            from .api.client import ApiClient
            api_client = ApiClient(orgo_api_key, orgo_base_url)

            # Number of un-pruned screenshots currently held in `messages`.
            screenshot_count = 0

            while iteration < max_iterations:
                iteration += 1

                # Drop the oldest screenshots once the history holds too many.
                if screenshot_count > max_saved_screenshots:
                    self._prune_old_screenshots(messages, screenshot_count - max_saved_screenshots)
                    screenshot_count = max_saved_screenshots

                request_params = {
                    "model": model,
                    "max_tokens": kwargs.get("max_tokens", 4096),
                    "system": system_prompt,
                    "messages": messages,
                    "tools": tools,
                    "betas": ["computer-use-2025-01-24"],
                }

                # Add thinking parameter only if explicitly enabled
                if kwargs.get("thinking_enabled"):
                    request_params["thinking"] = {
                        "type": "enabled",
                        "budget_tokens": kwargs.get("thinking_budget", 1024)
                    }

                response = client.beta.messages.create(**request_params)
                response_content = response.content

                # Record Claude's turn before acting on it.
                messages.append({"role": "assistant", "content": response_content})

                # Report each content block to the callback.
                if callback:
                    for block in response_content:
                        if block.type == "text":
                            callback("text", block.text)
                        elif block.type == "thinking":
                            callback("thinking", block.thinking)
                        elif block.type == "tool_use":
                            # Keys from block.input (including "action")
                            # override the name-derived default.
                            callback("tool_use", {
                                "action": block.name.split(".")[-1],
                                **block.input
                            })

                # Execute every tool request and collect the results.
                tool_results = []
                for block in response_content:
                    if block.type != "tool_use":
                        continue

                    result = self._execute_tool(computer_id, block.input, callback, api_client)
                    tool_result = {
                        "type": "tool_result",
                        "tool_use_id": block.id
                    }

                    if isinstance(result, dict) and result.get("type") == "image":
                        tool_result["content"] = [result]
                        # Only explicit screenshot actions count towards the limit.
                        if block.input.get("action") == "screenshot":
                            screenshot_count += 1
                    else:
                        tool_result["content"] = [{"type": "text", "text": str(result)}]

                    tool_results.append(tool_result)

                # If no tools were used, Claude is done - return the messages
                if not tool_results:
                    if callback:
                        callback("status", "Task completed")
                    return messages

                # Tool results go back to Claude as the next user turn.
                messages.append({"role": "user", "content": tool_results})

            # The loop ended without Claude finishing on its own.
            if callback:
                callback("status", f"Reached maximum iterations ({max_iterations})")

            return messages

        except Exception as e:
            if callback:
                callback("error", str(e))
            raise

    def _prune_old_screenshots(self, messages: List[Dict[str, Any]], num_to_prune: int):
        """
        Remove the oldest screenshots from the conversation history, in place.

        Each pruned image item inside a user ``tool_result`` block is
        replaced by a text item. Replacing the whole item (instead of
        overwriting its base64 payload) keeps the message valid for the next
        request and ensures an already-pruned screenshot is not found and
        counted again by a later call.

        Args:
            messages: The conversation history; the first message (the
                original instruction) is never touched
            num_to_prune: Number of screenshots to remove; values <= 0 are
                a no-op
        """
        if num_to_prune <= 0:
            return

        remaining = num_to_prune

        # Walk oldest-first, skipping the initial user instruction.
        for message in messages[1:]:
            if message["role"] != "user":
                continue

            content = message["content"]
            if not isinstance(content, list):
                continue

            for block in content:
                if not isinstance(block, dict) or block.get("type") != "tool_result":
                    continue

                items = block.get("content", [])
                if not isinstance(items, list):
                    continue

                for index, item in enumerate(items):
                    if isinstance(item, dict) and item.get("type") == "image":
                        items[index] = {
                            "type": "text",
                            "text": self._PRUNED_SCREENSHOT_TEXT,
                        }
                        remaining -= 1
                        if remaining == 0:
                            return

    def _execute_tool(self,
                      computer_id: str,
                      params: Dict[str, Any],
                      callback: Optional[Callable[[str, Any], None]] = None,
                      api_client = None) -> Union[str, Dict[str, Any]]:
        """Execute a tool action via the API client.

        Args:
            computer_id: ID of the computer to act on
            params: Tool input from Claude; ``params["action"]`` selects the
                operation and the remaining keys are its arguments
            callback: Optional progress callback
            api_client: Client to use; a default ``ApiClient()`` is created
                when omitted

        Returns:
            An image content block (dict) for "screenshot", otherwise a
            text description of what was done. Failures, including
            unsupported actions and missing arguments, are not raised: they
            are reported to ``callback`` and returned as an "Error: ..."
            string.
        """
        action = params.get("action")

        if callback:
            callback("tool_executing", {"action": action, "params": params})

        try:
            if api_client is None:
                # Import here to avoid circular imports
                from .api.client import ApiClient
                api_client = ApiClient()

            if action == "screenshot":
                response = api_client.get_screenshot(computer_id)
                if callback:
                    callback("tool_result", {"type": "image", "action": "screenshot"})
                return {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/jpeg",
                        "data": response.get("image", "")
                    }
                }

            elif action in self._CLICK_ACTIONS:
                label, verb = self._CLICK_ACTIONS[action]
                if not params.get("coordinate"):
                    raise ValueError(f"Coordinates required for {label}")
                x, y = params["coordinate"]
                # The ApiClient method name equals the action name.
                getattr(api_client, action)(computer_id, x, y)
                if callback:
                    callback("tool_result", {"action": action, "x": x, "y": y})
                return f"{verb} at ({x}, {y})"

            elif action == "type":
                if not params.get("text"):
                    raise ValueError("Text required for typing")
                text = params["text"]
                api_client.type_text(computer_id, text)
                if callback:
                    callback("tool_result", {"action": "type", "text": text})
                return f"Typed: \"{text}\""

            elif action == "key":
                if not params.get("text"):
                    raise ValueError("Key required for key press")
                key = params["text"]
                # Handle the 'return' key as 'enter' when needed
                if key.lower() == "return":
                    key = "enter"
                api_client.key_press(computer_id, key)
                if callback:
                    callback("tool_result", {"action": "key", "key": key})
                return f"Pressed key: {key}"

            elif action == "scroll":
                # An amount of 0 is accepted; only a missing amount is an error.
                if not params.get("scroll_direction") or params.get("scroll_amount") is None:
                    raise ValueError("Direction and amount required for scrolling")
                direction = params["scroll_direction"]
                amount = params["scroll_amount"]
                api_client.scroll(computer_id, direction, amount)
                if callback:
                    callback("tool_result", {"action": "scroll", "direction": direction, "amount": amount})
                return f"Scrolled {direction} by {amount}"

            elif action == "wait":
                duration = params.get("duration", 1)
                api_client.wait(computer_id, duration)
                if callback:
                    callback("tool_result", {"action": "wait", "duration": duration})
                return f"Waited for {duration} second(s)"

            else:
                error_msg = f"Unsupported action: {action}"
                if callback:
                    callback("error", error_msg)
                raise ValueError(error_msg)

        except Exception as e:
            # Errors are handed back to Claude as text so it can react,
            # rather than aborting the whole agent loop.
            error_msg = f"Error executing {action}: {str(e)}"
            if callback:
                callback("error", error_msg)
            return f"Error: {error_msg}"
381
+
382
+
383
# Registry of provider names accepted by get_provider(); each value is a
# zero-argument callable (the provider class) that builds the provider.
PROVIDER_MAPPING = {
    "anthropic": AnthropicProvider,
    # Add more providers here as needed, e.g.:
    # "openai": OpenAIProvider,
    # "fireworks": FireworksProvider,
}


def get_provider(provider_name: str = "anthropic") -> PromptProvider:
    """
    Build a new prompt provider instance for the given name.

    Args:
        provider_name: Key into PROVIDER_MAPPING (default: "anthropic")

    Returns:
        A freshly constructed provider instance

    Raises:
        ValueError: If ``provider_name`` is not a registered provider.
    """
    provider_cls = PROVIDER_MAPPING.get(provider_name)
    if provider_cls is None:
        raise ValueError(f"Unknown provider: {provider_name}. Available providers: {', '.join(PROVIDER_MAPPING.keys())}")

    return provider_cls()
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: orgo
3
- Version: 0.0.9
4
- Summary: Desktop infrastructure for AI agents
3
+ Version: 0.0.11
4
+ Summary: Computers for AI agents
5
5
  Author: Orgo Team
6
6
  License: MIT
7
7
  Project-URL: Homepage, https://www.orgo.ai
@@ -2,6 +2,7 @@ README.md
2
2
  pyproject.toml
3
3
  src/orgo/__init__.py
4
4
  src/orgo/computer.py
5
+ src/orgo/prompt.py
5
6
  src/orgo.egg-info/PKG-INFO
6
7
  src/orgo.egg-info/SOURCES.txt
7
8
  src/orgo.egg-info/dependency_links.txt
@@ -1,84 +0,0 @@
1
- """Computer class for interacting with Orgo virtual environments"""
2
-
3
- import os
4
- import io
5
- import base64
6
- from typing import Dict, Any
7
- from PIL import Image
8
-
9
- from .api.client import ApiClient
10
-
11
- class Computer:
12
- def __init__(self, project_id=None, api_key=None, config=None):
13
- self.api = ApiClient(api_key or os.environ.get("ORGO_API_KEY"))
14
-
15
- if project_id:
16
- self.project_id = project_id
17
- self._info = self.api.connect_computer(project_id)
18
- else:
19
- response = self.api.create_computer(config)
20
- self.project_id = response.get("name")
21
- self._info = response
22
-
23
- if not self.project_id:
24
- raise ValueError("Failed to initialize computer: No project ID returned")
25
-
26
- def status(self) -> Dict[str, Any]:
27
- """Get current computer status"""
28
- return self.api.get_status(self.project_id)
29
-
30
- def restart(self) -> Dict[str, Any]:
31
- """Restart the computer"""
32
- return self.api.restart_computer(self.project_id)
33
-
34
- def shutdown(self) -> Dict[str, Any]:
35
- """Terminate the computer instance"""
36
- return self.api.shutdown_computer(self.project_id)
37
-
38
- # Navigation methods
39
- def left_click(self, x: int, y: int) -> Dict[str, Any]:
40
- """Perform left mouse click at specified coordinates"""
41
- return self.api.left_click(self.project_id, x, y)
42
-
43
- def right_click(self, x: int, y: int) -> Dict[str, Any]:
44
- """Perform right mouse click at specified coordinates"""
45
- return self.api.right_click(self.project_id, x, y)
46
-
47
- def double_click(self, x: int, y: int) -> Dict[str, Any]:
48
- """Perform double click at specified coordinates"""
49
- return self.api.double_click(self.project_id, x, y)
50
-
51
- def scroll(self, direction: str = "down", amount: int = 1) -> Dict[str, Any]:
52
- """Scroll in specified direction and amount"""
53
- return self.api.scroll(self.project_id, direction, amount)
54
-
55
- # Input methods
56
- def type(self, text: str) -> Dict[str, Any]:
57
- """Type the specified text"""
58
- return self.api.type_text(self.project_id, text)
59
-
60
- def key(self, key: str) -> Dict[str, Any]:
61
- """Press a key or key combination (e.g., "Enter", "ctrl+c")"""
62
- return self.api.key_press(self.project_id, key)
63
-
64
- # View methods
65
- def screenshot(self) -> Image.Image:
66
- """Capture screenshot and return as PIL Image"""
67
- response = self.api.get_screenshot(self.project_id)
68
- img_data = base64.b64decode(response.get("image", ""))
69
- return Image.open(io.BytesIO(img_data))
70
-
71
- def screenshot_base64(self) -> str:
72
- """Capture screenshot and return as base64 string"""
73
- response = self.api.get_screenshot(self.project_id)
74
- return response.get("image", "")
75
-
76
- # Execution methods
77
- def bash(self, command: str) -> str:
78
- """Execute a bash command and return output"""
79
- response = self.api.execute_bash(self.project_id, command)
80
- return response.get("output", "")
81
-
82
- def wait(self, seconds: float) -> Dict[str, Any]:
83
- """Wait for specified number of seconds"""
84
- return self.api.wait(self.project_id, seconds)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes