orgo 0.0.13.tar.gz → 0.0.15.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: orgo
3
- Version: 0.0.13
3
+ Version: 0.0.15
4
4
  Summary: Computers for AI agents
5
5
  Author: Orgo Team
6
6
  License: MIT
@@ -35,6 +35,9 @@ computer.type("Hello world")
35
35
  computer.key("Enter")
36
36
  computer.screenshot() # Returns PIL Image
37
37
 
38
+ # Execute Python code
39
+ computer.exec("import pyautogui; pyautogui.click(512, 384)")
40
+
38
41
  # Cleanup
39
42
  computer.shutdown()
40
43
  ```
@@ -22,6 +22,9 @@ computer.type("Hello world")
22
22
  computer.key("Enter")
23
23
  computer.screenshot() # Returns PIL Image
24
24
 
25
+ # Execute Python code
26
+ computer.exec("import pyautogui; pyautogui.click(512, 384)")
27
+
25
28
  # Cleanup
26
29
  computer.shutdown()
27
30
  ```
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "orgo"
7
- version = "0.0.13"
7
+ version = "0.0.15"
8
8
  description = "Computers for AI agents"
9
9
  authors = [{name = "Orgo Team"}]
10
10
  license = {text = "MIT"}
@@ -1,3 +1,4 @@
1
+ # src/orgo/__init__.py
1
2
  """Orgo SDK: Desktop infrastructure for AI agents"""
2
3
 
3
4
  from .computer import Computer
@@ -1,3 +1,4 @@
1
+ # src/orgo/api/__init__.py
1
2
  """API package for Orgo SDK"""
2
3
 
3
4
  from .client import ApiClient
@@ -1,5 +1,5 @@
1
- """API client for Orgo service"""
2
1
  # src/orgo/api/client.py
2
+ """API client for Orgo service"""
3
3
 
4
4
  import requests
5
5
  from typing import Dict, Any, Optional
@@ -96,6 +96,13 @@ class ApiClient:
96
96
  "command": command
97
97
  })
98
98
 
99
+ def execute_python(self, project_id: str, code: str, timeout: int = 10) -> Dict[str, Any]:
100
+ """Execute Python code on the computer"""
101
+ return self._request("POST", f"computers/{project_id}/exec", {
102
+ "code": code,
103
+ "timeout": timeout
104
+ })
105
+
99
106
  def wait(self, project_id: str, seconds: float) -> Dict[str, Any]:
100
107
  return self._request("POST", f"computers/{project_id}/wait", {
101
108
  "seconds": seconds
@@ -1,10 +1,12 @@
1
+ # src/orgo/computer.py
1
2
  """Computer class for interacting with Orgo virtual environments"""
2
3
  import os
3
4
  import io
4
5
  import base64
5
6
  import logging
6
- from typing import Dict, List, Any, Optional, Callable, Union
7
+ from typing import Dict, List, Any, Optional, Callable
7
8
  from PIL import Image
9
+ import requests
8
10
  from requests.exceptions import RequestException
9
11
 
10
12
  from .api.client import ApiClient
@@ -96,13 +98,33 @@ class Computer:
96
98
  def screenshot(self) -> Image.Image:
97
99
  """Capture screenshot and return as PIL Image"""
98
100
  response = self.api.get_screenshot(self.project_id)
99
- img_data = base64.b64decode(response.get("image", ""))
100
- return Image.open(io.BytesIO(img_data))
101
+ image_data = response.get("image", "")
102
+
103
+ # Check if it's a URL (new format) or base64 (legacy format)
104
+ if image_data.startswith(('http://', 'https://')):
105
+ # Download image from URL
106
+ img_response = requests.get(image_data)
107
+ img_response.raise_for_status()
108
+ return Image.open(io.BytesIO(img_response.content))
109
+ else:
110
+ # Legacy base64 format
111
+ img_data = base64.b64decode(image_data)
112
+ return Image.open(io.BytesIO(img_data))
101
113
 
102
114
  def screenshot_base64(self) -> str:
103
115
  """Capture screenshot and return as base64 string"""
104
116
  response = self.api.get_screenshot(self.project_id)
105
- return response.get("image", "")
117
+ image_data = response.get("image", "")
118
+
119
+ # Check if it's a URL (new format) or base64 (legacy format)
120
+ if image_data.startswith(('http://', 'https://')):
121
+ # Download image from URL and convert to base64
122
+ img_response = requests.get(image_data)
123
+ img_response.raise_for_status()
124
+ return base64.b64encode(img_response.content).decode('utf-8')
125
+ else:
126
+ # Already base64
127
+ return image_data
106
128
 
107
129
  # Execution methods
108
130
  def bash(self, command: str) -> str:
@@ -110,6 +132,37 @@ class Computer:
110
132
  response = self.api.execute_bash(self.project_id, command)
111
133
  return response.get("output", "")
112
134
 
135
+ def exec(self, code: str, timeout: int = 10) -> Dict[str, Any]:
136
+ """
137
+ Execute Python code on the remote computer.
138
+
139
+ Args:
140
+ code: Python code to execute
141
+ timeout: Maximum execution time in seconds (default: 10, max: 300)
142
+
143
+ Returns:
144
+ Dict with keys:
145
+ - success: bool indicating if execution completed without errors
146
+ - output: str containing stdout output
147
+ - error: str containing error message if any
148
+ - error_type: str with exception type name if error occurred
149
+ - timeout: bool indicating if execution timed out
150
+
151
+ Example:
152
+ result = computer.exec('''
153
+ import os
154
+ print(f"Current directory: {os.getcwd()}")
155
+ print(f"Files: {os.listdir('.')}")
156
+ ''')
157
+
158
+ if result['success']:
159
+ print(result['output'])
160
+ else:
161
+ print(f"Error: {result['error']}")
162
+ """
163
+ response = self.api.execute_python(self.project_id, code, timeout)
164
+ return response
165
+
113
166
  def wait(self, seconds: float) -> Dict[str, Any]:
114
167
  """Wait for specified number of seconds"""
115
168
  return self.api.wait(self.project_id, seconds)
@@ -168,4 +221,4 @@ class Computer:
168
221
  # Pass through the Orgo API client configuration
169
222
  orgo_api_key=self.api_key,
170
223
  orgo_base_url=self.base_api_url
171
- )
224
+ )
@@ -1,3 +1,4 @@
1
+ # src/orgo/project.py
1
2
  """Project management for Orgo virtual environments"""
2
3
  import os
3
4
  import json
@@ -1,3 +1,4 @@
1
+ # src/orgo/prompt.py
1
2
  """
2
3
  Prompt module for interacting with virtual computers using AI models.
3
4
  """
@@ -1,3 +1,4 @@
1
+ # src/orgo/utils/__init__.py
1
2
  """Utility functions for Orgo SDK"""
2
3
 
3
4
  from .auth import get_api_key
@@ -1,3 +1,4 @@
1
+ # src/orgo/utils/auth.py
1
2
  """Authentication utilities for Orgo SDK"""
2
3
 
3
4
  import os
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: orgo
3
- Version: 0.0.13
3
+ Version: 0.0.15
4
4
  Summary: Computers for AI agents
5
5
  Author: Orgo Team
6
6
  License: MIT
@@ -35,6 +35,9 @@ computer.type("Hello world")
35
35
  computer.key("Enter")
36
36
  computer.screenshot() # Returns PIL Image
37
37
 
38
+ # Execute Python code
39
+ computer.exec("import pyautogui; pyautogui.click(512, 384)")
40
+
38
41
  # Cleanup
39
42
  computer.shutdown()
40
43
  ```
@@ -9,10 +9,6 @@ src/orgo.egg-info/SOURCES.txt
9
9
  src/orgo.egg-info/dependency_links.txt
10
10
  src/orgo.egg-info/requires.txt
11
11
  src/orgo.egg-info/top_level.txt
12
- src/orgo/adapters/__init__.py
13
- src/orgo/adapters/anthropic.py
14
- src/orgo/adapters/base.py
15
- src/orgo/adapters/openai.py
16
12
  src/orgo/api/__init__.py
17
13
  src/orgo/api/client.py
18
14
  src/orgo/utils/__init__.py
@@ -1,7 +0,0 @@
1
- """Adapters for integrating Orgo with AI models"""
2
-
3
- from .base import BaseAdapter
4
- from .anthropic import AnthropicAdapter
5
- from .openai import OpenAIAdapter
6
-
7
- __all__ = ["BaseAdapter", "AnthropicAdapter", "OpenAIAdapter"]
@@ -1,64 +0,0 @@
1
- """Adapter for Anthropic's Claude"""
2
-
3
- from typing import Dict, Any, Optional
4
- from .base import BaseAdapter
5
-
6
- class AnthropicAdapter(BaseAdapter):
7
- def get_tool_definition(self) -> Dict[str, Any]:
8
- return {
9
- "name": "computer",
10
- "description": "Controls a virtual computer to automate tasks",
11
- "type": "function",
12
- "parameters": {
13
- "type": "object",
14
- "properties": {
15
- "action": {
16
- "type": "string",
17
- "enum": ["left_click", "right_click", "double_click", "type", "key", "scroll", "screenshot"],
18
- "description": "The action to perform on the computer"
19
- },
20
- "coordinate": {
21
- "type": "array",
22
- "items": {"type": "number"},
23
- "description": "The x,y coordinates for click actions",
24
- "minItems": 2,
25
- "maxItems": 2
26
- },
27
- "text": {
28
- "type": "string",
29
- "description": "The text to type or key to press"
30
- },
31
- "direction": {
32
- "type": "string",
33
- "enum": ["up", "down", "left", "right"],
34
- "description": "The direction to scroll"
35
- },
36
- "amount": {
37
- "type": "number",
38
- "description": "The amount to scroll"
39
- }
40
- },
41
- "required": ["action"],
42
- "additionalProperties": False
43
- }
44
- }
45
-
46
- def format_result(self, tool_id: str, output: Optional[str] = None, error: Optional[str] = None) -> Dict[str, Any]:
47
- screenshot = self.computer.get_base64()
48
- result = {
49
- "type": "tool_result",
50
- "id": tool_id,
51
- "content": {
52
- "type": "image",
53
- "source": {
54
- "type": "base64",
55
- "media_type": "image/png",
56
- "data": screenshot
57
- }
58
- }
59
- }
60
-
61
- if error:
62
- result["error"] = error
63
-
64
- return result
@@ -1,14 +0,0 @@
1
- """Base adapter for AI model integration"""
2
-
3
- from typing import Dict, Any, Optional
4
- from ..computer import Computer
5
-
6
- class BaseAdapter:
7
- def __init__(self, computer: Computer):
8
- self.computer = computer
9
-
10
- def get_tool_definition(self) -> Dict[str, Any]:
11
- raise NotImplementedError("Subclasses must implement get_tool_definition()")
12
-
13
- def format_result(self, tool_id: str, output: Optional[str] = None, error: Optional[str] = None) -> Dict[str, Any]:
14
- raise NotImplementedError("Subclasses must implement format_result()")
@@ -1,69 +0,0 @@
1
- """Adapter for OpenAI models"""
2
-
3
- from typing import Dict, Any, Optional
4
- from .base import BaseAdapter
5
-
6
- class OpenAIAdapter(BaseAdapter):
7
- def get_tool_definition(self) -> Dict[str, Any]:
8
- return {
9
- "type": "function",
10
- "function": {
11
- "name": "computer",
12
- "description": "Controls a virtual computer to automate tasks",
13
- "parameters": {
14
- "type": "object",
15
- "properties": {
16
- "action": {
17
- "type": "string",
18
- "enum": ["click", "right_click", "double_click", "type", "key", "scroll", "screenshot"],
19
- "description": "The action to perform on the computer"
20
- },
21
- "x": {
22
- "type": "number",
23
- "description": "The x coordinate for click actions"
24
- },
25
- "y": {
26
- "type": "number",
27
- "description": "The y coordinate for click actions"
28
- },
29
- "text": {
30
- "type": "string",
31
- "description": "The text to type or key to press"
32
- },
33
- "direction": {
34
- "type": "string",
35
- "enum": ["up", "down", "left", "right"],
36
- "description": "The direction to scroll"
37
- },
38
- "amount": {
39
- "type": "number",
40
- "description": "The amount to scroll"
41
- }
42
- },
43
- "required": ["action"],
44
- "additionalProperties": False
45
- }
46
- }
47
- }
48
-
49
- def format_result(self, tool_id: str, output: Optional[str] = None, error: Optional[str] = None) -> Dict[str, Any]:
50
- screenshot = self.computer.get_base64()
51
- result = {
52
- "tool_call_id": tool_id,
53
- "content": [
54
- {
55
- "type": "image_url",
56
- "image_url": {
57
- "url": f"data:image/png;base64,{screenshot}"
58
- }
59
- }
60
- ]
61
- }
62
-
63
- if error:
64
- result["content"].insert(0, {
65
- "type": "text",
66
- "text": f"Error: {error}"
67
- })
68
-
69
- return result
File without changes