oagi-core 0.10.3__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. oagi/__init__.py +1 -3
  2. oagi/actor/__init__.py +21 -0
  3. oagi/{task → actor}/async_.py +23 -7
  4. oagi/{task → actor}/async_short.py +1 -1
  5. oagi/actor/base.py +222 -0
  6. oagi/{task → actor}/short.py +1 -1
  7. oagi/{task → actor}/sync.py +21 -5
  8. oagi/agent/default.py +5 -0
  9. oagi/agent/factories.py +75 -3
  10. oagi/agent/observer/exporters.py +6 -0
  11. oagi/agent/observer/report_template.html +19 -0
  12. oagi/agent/tasker/planner.py +31 -19
  13. oagi/agent/tasker/taskee_agent.py +26 -7
  14. oagi/agent/tasker/tasker_agent.py +4 -0
  15. oagi/cli/agent.py +54 -30
  16. oagi/client/async_.py +54 -96
  17. oagi/client/base.py +81 -133
  18. oagi/client/sync.py +52 -99
  19. oagi/constants.py +7 -2
  20. oagi/handler/__init__.py +16 -0
  21. oagi/handler/_macos.py +137 -0
  22. oagi/handler/_windows.py +101 -0
  23. oagi/handler/async_pyautogui_action_handler.py +8 -0
  24. oagi/handler/capslock_manager.py +55 -0
  25. oagi/handler/pyautogui_action_handler.py +21 -39
  26. oagi/server/session_store.py +3 -3
  27. oagi/server/socketio_server.py +4 -4
  28. oagi/task/__init__.py +22 -8
  29. oagi/types/__init__.py +2 -1
  30. oagi/types/models/__init__.py +0 -2
  31. oagi/types/models/action.py +4 -1
  32. oagi/types/models/client.py +1 -17
  33. oagi/types/step_observer.py +2 -0
  34. oagi/types/url.py +25 -0
  35. oagi/utils/__init__.py +12 -0
  36. oagi/utils/output_parser.py +166 -0
  37. oagi/utils/prompt_builder.py +44 -0
  38. {oagi_core-0.10.3.dist-info → oagi_core-0.12.0.dist-info}/METADATA +90 -10
  39. oagi_core-0.12.0.dist-info/RECORD +76 -0
  40. oagi/task/base.py +0 -158
  41. oagi_core-0.10.3.dist-info/RECORD +0 -70
  42. {oagi_core-0.10.3.dist-info → oagi_core-0.12.0.dist-info}/WHEEL +0 -0
  43. {oagi_core-0.10.3.dist-info → oagi_core-0.12.0.dist-info}/entry_points.txt +0 -0
  44. {oagi_core-0.10.3.dist-info → oagi_core-0.12.0.dist-info}/licenses/LICENSE +0 -0
oagi/client/base.py CHANGED
@@ -11,7 +11,12 @@ from typing import Any, Generic, TypeVar
 
 import httpx
 
-from ..constants import API_KEY_HELP_URL, DEFAULT_BASE_URL, HTTP_CLIENT_TIMEOUT
+from ..constants import (
+    API_KEY_HELP_URL,
+    DEFAULT_BASE_URL,
+    DEFAULT_MAX_RETRIES,
+    HTTP_CLIENT_TIMEOUT,
+)
 from ..exceptions import (
     APIError,
     AuthenticationError,
@@ -27,9 +32,11 @@ from ..logging import get_logger
 from ..types.models import (
     ErrorResponse,
     GenerateResponse,
-    LLMResponse,
     UploadFileResponse,
+    Usage,
 )
+from ..types.models.step import Step
+from ..utils.output_parser import parse_raw_output
 
 logger = get_logger("client.base")
 
@@ -40,7 +47,12 @@ HttpClientT = TypeVar("HttpClientT")
 class BaseClient(Generic[HttpClientT]):
     """Base class with shared business logic for sync/async clients."""
 
-    def __init__(self, base_url: str | None = None, api_key: str | None = None):
+    def __init__(
+        self,
+        base_url: str | None = None,
+        api_key: str | None = None,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+    ):
         # Get from environment if not provided
         self.base_url = base_url or os.getenv("OAGI_BASE_URL") or DEFAULT_BASE_URL
         self.api_key = api_key or os.getenv("OAGI_API_KEY")
@@ -55,6 +67,7 @@ class BaseClient(Generic[HttpClientT]):
 
         self.base_url = self.base_url.rstrip("/")
         self.timeout = HTTP_CLIENT_TIMEOUT
+        self.max_retries = max_retries
         self.client: HttpClientT  # Will be set by subclasses
 
         logger.info(f"Client initialized with base_url: {self.base_url}")
@@ -67,39 +80,77 @@ class BaseClient(Generic[HttpClientT]):
             headers["x-api-key"] = self.api_key
         return headers
 
-    def _build_payload(
+    @staticmethod
+    def _log_trace_id(response) -> None:
+        """Log trace IDs from response headers for debugging."""
+        logger.error(f"Request Id: {response.headers.get('x-request-id', '')}")
+        logger.error(f"Trace Id: {response.headers.get('x-trace-id', '')}")
+
+    def _build_chat_completion_kwargs(
         self,
         model: str,
-        messages_history: list,
-        task_description: str | None = None,
-        task_id: str | None = None,
+        messages: list,
         temperature: float | None = None,
-    ) -> dict[str, Any]:
-        """Build OpenAI-compatible request payload.
+        task_id: str | None = None,
+    ) -> dict:
+        """Build kwargs dict for OpenAI chat completion call.
 
         Args:
-            model: Model to use
-            messages_history: OpenAI-compatible message history
-            task_description: Task description
-            task_id: Task ID for continuing session
-            temperature: Sampling temperature
+            model: Model to use for inference
+            messages: Full message history (OpenAI-compatible format)
+            temperature: Sampling temperature (0.0-2.0)
+            task_id: Optional task ID for multi-turn conversations
 
         Returns:
-            OpenAI-compatible request payload
+            Dict of kwargs for chat.completions.create()
         """
-        payload: dict[str, Any] = {
-            "model": model,
-            "messages": messages_history,
-        }
-
-        if task_description is not None:
-            payload["task_description"] = task_description
-        if task_id is not None:
-            payload["task_id"] = task_id
+        kwargs: dict = {"model": model, "messages": messages}
         if temperature is not None:
-            payload["temperature"] = temperature
+            kwargs["temperature"] = temperature
+        if task_id is not None:
+            kwargs["extra_body"] = {"task_id": task_id}
+        return kwargs
+
+    def _parse_chat_completion_response(
+        self, response
+    ) -> tuple[Step, str, Usage | None]:
+        """Extract and parse OpenAI chat completion response, and log success.
+
+        This is sync/async agnostic as it only processes the response object.
+
+        Args:
+            response: OpenAI ChatCompletion response object
 
-        return payload
+        Returns:
+            Tuple of (Step, raw_output, Usage)
+        """
+        raw_output = response.choices[0].message.content or ""
+        step = parse_raw_output(raw_output)
+
+        # Extract task_id from response (custom field from OAGI API)
+        task_id = getattr(response, "task_id", None)
+
+        usage = None
+        if response.usage:
+            usage = Usage(
+                prompt_tokens=response.usage.prompt_tokens,
+                completion_tokens=response.usage.completion_tokens,
+                total_tokens=response.usage.total_tokens,
+            )
+
+        # Log success with task_id and usage
+        usage_str = (
+            f", tokens: {usage.prompt_tokens}+{usage.completion_tokens}"
+            if usage
+            else ""
+        )
+        task_str = f"task_id: {task_id}, " if task_id else ""
+        logger.info(
+            f"Chat completion successful - {task_str}actions: {len(step.actions)}, "
+            f"stop: {step.stop}{usage_str}"
+        )
+
+        return step, raw_output, usage
 
     def _handle_response_error(
         self, response: httpx.Response, response_data: dict
@@ -141,84 +192,6 @@ class BaseClient(Generic[HttpClientT]):
 
         return status_map.get(status_code, APIError)
 
-    def _log_request_info(self, model: str, task_description: Any, task_id: Any):
-        logger.info(f"Making API request to /v2/message with model: {model}")
-        logger.debug(
-            f"Request includes task_description: {task_description is not None}, "
-            f"task_id: {task_id is not None}"
-        )
-
-    def _build_user_message(
-        self, screenshot_url: str, instruction: str | None
-    ) -> dict[str, Any]:
-        """Build OpenAI-compatible user message with screenshot and optional instruction.
-
-        Args:
-            screenshot_url: URL of uploaded screenshot
-            instruction: Optional text instruction
-
-        Returns:
-            User message dict
-        """
-        content = [{"type": "image_url", "image_url": {"url": screenshot_url}}]
-        if instruction:
-            content.append({"type": "text", "text": instruction})
-        return {"role": "user", "content": content}
-
-    def _prepare_message_payload(
-        self,
-        model: str,
-        upload_file_response: UploadFileResponse | None,
-        task_description: str | None,
-        task_id: str | None,
-        instruction: str | None,
-        messages_history: list | None,
-        temperature: float | None,
-        api_version: str | None,
-        screenshot_url: str | None = None,
-    ) -> tuple[dict[str, str], dict[str, Any]]:
-        """Prepare headers and payload for /v2/message request.
-
-        Args:
-            model: Model to use
-            upload_file_response: Response from S3 upload (if screenshot was uploaded)
-            task_description: Task description
-            task_id: Task ID
-            instruction: Optional instruction
-            messages_history: Message history
-            temperature: Sampling temperature
-            api_version: API version
-            screenshot_url: Direct screenshot URL (alternative to upload_file_response)
-
-        Returns:
-            Tuple of (headers, payload)
-        """
-        # Use provided screenshot_url or get from upload_file_response
-        if screenshot_url is None:
-            if upload_file_response is None:
-                raise ValueError(
-                    "Either screenshot_url or upload_file_response must be provided"
-                )
-            screenshot_url = upload_file_response.download_url
-
-        # Build user message and append to history
-        if messages_history is None:
-            messages_history = []
-        user_message = self._build_user_message(screenshot_url, instruction)
-        messages_history.append(user_message)
-
-        # Build payload and headers
-        headers = self._build_headers(api_version)
-        payload = self._build_payload(
-            model=model,
-            messages_history=messages_history,
-            task_description=task_description,
-            task_id=task_id,
-            temperature=temperature,
-        )
-
-        return headers, payload
-
     def _parse_response_json(self, response: httpx.Response) -> dict[str, Any]:
         try:
             return response.json()
@@ -230,35 +203,6 @@ class BaseClient(Generic[HttpClientT]):
             response=response,
         )
 
-    def _process_response(self, response: httpx.Response) -> "LLMResponse":
-        response_data = self._parse_response_json(response)
-
-        # Check if it's an error response (non-200 status)
-        if response.status_code != 200:
-            self._handle_response_error(response, response_data)
-
-        # Parse successful response
-        result = LLMResponse(**response_data)
-
-        # Check if the response contains an error (even with 200 status)
-        if result.error:
-            logger.error(
-                f"API Error in response: [{result.error.code}]: {result.error.message}"
-            )
-            raise APIError(
-                result.error.message,
-                code=result.error.code,
-                status_code=200,
-                response=response,
-            )
-
-        logger.info(
-            f"API request successful - task_id: {result.task_id}, "
-            f"complete: {result.is_complete}"
-        )
-        logger.debug(f"Response included {len(result.actions)} actions")
-        return result
-
     def _process_upload_response(self, response: httpx.Response) -> UploadFileResponse:
         """Process response from /v1/file/upload endpoint.
 
@@ -449,7 +393,11 @@ class BaseClient(Generic[HttpClientT]):
         # Parse successful response
         result = GenerateResponse(**response_data)
 
+        # Capture request_id from response header
+        result.request_id = response.headers.get("X-Request-ID")
+
         logger.info(
             f"Generate request successful - tokens: {result.prompt_tokens}+{result.completion_tokens}, "
+            f"request_id: {result.request_id}"
         )
         return result
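The new `_build_chat_completion_kwargs` replaces the bespoke `/v2/message` payload with standard OpenAI chat-completion kwargs, routing the OAGI-specific `task_id` through the SDK's `extra_body` passthrough (which the OpenAI client merges into the request JSON). A minimal sketch of the resulting request shape, with illustrative values; the image-plus-text content format follows the removed `_build_user_message` helper:

```python
# Illustrative kwargs produced for a multi-turn call.
# The OpenAI SDK merges extra_body into the JSON body of
# chat.completions.create(), so task_id reaches the OAGI backend.
kwargs = {
    "model": "lux-actor-1",  # MODEL_ACTOR from oagi/constants.py
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/screenshot.png"}},
                {"type": "text", "text": "Open the settings dialog"},
            ],
        }
    ],
    "temperature": 0.1,
    "extra_body": {"task_id": "task-abc123"},  # omitted on the first turn
}
```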
oagi/client/sync.py CHANGED
@@ -9,28 +9,24 @@
 from functools import wraps
 
 import httpx
-from httpx import Response
+from httpx import HTTPTransport
+from openai import OpenAI
 
 from ..constants import (
-    API_HEALTH_ENDPOINT,
     API_V1_FILE_UPLOAD_ENDPOINT,
     API_V1_GENERATE_ENDPOINT,
-    API_V2_MESSAGE_ENDPOINT,
+    DEFAULT_MAX_RETRIES,
     HTTP_CLIENT_TIMEOUT,
 )
 from ..logging import get_logger
 from ..types import Image
-from ..types.models import GenerateResponse, LLMResponse, UploadFileResponse
+from ..types.models import GenerateResponse, UploadFileResponse, Usage
+from ..types.models.step import Step
 from .base import BaseClient
 
 logger = get_logger("sync_client")
 
 
-def _log_trace_id(response: Response):
-    logger.error(f"Request Id: {response.headers.get('x-request-id', '')}")
-    logger.error(f"Trace Id: {response.headers.get('x-trace-id', '')}")
-
-
 def log_trace_on_failure(func):
     """Decorator that logs trace ID when a method fails."""
 
@@ -41,7 +37,7 @@ def log_trace_on_failure(func):
         except Exception as e:
             # Try to get response from the exception if it has one
             if (response := getattr(e, "response", None)) is not None:
-                _log_trace_id(response)
+                BaseClient._log_trace_id(response)
             raise
 
     return wrapper
@@ -50,113 +46,70 @@ def log_trace_on_failure(func):
 class SyncClient(BaseClient[httpx.Client]):
     """Synchronous HTTP client for the OAGI API."""
 
-    def __init__(self, base_url: str | None = None, api_key: str | None = None):
-        super().__init__(base_url, api_key)
-        self.client = httpx.Client(base_url=self.base_url)
-        self.upload_client = httpx.Client(timeout=HTTP_CLIENT_TIMEOUT)
+    def __init__(
+        self,
+        base_url: str | None = None,
+        api_key: str | None = None,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+    ):
+        super().__init__(base_url, api_key, max_retries)
+
+        # OpenAI client for chat completions (with retries)
+        self.openai_client = OpenAI(
+            api_key=self.api_key,
+            base_url=f"{self.base_url}/v1",
+            max_retries=self.max_retries,
+        )
+
+        # httpx clients for S3 uploads and other endpoints (with retries)
+        transport = HTTPTransport(retries=self.max_retries)
+        self.http_client = httpx.Client(transport=transport, base_url=self.base_url)
+        self.upload_client = httpx.Client(
+            transport=transport, timeout=HTTP_CLIENT_TIMEOUT
+        )
+
         logger.info(f"SyncClient initialized with base_url: {self.base_url}")
 
     def __enter__(self):
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        self.client.close()
-        self.upload_client.close()
+        self.close()
 
     def close(self):
-        """Close the underlying httpx clients."""
-        self.client.close()
+        """Close the underlying clients."""
+        self.openai_client.close()
+        self.http_client.close()
         self.upload_client.close()
 
-    @log_trace_on_failure
-    def create_message(
+    def chat_completion(
         self,
         model: str,
-        screenshot: bytes | None = None,
-        screenshot_url: str | None = None,
-        task_description: str | None = None,
-        task_id: str | None = None,
-        instruction: str | None = None,
-        messages_history: list | None = None,
+        messages: list,
         temperature: float | None = None,
-        api_version: str | None = None,
-    ) -> LLMResponse | None:
+        task_id: str | None = None,
+    ) -> tuple[Step, str, Usage | None]:
         """
-        Call the /v2/message endpoint to analyze task and screenshot
+        Call OpenAI-compatible /v1/chat/completions endpoint.
 
         Args:
-            model: The model to use for task analysis
-            screenshot: Screenshot image bytes (mutually exclusive with screenshot_url)
-            screenshot_url: Direct URL to screenshot (mutually exclusive with screenshot)
-            task_description: Description of the task (required for new sessions)
-            task_id: Task ID for continuing existing task
-            instruction: Additional instruction when continuing a session
-            messages_history: OpenAI-compatible chat message history
-            temperature: Sampling temperature (0.0-2.0) for LLM inference
-            api_version: API version header
+            model: Model to use for inference
+            messages: Full message history (OpenAI-compatible format)
+            temperature: Sampling temperature (0.0-2.0)
+            task_id: Optional task ID for multi-turn conversations
 
         Returns:
-            LLMResponse: The response from the API
-
-        Raises:
-            ValueError: If both or neither screenshot and screenshot_url are provided
-            httpx.HTTPStatusError: For HTTP error responses
+            Tuple of (Step, raw_output, Usage)
+            - Step: Parsed actions and reasoning
+            - raw_output: Raw model output string (for message history)
+            - Usage: Token usage statistics (or None if not available)
         """
-        # Validate that exactly one is provided
-        if (screenshot is None) == (screenshot_url is None):
-            raise ValueError(
-                "Exactly one of 'screenshot' or 'screenshot_url' must be provided"
-            )
-
-        self._log_request_info(model, task_description, task_id)
-
-        # Upload screenshot to S3 if bytes provided, otherwise use URL directly
-        upload_file_response = None
-        if screenshot is not None:
-            upload_file_response = self.put_s3_presigned_url(screenshot, api_version)
-
-        # Prepare message payload
-        headers, payload = self._prepare_message_payload(
-            model=model,
-            upload_file_response=upload_file_response,
-            task_description=task_description,
-            task_id=task_id,
-            instruction=instruction,
-            messages_history=messages_history,
-            temperature=temperature,
-            api_version=api_version,
-            screenshot_url=screenshot_url,
+        logger.info(f"Making chat completion request with model: {model}")
+        kwargs = self._build_chat_completion_kwargs(
+            model, messages, temperature, task_id
         )
-
-        # Make request
-        try:
-            response = self.client.post(
-                API_V2_MESSAGE_ENDPOINT,
-                json=payload,
-                headers=headers,
-                timeout=self.timeout,
-            )
-            return self._process_response(response)
-        except (httpx.TimeoutException, httpx.NetworkError) as e:
-            self._handle_upload_http_errors(e)
-
-    def health_check(self) -> dict:
-        """
-        Call the /health endpoint for health check
-
-        Returns:
-            dict: Health check response
-        """
-        logger.debug("Making health check request")
-        try:
-            response = self.client.get(API_HEALTH_ENDPOINT)
-            response.raise_for_status()
-            result = response.json()
-            logger.debug("Health check successful")
-            return result
-        except httpx.HTTPStatusError as e:
-            logger.warning(f"Health check failed: {e}")
-            raise
+        response = self.openai_client.chat.completions.create(**kwargs)
+        return self._parse_chat_completion_response(response)
 
     def get_s3_presigned_url(
         self,
@@ -175,7 +128,7 @@ class SyncClient(BaseClient[httpx.Client]):
 
         try:
             headers = self._build_headers(api_version)
-            response = self.client.get(
+            response = self.http_client.get(
                 API_V1_FILE_UPLOAD_ENDPOINT, headers=headers, timeout=self.timeout
             )
            return self._process_upload_response(response)
@@ -295,7 +248,7 @@ class SyncClient(BaseClient[httpx.Client]):
 
         # Make request
         try:
-            response = self.client.post(
+            response = self.http_client.post(
                 API_V1_GENERATE_ENDPOINT,
                 json=payload,
                 headers=headers,
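With this change, the sync client's public surface shifts from `create_message()` to an OpenAI-style `chat_completion()` that returns a parsed `Step` alongside the raw output and token usage. A minimal usage sketch under the new API; the import path mirrors the file layout in this diff, and the screenshot URL and instruction text are placeholders:

```python
from oagi.client.sync import SyncClient

# Sketch of the new flow; assumes OAGI_API_KEY is set in the environment.
with SyncClient(max_retries=3) as client:
    step, raw_output, usage = client.chat_completion(
        model="lux-actor-1",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": "https://example.com/shot.png"}},
                    {"type": "text", "text": "Click the Save button"},
                ],
            }
        ],
        temperature=0.1,
    )
    print(f"stop={step.stop}, actions={len(step.actions)}")
    if usage:
        print(f"tokens: {usage.prompt_tokens}+{usage.completion_tokens}")
```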
oagi/constants.py CHANGED
@@ -9,10 +9,8 @@
 # URLs & API Endpoints
 DEFAULT_BASE_URL = "https://api.agiopen.org"
 API_KEY_HELP_URL = "https://developer.agiopen.org/api-keys"
-API_V2_MESSAGE_ENDPOINT = "/v2/message"
 API_V1_FILE_UPLOAD_ENDPOINT = "/v1/file/upload"
 API_V1_GENERATE_ENDPOINT = "/v1/generate"
-API_HEALTH_ENDPOINT = "/health"
 
 # Model identifiers
 MODEL_ACTOR = "lux-actor-1"
@@ -28,6 +26,10 @@ DEFAULT_MAX_STEPS = 20
 DEFAULT_MAX_STEPS_THINKER = 100
 DEFAULT_MAX_STEPS_TASKER = 60
 
+# Maximum allowed steps per model (hard limits)
+MAX_STEPS_ACTOR = 30
+MAX_STEPS_THINKER = 120
+
 # Reflection intervals
 DEFAULT_REFLECTION_INTERVAL = 4
 DEFAULT_REFLECTION_INTERVAL_TASKER = 20
@@ -41,3 +43,6 @@ DEFAULT_TEMPERATURE_LOW = 0.1
 
 # Timeout Values
 HTTP_CLIENT_TIMEOUT = 60
+
+# Retry Configuration
+DEFAULT_MAX_RETRIES = 2
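The new `MAX_STEPS_*` constants are hard caps that sit alongside the existing `DEFAULT_MAX_STEPS*` defaults. This diff does not show their call sites, so the clamp below is a hypothetical helper that only illustrates the intended default-versus-limit relationship:

```python
from oagi.constants import DEFAULT_MAX_STEPS, MAX_STEPS_ACTOR

def clamp_actor_steps(requested: int | None) -> int:
    """Hypothetical helper: fall back to the default, never exceed the hard limit."""
    steps = requested if requested is not None else DEFAULT_MAX_STEPS
    return min(steps, MAX_STEPS_ACTOR)

assert clamp_actor_steps(None) == 20  # DEFAULT_MAX_STEPS
assert clamp_actor_steps(100) == 30   # capped at MAX_STEPS_ACTOR
```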
oagi/handler/__init__.py CHANGED
@@ -14,6 +14,21 @@ from oagi.handler.pyautogui_action_handler import (
 )
 from oagi.handler.screenshot_maker import ScreenshotMaker
 
+
+def reset_handler(handler) -> None:
+    """Reset handler state if supported.
+
+    Uses duck-typing to check if the handler has a reset() method.
+    This allows handlers to reset their internal state (e.g., capslock state)
+    at the start of a new automation task.
+
+    Args:
+        handler: The action handler to reset
+    """
+    if hasattr(handler, "reset"):
+        handler.reset()
+
+
 __all__ = [
     "PILImage",
     "PyautoguiActionHandler",
@@ -21,4 +36,5 @@ __all__ = [
     "AsyncPyautoguiActionHandler",
     "ScreenshotMaker",
     "AsyncScreenshotMaker",
+    "reset_handler",
 ]
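Because `reset_handler` dispatches by duck-typing, callers can pass any handler without checking its type first; handlers lacking a `reset()` method are silently skipped. A short sketch, assuming `PyautoguiActionHandler` exposes the `reset()` behavior referenced by the capslock changes in this release (the stateless class is hypothetical):

```python
from oagi.handler import PyautoguiActionHandler, reset_handler

handler = PyautoguiActionHandler()
reset_handler(handler)  # calls handler.reset(), e.g. clearing tracked capslock state

class StatelessHandler:
    """Hypothetical handler with no reset() method."""

reset_handler(StatelessHandler())  # no-op: hasattr(handler, "reset") is False
```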