oagi-core 0.10.3__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. oagi/__init__.py +1 -3
  2. oagi/actor/__init__.py +21 -0
  3. oagi/{task → actor}/async_.py +23 -7
  4. oagi/{task → actor}/async_short.py +1 -1
  5. oagi/actor/base.py +222 -0
  6. oagi/{task → actor}/short.py +1 -1
  7. oagi/{task → actor}/sync.py +21 -5
  8. oagi/agent/default.py +5 -0
  9. oagi/agent/factories.py +75 -3
  10. oagi/agent/observer/exporters.py +6 -0
  11. oagi/agent/observer/report_template.html +19 -0
  12. oagi/agent/tasker/planner.py +31 -19
  13. oagi/agent/tasker/taskee_agent.py +26 -7
  14. oagi/agent/tasker/tasker_agent.py +4 -0
  15. oagi/cli/agent.py +54 -30
  16. oagi/client/async_.py +54 -96
  17. oagi/client/base.py +81 -133
  18. oagi/client/sync.py +52 -99
  19. oagi/constants.py +7 -2
  20. oagi/handler/__init__.py +16 -0
  21. oagi/handler/_macos.py +137 -0
  22. oagi/handler/_windows.py +101 -0
  23. oagi/handler/async_pyautogui_action_handler.py +8 -0
  24. oagi/handler/capslock_manager.py +55 -0
  25. oagi/handler/pyautogui_action_handler.py +21 -39
  26. oagi/server/session_store.py +3 -3
  27. oagi/server/socketio_server.py +4 -4
  28. oagi/task/__init__.py +22 -8
  29. oagi/types/__init__.py +2 -1
  30. oagi/types/models/__init__.py +0 -2
  31. oagi/types/models/action.py +4 -1
  32. oagi/types/models/client.py +1 -17
  33. oagi/types/step_observer.py +2 -0
  34. oagi/types/url.py +25 -0
  35. oagi/utils/__init__.py +12 -0
  36. oagi/utils/output_parser.py +166 -0
  37. oagi/utils/prompt_builder.py +44 -0
  38. {oagi_core-0.10.3.dist-info → oagi_core-0.12.0.dist-info}/METADATA +90 -10
  39. oagi_core-0.12.0.dist-info/RECORD +76 -0
  40. oagi/task/base.py +0 -158
  41. oagi_core-0.10.3.dist-info/RECORD +0 -70
  42. {oagi_core-0.10.3.dist-info → oagi_core-0.12.0.dist-info}/WHEEL +0 -0
  43. {oagi_core-0.10.3.dist-info → oagi_core-0.12.0.dist-info}/entry_points.txt +0 -0
  44. {oagi_core-0.10.3.dist-info → oagi_core-0.12.0.dist-info}/licenses/LICENSE +0 -0
oagi/client/base.py CHANGED
@@ -11,7 +11,12 @@ from typing import Any, Generic, TypeVar
 
 import httpx
 
-from ..constants import API_KEY_HELP_URL, DEFAULT_BASE_URL, HTTP_CLIENT_TIMEOUT
+from ..constants import (
+    API_KEY_HELP_URL,
+    DEFAULT_BASE_URL,
+    DEFAULT_MAX_RETRIES,
+    HTTP_CLIENT_TIMEOUT,
+)
 from ..exceptions import (
     APIError,
     AuthenticationError,
@@ -27,9 +32,11 @@ from ..logging import get_logger
 from ..types.models import (
     ErrorResponse,
     GenerateResponse,
-    LLMResponse,
     UploadFileResponse,
+    Usage,
 )
+from ..types.models.step import Step
+from ..utils.output_parser import parse_raw_output
 
 logger = get_logger("client.base")
 
@@ -40,7 +47,12 @@ HttpClientT = TypeVar("HttpClientT")
 class BaseClient(Generic[HttpClientT]):
     """Base class with shared business logic for sync/async clients."""
 
-    def __init__(self, base_url: str | None = None, api_key: str | None = None):
+    def __init__(
+        self,
+        base_url: str | None = None,
+        api_key: str | None = None,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+    ):
         # Get from environment if not provided
         self.base_url = base_url or os.getenv("OAGI_BASE_URL") or DEFAULT_BASE_URL
         self.api_key = api_key or os.getenv("OAGI_API_KEY")
@@ -55,6 +67,7 @@ class BaseClient(Generic[HttpClientT]):
 
         self.base_url = self.base_url.rstrip("/")
         self.timeout = HTTP_CLIENT_TIMEOUT
+        self.max_retries = max_retries
         self.client: HttpClientT  # Will be set by subclasses
 
         logger.info(f"Client initialized with base_url: {self.base_url}")
@@ -67,39 +80,77 @@ class BaseClient(Generic[HttpClientT]):
             headers["x-api-key"] = self.api_key
         return headers
 
-    def _build_payload(
+    @staticmethod
+    def _log_trace_id(response) -> None:
+        """Log trace IDs from response headers for debugging."""
+        logger.error(f"Request Id: {response.headers.get('x-request-id', '')}")
+        logger.error(f"Trace Id: {response.headers.get('x-trace-id', '')}")
+
+    def _build_chat_completion_kwargs(
         self,
         model: str,
-        messages_history: list,
-        task_description: str | None = None,
-        task_id: str | None = None,
+        messages: list,
         temperature: float | None = None,
-    ) -> dict[str, Any]:
-        """Build OpenAI-compatible request payload.
+        task_id: str | None = None,
+    ) -> dict:
+        """Build kwargs dict for OpenAI chat completion call.
 
         Args:
-            model: Model to use
-            messages_history: OpenAI-compatible message history
-            task_description: Task description
-            task_id: Task ID for continuing session
-            temperature: Sampling temperature
+            model: Model to use for inference
+            messages: Full message history (OpenAI-compatible format)
+            temperature: Sampling temperature (0.0-2.0)
+            task_id: Optional task ID for multi-turn conversations
 
         Returns:
-            OpenAI-compatible request payload
+            Dict of kwargs for chat.completions.create()
         """
-        payload: dict[str, Any] = {
-            "model": model,
-            "messages": messages_history,
-        }
-
-        if task_description is not None:
-            payload["task_description"] = task_description
-        if task_id is not None:
-            payload["task_id"] = task_id
+        kwargs: dict = {"model": model, "messages": messages}
         if temperature is not None:
-            payload["temperature"] = temperature
+            kwargs["temperature"] = temperature
+        if task_id is not None:
+            kwargs["extra_body"] = {"task_id": task_id}
+        return kwargs
+
+    def _parse_chat_completion_response(
+        self, response
+    ) -> tuple[Step, str, Usage | None]:
+        """Extract and parse OpenAI chat completion response, and log success.
+
+        This is sync/async agnostic as it only processes the response object.
+
+        Args:
+            response: OpenAI ChatCompletion response object
 
-        return payload
+        Returns:
+            Tuple of (Step, raw_output, Usage)
+        """
+        raw_output = response.choices[0].message.content or ""
+        step = parse_raw_output(raw_output)
+
+        # Extract task_id from response (custom field from OAGI API)
+        task_id = getattr(response, "task_id", None)
+
+        usage = None
+        if response.usage:
+            usage = Usage(
+                prompt_tokens=response.usage.prompt_tokens,
+                completion_tokens=response.usage.completion_tokens,
+                total_tokens=response.usage.total_tokens,
+            )
+
+        # Log success with task_id and usage
+        usage_str = (
+            f", tokens: {usage.prompt_tokens}+{usage.completion_tokens}"
+            if usage
+            else ""
+        )
+        task_str = f"task_id: {task_id}, " if task_id else ""
+        logger.info(
+            f"Chat completion successful - {task_str}actions: {len(step.actions)}, "
+            f"stop: {step.stop}{usage_str}"
+        )
+
+        return step, raw_output, usage
 
     def _handle_response_error(
         self, response: httpx.Response, response_data: dict
@@ -141,84 +192,6 @@ class BaseClient(Generic[HttpClientT]):
 
         return status_map.get(status_code, APIError)
 
-    def _log_request_info(self, model: str, task_description: Any, task_id: Any):
-        logger.info(f"Making API request to /v2/message with model: {model}")
-        logger.debug(
-            f"Request includes task_description: {task_description is not None}, "
-            f"task_id: {task_id is not None}"
-        )
-
-    def _build_user_message(
-        self, screenshot_url: str, instruction: str | None
-    ) -> dict[str, Any]:
-        """Build OpenAI-compatible user message with screenshot and optional instruction.
-
-        Args:
-            screenshot_url: URL of uploaded screenshot
-            instruction: Optional text instruction
-
-        Returns:
-            User message dict
-        """
-        content = [{"type": "image_url", "image_url": {"url": screenshot_url}}]
-        if instruction:
-            content.append({"type": "text", "text": instruction})
-        return {"role": "user", "content": content}
-
-    def _prepare_message_payload(
-        self,
-        model: str,
-        upload_file_response: UploadFileResponse | None,
-        task_description: str | None,
-        task_id: str | None,
-        instruction: str | None,
-        messages_history: list | None,
-        temperature: float | None,
-        api_version: str | None,
-        screenshot_url: str | None = None,
-    ) -> tuple[dict[str, str], dict[str, Any]]:
-        """Prepare headers and payload for /v2/message request.
-
-        Args:
-            model: Model to use
-            upload_file_response: Response from S3 upload (if screenshot was uploaded)
-            task_description: Task description
-            task_id: Task ID
-            instruction: Optional instruction
-            messages_history: Message history
-            temperature: Sampling temperature
-            api_version: API version
-            screenshot_url: Direct screenshot URL (alternative to upload_file_response)
-
-        Returns:
-            Tuple of (headers, payload)
-        """
-        # Use provided screenshot_url or get from upload_file_response
-        if screenshot_url is None:
-            if upload_file_response is None:
-                raise ValueError(
-                    "Either screenshot_url or upload_file_response must be provided"
-                )
-            screenshot_url = upload_file_response.download_url
-
-        # Build user message and append to history
-        if messages_history is None:
-            messages_history = []
-        user_message = self._build_user_message(screenshot_url, instruction)
-        messages_history.append(user_message)
-
-        # Build payload and headers
-        headers = self._build_headers(api_version)
-        payload = self._build_payload(
-            model=model,
-            messages_history=messages_history,
-            task_description=task_description,
-            task_id=task_id,
-            temperature=temperature,
-        )
-
-        return headers, payload
-
     def _parse_response_json(self, response: httpx.Response) -> dict[str, Any]:
         try:
             return response.json()
@@ -230,35 +203,6 @@ class BaseClient(Generic[HttpClientT]):
             response=response,
         )
 
-    def _process_response(self, response: httpx.Response) -> "LLMResponse":
-        response_data = self._parse_response_json(response)
-
-        # Check if it's an error response (non-200 status)
-        if response.status_code != 200:
-            self._handle_response_error(response, response_data)
-
-        # Parse successful response
-        result = LLMResponse(**response_data)
-
-        # Check if the response contains an error (even with 200 status)
-        if result.error:
-            logger.error(
-                f"API Error in response: [{result.error.code}]: {result.error.message}"
-            )
-            raise APIError(
-                result.error.message,
-                code=result.error.code,
-                status_code=200,
-                response=response,
-            )
-
-        logger.info(
-            f"API request successful - task_id: {result.task_id}, "
-            f"complete: {result.is_complete}"
-        )
-        logger.debug(f"Response included {len(result.actions)} actions")
-        return result
-
     def _process_upload_response(self, response: httpx.Response) -> UploadFileResponse:
         """Process response from /v1/file/upload endpoint.
 
@@ -449,7 +393,11 @@ class BaseClient(Generic[HttpClientT]):
         # Parse successful response
         result = GenerateResponse(**response_data)
 
+        # Capture request_id from response header
+        result.request_id = response.headers.get("X-Request-ID")
+
         logger.info(
             f"Generate request successful - tokens: {result.prompt_tokens}+{result.completion_tokens}, "
+            f"request_id: {result.request_id}"
         )
         return result
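The new `_build_chat_completion_kwargs` replaces the bespoke `/v2/message` payload with standard OpenAI chat-completion kwargs, routing the OAGI-specific `task_id` through the SDK's `extra_body` passthrough (which the OpenAI client merges into the request JSON). A minimal sketch of the resulting request shape, with illustrative values; the image-plus-text content format follows the removed `_build_user_message` helper:

```python
# Illustrative kwargs produced for a multi-turn call.
# The OpenAI SDK merges extra_body into the JSON body of
# chat.completions.create(), so task_id reaches the OAGI backend.
kwargs = {
    "model": "lux-actor-1",  # MODEL_ACTOR from oagi/constants.py
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/screenshot.png"}},
                {"type": "text", "text": "Open the settings dialog"},
            ],
        }
    ],
    "temperature": 0.1,
    "extra_body": {"task_id": "task-abc123"},  # omitted on the first turn
}
```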
oagi/client/sync.py CHANGED
@@ -9,28 +9,24 @@
 from functools import wraps
 
 import httpx
-from httpx import Response
+from httpx import HTTPTransport
+from openai import OpenAI
 
 from ..constants import (
-    API_HEALTH_ENDPOINT,
     API_V1_FILE_UPLOAD_ENDPOINT,
     API_V1_GENERATE_ENDPOINT,
-    API_V2_MESSAGE_ENDPOINT,
+    DEFAULT_MAX_RETRIES,
     HTTP_CLIENT_TIMEOUT,
 )
 from ..logging import get_logger
 from ..types import Image
-from ..types.models import GenerateResponse, LLMResponse, UploadFileResponse
+from ..types.models import GenerateResponse, UploadFileResponse, Usage
+from ..types.models.step import Step
 from .base import BaseClient
 
 logger = get_logger("sync_client")
 
 
-def _log_trace_id(response: Response):
-    logger.error(f"Request Id: {response.headers.get('x-request-id', '')}")
-    logger.error(f"Trace Id: {response.headers.get('x-trace-id', '')}")
-
-
 def log_trace_on_failure(func):
     """Decorator that logs trace ID when a method fails."""
 
@@ -41,7 +37,7 @@ def log_trace_on_failure(func):
         except Exception as e:
             # Try to get response from the exception if it has one
             if (response := getattr(e, "response", None)) is not None:
-                _log_trace_id(response)
+                BaseClient._log_trace_id(response)
             raise
 
     return wrapper
@@ -50,113 +46,70 @@ def log_trace_on_failure(func):
 class SyncClient(BaseClient[httpx.Client]):
     """Synchronous HTTP client for the OAGI API."""
 
-    def __init__(self, base_url: str | None = None, api_key: str | None = None):
-        super().__init__(base_url, api_key)
-        self.client = httpx.Client(base_url=self.base_url)
-        self.upload_client = httpx.Client(timeout=HTTP_CLIENT_TIMEOUT)
+    def __init__(
+        self,
+        base_url: str | None = None,
+        api_key: str | None = None,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+    ):
+        super().__init__(base_url, api_key, max_retries)
+
+        # OpenAI client for chat completions (with retries)
+        self.openai_client = OpenAI(
+            api_key=self.api_key,
+            base_url=f"{self.base_url}/v1",
+            max_retries=self.max_retries,
+        )
+
+        # httpx clients for S3 uploads and other endpoints (with retries)
+        transport = HTTPTransport(retries=self.max_retries)
+        self.http_client = httpx.Client(transport=transport, base_url=self.base_url)
+        self.upload_client = httpx.Client(
+            transport=transport, timeout=HTTP_CLIENT_TIMEOUT
+        )
+
         logger.info(f"SyncClient initialized with base_url: {self.base_url}")
 
     def __enter__(self):
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        self.client.close()
-        self.upload_client.close()
+        self.close()
 
     def close(self):
-        """Close the underlying httpx clients."""
-        self.client.close()
+        """Close the underlying clients."""
+        self.openai_client.close()
+        self.http_client.close()
         self.upload_client.close()
 
-    @log_trace_on_failure
-    def create_message(
+    def chat_completion(
         self,
         model: str,
-        screenshot: bytes | None = None,
-        screenshot_url: str | None = None,
-        task_description: str | None = None,
-        task_id: str | None = None,
-        instruction: str | None = None,
-        messages_history: list | None = None,
+        messages: list,
         temperature: float | None = None,
-        api_version: str | None = None,
-    ) -> LLMResponse | None:
+        task_id: str | None = None,
+    ) -> tuple[Step, str, Usage | None]:
         """
-        Call the /v2/message endpoint to analyze task and screenshot
+        Call OpenAI-compatible /v1/chat/completions endpoint.
 
         Args:
-            model: The model to use for task analysis
-            screenshot: Screenshot image bytes (mutually exclusive with screenshot_url)
-            screenshot_url: Direct URL to screenshot (mutually exclusive with screenshot)
-            task_description: Description of the task (required for new sessions)
-            task_id: Task ID for continuing existing task
-            instruction: Additional instruction when continuing a session
-            messages_history: OpenAI-compatible chat message history
-            temperature: Sampling temperature (0.0-2.0) for LLM inference
-            api_version: API version header
+            model: Model to use for inference
+            messages: Full message history (OpenAI-compatible format)
+            temperature: Sampling temperature (0.0-2.0)
+            task_id: Optional task ID for multi-turn conversations
 
         Returns:
-            LLMResponse: The response from the API
-
-        Raises:
-            ValueError: If both or neither screenshot and screenshot_url are provided
-            httpx.HTTPStatusError: For HTTP error responses
+            Tuple of (Step, raw_output, Usage)
+            - Step: Parsed actions and reasoning
+            - raw_output: Raw model output string (for message history)
+            - Usage: Token usage statistics (or None if not available)
         """
-        # Validate that exactly one is provided
-        if (screenshot is None) == (screenshot_url is None):
-            raise ValueError(
-                "Exactly one of 'screenshot' or 'screenshot_url' must be provided"
-            )
-
-        self._log_request_info(model, task_description, task_id)
-
-        # Upload screenshot to S3 if bytes provided, otherwise use URL directly
-        upload_file_response = None
-        if screenshot is not None:
-            upload_file_response = self.put_s3_presigned_url(screenshot, api_version)
-
-        # Prepare message payload
-        headers, payload = self._prepare_message_payload(
-            model=model,
-            upload_file_response=upload_file_response,
-            task_description=task_description,
-            task_id=task_id,
-            instruction=instruction,
-            messages_history=messages_history,
-            temperature=temperature,
-            api_version=api_version,
-            screenshot_url=screenshot_url,
+        logger.info(f"Making chat completion request with model: {model}")
+        kwargs = self._build_chat_completion_kwargs(
+            model, messages, temperature, task_id
         )
-
-        # Make request
-        try:
-            response = self.client.post(
-                API_V2_MESSAGE_ENDPOINT,
-                json=payload,
-                headers=headers,
-                timeout=self.timeout,
-            )
-            return self._process_response(response)
-        except (httpx.TimeoutException, httpx.NetworkError) as e:
-            self._handle_upload_http_errors(e)
-
-    def health_check(self) -> dict:
-        """
-        Call the /health endpoint for health check
-
-        Returns:
-            dict: Health check response
-        """
-        logger.debug("Making health check request")
-        try:
-            response = self.client.get(API_HEALTH_ENDPOINT)
-            response.raise_for_status()
-            result = response.json()
-            logger.debug("Health check successful")
-            return result
-        except httpx.HTTPStatusError as e:
-            logger.warning(f"Health check failed: {e}")
-            raise
+        response = self.openai_client.chat.completions.create(**kwargs)
+        return self._parse_chat_completion_response(response)
 
     def get_s3_presigned_url(
         self,
@@ -175,7 +128,7 @@ class SyncClient(BaseClient[httpx.Client]):
 
         try:
             headers = self._build_headers(api_version)
-            response = self.client.get(
+            response = self.http_client.get(
                 API_V1_FILE_UPLOAD_ENDPOINT, headers=headers, timeout=self.timeout
             )
            return self._process_upload_response(response)
@@ -295,7 +248,7 @@ class SyncClient(BaseClient[httpx.Client]):
 
         # Make request
         try:
-            response = self.client.post(
+            response = self.http_client.post(
                 API_V1_GENERATE_ENDPOINT,
                 json=payload,
                 headers=headers,
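With this change, the sync client's public surface shifts from `create_message()` to an OpenAI-style `chat_completion()` that returns a parsed `Step` alongside the raw output and token usage. A minimal usage sketch under the new API; the import path mirrors the file layout in this diff, and the screenshot URL and instruction text are placeholders:

```python
from oagi.client.sync import SyncClient

# Sketch of the new flow; assumes OAGI_API_KEY is set in the environment.
with SyncClient(max_retries=3) as client:
    step, raw_output, usage = client.chat_completion(
        model="lux-actor-1",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": "https://example.com/shot.png"}},
                    {"type": "text", "text": "Click the Save button"},
                ],
            }
        ],
        temperature=0.1,
    )
    print(f"stop={step.stop}, actions={len(step.actions)}")
    if usage:
        print(f"tokens: {usage.prompt_tokens}+{usage.completion_tokens}")
```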
oagi/constants.py CHANGED
@@ -9,10 +9,8 @@
 # URLs & API Endpoints
 DEFAULT_BASE_URL = "https://api.agiopen.org"
 API_KEY_HELP_URL = "https://developer.agiopen.org/api-keys"
-API_V2_MESSAGE_ENDPOINT = "/v2/message"
 API_V1_FILE_UPLOAD_ENDPOINT = "/v1/file/upload"
 API_V1_GENERATE_ENDPOINT = "/v1/generate"
-API_HEALTH_ENDPOINT = "/health"
 
 # Model identifiers
 MODEL_ACTOR = "lux-actor-1"
@@ -28,6 +26,10 @@ DEFAULT_MAX_STEPS = 20
 DEFAULT_MAX_STEPS_THINKER = 100
 DEFAULT_MAX_STEPS_TASKER = 60
 
+# Maximum allowed steps per model (hard limits)
+MAX_STEPS_ACTOR = 30
+MAX_STEPS_THINKER = 120
+
 # Reflection intervals
 DEFAULT_REFLECTION_INTERVAL = 4
 DEFAULT_REFLECTION_INTERVAL_TASKER = 20
@@ -41,3 +43,6 @@ DEFAULT_TEMPERATURE_LOW = 0.1
 
 # Timeout Values
 HTTP_CLIENT_TIMEOUT = 60
+
+# Retry Configuration
+DEFAULT_MAX_RETRIES = 2
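The new `MAX_STEPS_*` constants are hard caps that sit alongside the existing `DEFAULT_MAX_STEPS*` defaults. This diff does not show their call sites, so the clamp below is a hypothetical helper that only illustrates the intended default-versus-limit relationship:

```python
from oagi.constants import DEFAULT_MAX_STEPS, MAX_STEPS_ACTOR

def clamp_actor_steps(requested: int | None) -> int:
    """Hypothetical helper: fall back to the default, never exceed the hard limit."""
    steps = requested if requested is not None else DEFAULT_MAX_STEPS
    return min(steps, MAX_STEPS_ACTOR)

assert clamp_actor_steps(None) == 20  # DEFAULT_MAX_STEPS
assert clamp_actor_steps(100) == 30   # capped at MAX_STEPS_ACTOR
```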
oagi/handler/__init__.py CHANGED
@@ -14,6 +14,21 @@ from oagi.handler.pyautogui_action_handler import (
 )
 from oagi.handler.screenshot_maker import ScreenshotMaker
 
+
+def reset_handler(handler) -> None:
+    """Reset handler state if supported.
+
+    Uses duck-typing to check if the handler has a reset() method.
+    This allows handlers to reset their internal state (e.g., capslock state)
+    at the start of a new automation task.
+
+    Args:
+        handler: The action handler to reset
+    """
+    if hasattr(handler, "reset"):
+        handler.reset()
+
+
 __all__ = [
     "PILImage",
     "PyautoguiActionHandler",
@@ -21,4 +36,5 @@ __all__ = [
     "AsyncPyautoguiActionHandler",
     "ScreenshotMaker",
     "AsyncScreenshotMaker",
+    "reset_handler",
 ]
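Because `reset_handler` dispatches by duck-typing, callers can pass any handler without checking its type first; handlers lacking a `reset()` method are silently skipped. A short sketch, assuming `PyautoguiActionHandler` exposes the `reset()` behavior referenced by the capslock changes in this release (the stateless class is hypothetical):

```python
from oagi.handler import PyautoguiActionHandler, reset_handler

handler = PyautoguiActionHandler()
reset_handler(handler)  # calls handler.reset(), e.g. clearing tracked capslock state

class StatelessHandler:
    """Hypothetical handler with no reset() method."""

reset_handler(StatelessHandler())  # no-op: hasattr(handler, "reset") is False
```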