avtomatika-worker 1.0a2 (avtomatika_worker-1.0a2-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avtomatika_worker/__init__.py +12 -0
- avtomatika_worker/config.py +110 -0
- avtomatika_worker/types.py +4 -0
- avtomatika_worker/worker.py +408 -0
- avtomatika_worker-1.0a2.dist-info/METADATA +307 -0
- avtomatika_worker-1.0a2.dist-info/RECORD +9 -0
- avtomatika_worker-1.0a2.dist-info/WHEEL +5 -0
- avtomatika_worker-1.0a2.dist-info/licenses/LICENSE +21 -0
- avtomatika_worker-1.0a2.dist-info/top_level.txt +1 -0

avtomatika_worker/__init__.py
@@ -0,0 +1,12 @@
"""A Python SDK for creating workers for the Py-Orchestrator."""

from importlib.metadata import PackageNotFoundError, version

from .worker import Worker

__all__ = ["Worker"]

try:
    __version__ = version("avtomatika-worker")
except PackageNotFoundError:
    __version__ = "unknown"

avtomatika_worker/config.py
@@ -0,0 +1,110 @@
from _socket import gaierror, gethostbyname, gethostname
from json import JSONDecodeError, loads
from os import getenv
from typing import Any
from uuid import uuid4


class WorkerConfig:
    """A class for centralized management of worker configuration.
    Reads parameters from environment variables and provides default values.
    """

    def __init__(self):
        # --- Basic worker information ---
        self.worker_id: str = getenv("WORKER_ID", f"worker-{uuid4()}")
        self.worker_type: str = getenv("WORKER_TYPE", "generic-cpu-worker")
        self.worker_port: int = int(getenv("WORKER_PORT", "8083"))
        self.hostname: str = gethostname()
        try:
            self.ip_address: str = gethostbyname(self.hostname)
        except gaierror:
            self.ip_address: str = "127.0.0.1"

        # --- Orchestrator settings ---
        self.orchestrators: list[dict[str, Any]] = self._get_orchestrators_config()

        # --- Security ---
        self.worker_token: str = getenv(
            "WORKER_INDIVIDUAL_TOKEN",
            getenv("WORKER_TOKEN", "your-secret-worker-token"),
        )

        # --- Resources and performance ---
        self.cost_per_second: float = float(getenv("WORKER_COST_PER_SECOND", "0.01"))
        self.max_concurrent_tasks: int = int(getenv("MAX_CONCURRENT_TASKS", "10"))
        self.resources: dict[str, Any] = {
            "cpu_cores": int(getenv("CPU_CORES", "4")),
            "gpu_info": self._get_gpu_info(),
        }

        # --- Installed software and models (read as JSON strings) ---
        self.installed_software: dict[str, str] = self._load_json_from_env(
            "INSTALLED_SOFTWARE",
            default={"python": "3.9"},
        )
        self.installed_models: list[dict[str, str]] = self._load_json_from_env(
            "INSTALLED_MODELS",
            default=[],
        )

        # --- Tuning parameters ---
        self.heartbeat_interval: float = float(getenv("HEARTBEAT_INTERVAL", "15"))
        self.result_max_retries: int = int(getenv("RESULT_MAX_RETRIES", "5"))
        self.result_retry_initial_delay: float = float(
            getenv("RESULT_RETRY_INITIAL_DELAY", "1.0"),
        )
        self.heartbeat_debounce_delay: float = float(getenv("WORKER_HEARTBEAT_DEBOUNCE_DELAY", 0.1))
        self.task_poll_timeout: float = float(getenv("TASK_POLL_TIMEOUT", "30"))
        self.task_poll_error_delay: float = float(
            getenv("TASK_POLL_ERROR_DELAY", "5.0"),
        )
        self.idle_poll_delay: float = float(getenv("IDLE_POLL_DELAY", "0.01"))
        self.enable_websockets: bool = getenv("WORKER_ENABLE_WEBSOCKETS", "false").lower() == "true"
        self.multi_orchestrator_mode: str = getenv("MULTI_ORCHESTRATOR_MODE", "FAILOVER")

    def _get_orchestrators_config(self) -> list[dict[str, Any]]:
        """
        Loads orchestrator configuration from the ORCHESTRATORS_CONFIG environment variable.
        For backward compatibility, if it is not set, it uses ORCHESTRATOR_URL.
        """
        orchestrators_json = getenv("ORCHESTRATORS_CONFIG")
        if orchestrators_json:
            try:
                orchestrators = loads(orchestrators_json)
                for o in orchestrators:
                    if "priority" not in o:
                        o["priority"] = 10
                orchestrators.sort(key=lambda x: (x.get("priority", 10), x.get("url")))
                return orchestrators
            except JSONDecodeError:
                print("Warning: Could not decode JSON from ORCHESTRATORS_CONFIG. Falling back to default.")

        orchestrator_url = getenv("ORCHESTRATOR_URL", "http://localhost:8080")
        return [{"url": orchestrator_url, "priority": 1}]

    def _get_gpu_info(self) -> dict[str, Any] | None:
        """Collects GPU information from environment variables.
        Returns None if GPU is not configured.
        """
        gpu_model = getenv("GPU_MODEL")
        if not gpu_model:
            return None

        return {
            "model": gpu_model,
            "vram_gb": int(getenv("GPU_VRAM_GB", "0")),
        }

    def _load_json_from_env(self, key: str, default: Any) -> Any:
        """Safely loads a JSON string from an environment variable."""
        value = getenv(key)
        if value:
            try:
                return loads(value)
            except JSONDecodeError:
                print(
                    f"Warning: Could not decode JSON from environment variable {key}.",
                )
                return default
        return default
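
The JSON-valued settings above (`INSTALLED_SOFTWARE`, `INSTALLED_MODELS`, `ORCHESTRATORS_CONFIG`) are parsed with `loads`, so each one is a plain JSON string in the environment. A minimal sketch of populating two of them before the worker starts; the concrete software and model entries are hypothetical, only the shapes mirror the defaults in `WorkerConfig`:

```python
from json import dumps
from os import environ

# Hypothetical values; INSTALLED_SOFTWARE is a flat mapping (default {"python": "3.9"}),
# INSTALLED_MODELS is a list of dicts (default []).
environ["INSTALLED_SOFTWARE"] = dumps({"python": "3.11", "ffmpeg": "6.1"})
environ["INSTALLED_MODELS"] = dumps([{"name": "whisper-large-v3", "version": "3"}])

from avtomatika_worker.config import WorkerConfig

config = WorkerConfig()
print(config.installed_software)  # {'python': '3.11', 'ffmpeg': '6.1'}
print(config.installed_models)    # [{'name': 'whisper-large-v3', 'version': '3'}]
```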

avtomatika_worker/worker.py
@@ -0,0 +1,408 @@
from asyncio import CancelledError, Event, Task, create_task, gather, run, sleep
from asyncio import TimeoutError as AsyncTimeoutError
from json import JSONDecodeError
from logging import getLogger
from typing import Any, Callable

from aiohttp import ClientError, ClientSession, ClientTimeout, ClientWebSocketResponse, WSMsgType, web

from .config import WorkerConfig

# Logging setup
logger = getLogger(__name__)


class Worker:
    """The main class for creating and running a worker.
    Implements a hybrid interaction model with the Orchestrator:
    - PULL model for fetching tasks.
    - WebSocket for real-time commands (cancellation) and sending progress.
    """

    def __init__(
        self,
        worker_type: str = "generic-worker",
        max_concurrent_tasks: int | None = None,
        task_type_limits: dict[str, int] | None = None,
        http_session: ClientSession | None = None,
        skill_dependencies: dict[str, list[str]] | None = None,
    ):
        self._config = WorkerConfig()
        self._config.worker_type = worker_type  # Allow overriding worker_type
        if max_concurrent_tasks is not None:
            self._config.max_concurrent_tasks = max_concurrent_tasks

        self._task_type_limits = task_type_limits or {}
        self._task_handlers: dict[str, dict[str, Any]] = {}
        self._skill_dependencies = skill_dependencies or {}

        # Worker state
        self._current_load = 0
        self._current_load_by_type: dict[str, int] = dict.fromkeys(self._task_type_limits, 0)
        self._hot_cache: set[str] = set()
        self._active_tasks: dict[str, Task] = {}
        self._http_session = http_session
        self._session_is_managed_externally = http_session is not None
        self._ws_connection: ClientWebSocketResponse | None = None
        self._headers = {"X-Worker-Token": self._config.worker_token}
        self._shutdown_event = Event()
        self._registered_event = Event()
        self._round_robin_index = 0
        self._debounce_task: Task | None = None

    def _validate_config(self):
        """Checks for unused task type limits and warns the user."""
        registered_task_types = {
            handler_data["type"] for handler_data in self._task_handlers.values() if handler_data["type"]
        }

        for task_type in self._task_type_limits:
            if task_type not in registered_task_types:
                logger.warning(
                    f"Configuration warning: A limit is defined for task type '{task_type}', "
                    "but no tasks are registered with this type."
                )

    def task(self, name: str, task_type: str | None = None) -> Callable:
        """Decorator to register a function as a task handler."""

        def decorator(func: Callable) -> Callable:
            logger.info(f"Registering task: '{name}' (type: {task_type or 'N/A'})")
            if task_type and task_type not in self._task_type_limits:
                logger.warning(
                    f"Task '{name}' has a type '{task_type}' which is not defined in 'task_type_limits'. "
                    "No concurrency limit will be applied for this type."
                )
            if task_type and task_type not in self._current_load_by_type:
                self._current_load_by_type[task_type] = 0
            self._task_handlers[name] = {"func": func, "type": task_type}
            return func

        return decorator

    def add_to_hot_cache(self, model_name: str):
        """Adds a model to the hot cache."""
        self._hot_cache.add(model_name)
        self._schedule_heartbeat_debounce()

    def remove_from_hot_cache(self, model_name: str):
        """Removes a model from the hot cache."""
        self._hot_cache.discard(model_name)
        self._schedule_heartbeat_debounce()

    def get_hot_cache(self) -> set[str]:
        """Returns the hot cache."""
        return self._hot_cache

    def _get_current_state(self) -> dict[str, Any]:
        """
        Calculates the current worker state including status and available tasks.
        """
        if self._current_load >= self._config.max_concurrent_tasks:
            return {"status": "busy", "supported_tasks": []}

        supported_tasks = []
        for name, handler_data in self._task_handlers.items():
            is_available = True
            task_type = handler_data.get("type")

            if task_type and task_type in self._task_type_limits:
                limit = self._task_type_limits[task_type]
                current_load = self._current_load_by_type.get(task_type, 0)
                if current_load >= limit:
                    is_available = False

            if is_available:
                supported_tasks.append(name)

        status = "idle" if supported_tasks else "busy"
        return {"status": status, "supported_tasks": supported_tasks}

    async def _debounced_heartbeat_sender(self):
        """Waits for the debounce delay then sends a heartbeat."""
        await sleep(self._config.heartbeat_debounce_delay)
        await self._send_heartbeats_to_all()

    def _schedule_heartbeat_debounce(self):
        """Schedules a debounced heartbeat, cancelling any pending one."""
        # Cancel the previously scheduled task, if it exists and is not done.
        if self._debounce_task and not self._debounce_task.done():
            self._debounce_task.cancel()
        # Schedule the new debounced call.
        self._debounce_task = create_task(self._debounced_heartbeat_sender())

    async def _poll_for_tasks(self, orchestrator_url: str):
        """Polls a specific Orchestrator for new tasks."""
        url = f"{orchestrator_url}/_worker/workers/{self._config.worker_id}/tasks/next"
        try:
            if not self._http_session:
                return
            timeout = ClientTimeout(total=self._config.task_poll_timeout + 5)
            async with self._http_session.get(url, headers=self._headers, timeout=timeout) as resp:
                if resp.status == 200:
                    task_data = await resp.json()
                    task_data["orchestrator_url"] = orchestrator_url

                    self._current_load += 1
                    task_handler_info = self._task_handlers.get(task_data["type"])
                    if task_handler_info:
                        task_type_for_limit = task_handler_info.get("type")
                        if task_type_for_limit:
                            self._current_load_by_type[task_type_for_limit] += 1
                    self._schedule_heartbeat_debounce()

                    task = create_task(self._process_task(task_data))
                    self._active_tasks[task_data["task_id"]] = task
                elif resp.status != 204:
                    await sleep(self._config.task_poll_error_delay)
        except (AsyncTimeoutError, ClientError) as e:
            logger.error(f"Error polling for tasks: {e}")
            await sleep(self._config.task_poll_error_delay)

    async def _start_polling(self):
        print("Waiting for registration")
        """The main loop for polling tasks."""
        await self._registered_event.wait()
        print("Polling started")
        while not self._shutdown_event.is_set():
            if self._get_current_state()["status"] == "busy":
                await sleep(self._config.idle_poll_delay)
                continue

            if self._config.multi_orchestrator_mode == "ROUND_ROBIN":
                orchestrator = self._config.orchestrators[self._round_robin_index]
                await self._poll_for_tasks(orchestrator["url"])
                self._round_robin_index = (self._round_robin_index + 1) % len(self._config.orchestrators)
            else:
                for orchestrator in self._config.orchestrators:
                    if self._get_current_state()["status"] == "busy":
                        break
                    await self._poll_for_tasks(orchestrator["url"])

            if self._current_load == 0:
                await sleep(self._config.idle_poll_delay)

    async def _process_task(self, task_data: dict[str, Any]):
        """Executes the task logic."""
        task_id, job_id, task_name = task_data["task_id"], task_data["job_id"], task_data["type"]
        params, orchestrator_url = task_data.get("params", {}), task_data["orchestrator_url"]

        result: dict[str, Any] = {}
        handler_data = self._task_handlers.get(task_name)
        task_type_for_limit = handler_data.get("type") if handler_data else None

        try:
            if handler_data:
                result = await handler_data["func"](
                    params,
                    task_id=task_id,
                    job_id=job_id,
                    priority=task_data.get("priority", 0),
                    send_progress=self.send_progress,
                    add_to_hot_cache=self.add_to_hot_cache,
                    remove_from_hot_cache=self.remove_from_hot_cache,
                )
            else:
                result = {"status": "failure", "error_message": f"Unsupported task: {task_name}"}
        except CancelledError:
            result = {"status": "cancelled"}
        except Exception as e:
            result = {"status": "failure", "error": {"code": "TRANSIENT_ERROR", "message": str(e)}}
        finally:
            payload = {"job_id": job_id, "task_id": task_id, "worker_id": self._config.worker_id, "result": result}
            await self._send_result(payload, orchestrator_url)
            self._active_tasks.pop(task_id, None)

            self._current_load -= 1
            if task_type_for_limit:
                self._current_load_by_type[task_type_for_limit] -= 1
            self._schedule_heartbeat_debounce()

    async def _send_result(self, payload: dict[str, Any], orchestrator_url: str):
        """Sends the result to a specific orchestrator."""
        url = f"{orchestrator_url}/_worker/tasks/result"
        delay = self._config.result_retry_initial_delay
        for i in range(self._config.result_max_retries):
            try:
                if self._http_session and not self._http_session.closed:
                    async with self._http_session.post(url, json=payload, headers=self._headers) as resp:
                        if resp.status == 200:
                            return
            except ClientError as e:
                logger.error(f"Error sending result: {e}")
            await sleep(delay * (2**i))

    async def _manage_orchestrator_communications(self):
        print("Registering worker")
        """Registers the worker and sends heartbeats."""
        await self._register_with_all_orchestrators()
        print("Worker registered")
        self._registered_event.set()
        if self._config.enable_websockets:
            create_task(self._start_websocket_manager())

        while not self._shutdown_event.is_set():
            await self._send_heartbeats_to_all()
            await sleep(self._config.heartbeat_interval)

    async def _register_with_all_orchestrators(self):
        """Registers the worker with all orchestrators."""
        state = self._get_current_state()
        payload = {
            "worker_id": self._config.worker_id,
            "worker_type": self._config.worker_type,
            "supported_tasks": state["supported_tasks"],
            "max_concurrent_tasks": self._config.max_concurrent_tasks,
            "installed_models": self._config.installed_models,
            "hostname": self._config.hostname,
            "ip_address": self._config.ip_address,
            "resources": self._config.resources,
        }
        for orchestrator in self._config.orchestrators:
            url = f"{orchestrator['url']}/_worker/workers/register"
            try:
                if self._http_session:
                    async with self._http_session.post(url, json=payload, headers=self._headers) as resp:
                        if resp.status >= 400:
                            logger.error(f"Error registering with {orchestrator['url']}: {resp.status}")
            except ClientError as e:
                logger.error(f"Error registering with orchestrator {orchestrator['url']}: {e}")

    async def _send_heartbeats_to_all(self):
        print("Sending heartbeats")
        """Sends heartbeat messages to all orchestrators."""
        state = self._get_current_state()
        payload = {
            "load": self._current_load,
            "status": state["status"],
            "supported_tasks": state["supported_tasks"],
            "hot_cache": list(self._hot_cache),
        }

        if self._skill_dependencies:
            payload["skill_dependencies"] = self._skill_dependencies
            hot_skills = [
                skill for skill, models in self._skill_dependencies.items() if set(models).issubset(self._hot_cache)
            ]
            if hot_skills:
                payload["hot_skills"] = hot_skills

        async def _send_single(orchestrator_url: str):
            url = f"{orchestrator_url}/_worker/workers/{self._config.worker_id}"
            try:
                if self._http_session and not self._http_session.closed:
                    async with self._http_session.patch(url, json=payload, headers=self._headers) as resp:
                        if resp.status >= 400:
                            logger.warning(f"Heartbeat to {orchestrator_url} failed with status: {resp.status}")
            except ClientError as e:
                logger.error(f"Error sending heartbeat to orchestrator {orchestrator_url}: {e}")

        await gather(*[_send_single(o["url"]) for o in self._config.orchestrators])

    async def main(self):
        print("Main started")
        """The main asynchronous function."""
        self._validate_config()  # Validate config now that all tasks are registered
        if not self._http_session:
            self._http_session = ClientSession()
        print("Starting comm task")
        comm_task = create_task(self._manage_orchestrator_communications())
        print("Starting polling task")
        polling_task = create_task(self._start_polling())
        await self._shutdown_event.wait()

        for task in [comm_task, polling_task]:
            task.cancel()
        if self._active_tasks:
            await gather(*self._active_tasks.values(), return_exceptions=True)

        if self._ws_connection and not self._ws_connection.closed:
            await self._ws_connection.close()
        if self._http_session and not self._http_session.closed and not self._session_is_managed_externally:
            await self._http_session.close()

    def run(self):
        """Runs the worker."""
        try:
            run(self.main())
        except KeyboardInterrupt:
            self._shutdown_event.set()
            run(sleep(1.5))

    async def _run_health_check_server(self):
        app = web.Application()
        app.router.add_get("/health", lambda r: web.Response(text="OK"))
        runner = web.AppRunner(app)
        await runner.setup()
        site = web.TCPSite(runner, "0.0.0.0", self._config.worker_port)
        await site.start()
        await self._shutdown_event.wait()
        await runner.cleanup()

    def run_with_health_check(self):
        async def _main_wrapper():
            await gather(self._run_health_check_server(), self.main())

        try:
            run(_main_wrapper())
        except KeyboardInterrupt:
            self._shutdown_event.set()
            run(sleep(1.5))

    # WebSocket methods omitted for brevity as they are not relevant to the changes
    async def _start_websocket_manager(self):
        """Manages the WebSocket connection to the orchestrator."""
        while not self._shutdown_event.is_set():
            for orchestrator in self._config.orchestrators:
                ws_url = orchestrator["url"].replace("http", "ws", 1) + "/_worker/ws"
                try:
                    if self._http_session:
                        async with self._http_session.ws_connect(ws_url, headers=self._headers) as ws:
                            self._ws_connection = ws
                            logger.info(f"WebSocket connection established to {ws_url}")
                            await self._listen_for_commands()
                except (ClientError, AsyncTimeoutError) as e:
                    logger.warning(f"WebSocket connection to {ws_url} failed: {e}")
                finally:
                    self._ws_connection = None
                    logger.info(f"WebSocket connection to {ws_url} closed.")
                    await sleep(5)  # Reconnection delay
            if not self._config.orchestrators:
                await sleep(5)

    async def _listen_for_commands(self):
        """Listens for and processes commands from the orchestrator via WebSocket."""
        if not self._ws_connection:
            return

        try:
            async for msg in self._ws_connection:
                if msg.type == WSMsgType.TEXT:
                    try:
                        command = msg.json()
                        if command.get("type") == "cancel_task":
                            task_id = command.get("task_id")
                            if task_id in self._active_tasks:
                                self._active_tasks[task_id].cancel()
                                logger.info(f"Cancelled task {task_id} by orchestrator command.")
                    except JSONDecodeError:
                        logger.warning(f"Received invalid JSON over WebSocket: {msg.data}")
                elif msg.type == WSMsgType.ERROR:
                    break
        except Exception as e:
            logger.error(f"Error in WebSocket listener: {e}")

    async def send_progress(self, task_id: str, job_id: str, progress: float, message: str = ""):
        """Sends a progress update to the orchestrator via WebSocket."""
        if self._ws_connection and not self._ws_connection.closed:
            try:
                payload = {
                    "type": "progress_update",
                    "task_id": task_id,
                    "job_id": job_id,
                    "progress": progress,
                    "message": message,
                }
                await self._ws_connection.send_json(payload)
            except Exception as e:
                logger.warning(f"Could not send progress update for task {task_id}: {e}")
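
One detail worth noting from `__init__` and `main()` above: when an `http_session` is supplied, the worker marks it as externally managed and does not close it on shutdown. A minimal sketch of running the worker on a caller-owned `aiohttp` session; the task name `noop` is purely illustrative:

```python
from asyncio import run

from aiohttp import ClientSession

from avtomatika_worker import Worker


async def run_with_shared_session():
    async with ClientSession() as session:
        # The SDK reuses this session for registration, polling and heartbeats,
        # and leaves closing it to this context manager on shutdown.
        worker = Worker(worker_type="generic-worker", http_session=session)

        @worker.task("noop")  # hypothetical task name
        async def noop(params: dict, **kwargs):
            return {"status": "success", "data": {}}

        await worker.main()  # runs until the worker's shutdown event is set


if __name__ == "__main__":
    run(run_with_shared_session())
```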

avtomatika_worker-1.0a2.dist-info/METADATA
@@ -0,0 +1,307 @@
Metadata-Version: 2.4
Name: avtomatika-worker
Version: 1.0a2
Summary: Worker SDK for the Avtomatika orchestrator.
Project-URL: Homepage, https://github.com/avtomatila-ai/avtomatika-worker
Project-URL: Bug Tracker, https://github.com/avtomatila-ai/avtomatika-worker/issues
Classifier: Development Status :: 3 - Alpha
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.11
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: aiohttp~=3.13.2
Requires-Dist: python-json-logger~=4.0.0
Provides-Extra: test
Requires-Dist: pytest; extra == "test"
Requires-Dist: pytest-asyncio; extra == "test"
Requires-Dist: aioresponses; extra == "test"
Requires-Dist: pytest-mock; extra == "test"
Dynamic: license-file

# Avtomatika Worker SDK

This is an SDK for creating workers compatible with the **Avtomatika** orchestrator. The SDK handles all the complexity of interacting with the orchestrator, allowing you to focus on writing your business logic.

## Installation

```bash
pip install avtomatika-worker
```

## Quick Start

Creating a worker is simple. You instantiate the `Worker` class and then register your task-handling functions using the `@worker.task` decorator.

```python
import asyncio
from avtomatika_worker import Worker

# 1. Create a worker instance
worker = Worker(
    worker_type="image-processing",
    skill_dependencies={
        "resize_image": ["pillow"],
        "add_watermark": ["pillow", "numpy"],
    }
)

# 2. Register a task handler using the decorator
@worker.task("resize_image")
async def image_resizer(params: dict, **kwargs):
    """
    An example handler that receives task parameters,
    performs the work, and returns the result.
    """
    task_id = kwargs.get("task_id")
    job_id = kwargs.get("job_id")

    print(f"Task {task_id} (Job: {job_id}): resizing image...")
    print(f"Parameters: {params}")

    # ... your business logic here ...
    await asyncio.sleep(1)  # Simulate I/O-bound work

    # Return the result
    return {
        "status": "success",
        "data": {
            "resized_path": f"/path/to/resized_{params.get('filename')}"
        }
    }

# 3. Run the worker
if __name__ == "__main__":
    # The SDK will automatically connect to the orchestrator,
    # register itself, and start polling for tasks.
    worker.run_with_health_check()

```

## Key Features

### 1. Task Handlers

Each handler is an asynchronous function that accepts two arguments:

- `params` (`dict`): A dictionary with the parameters that the orchestrator passed for this task.
- `**kwargs`: Additional metadata about the task, including:
  - `task_id` (`str`): The unique ID of the task itself.
  - `job_id` (`str`): The ID of the parent `Job` to which the task belongs.
  - `priority` (`int`): The execution priority of the task.
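
Judging from `_process_task` in `worker.py` above, `**kwargs` also carries the `send_progress`, `add_to_hot_cache` and `remove_from_hot_cache` callbacks. A minimal sketch of a handler reporting progress through `send_progress`; the task name and the `files` parameter are hypothetical:

```python
from avtomatika_worker import Worker

worker = Worker(worker_type="image-processing")

@worker.task("batch_resize")  # hypothetical task name
async def batch_resize(params: dict, **kwargs):
    task_id, job_id = kwargs["task_id"], kwargs["job_id"]
    send_progress = kwargs["send_progress"]

    files = params.get("files", [])
    for index, path in enumerate(files, start=1):
        # ... resize one file here ...
        # Progress updates go out over the WebSocket connection, when enabled.
        await send_progress(task_id, job_id, index / len(files), f"resized {path}")

    return {"status": "success", "data": {"count": len(files)}}
```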

### 2. Concurrency Limiting

The worker allows you to control how many tasks are executed in parallel. This can be configured at two levels:

- **Global Limit**: A maximum number of tasks that the worker can execute simultaneously, regardless of their type.
- **Per-Type Limit**: A specific limit for a group of tasks that share a common resource (e.g., a GPU, a specific API).

The worker dynamically reports its available capacity to the orchestrator. When a limit is reached, the worker informs the orchestrator that it can no longer accept tasks of that type until a slot becomes free.

**Example:**

Let's configure a worker that can run up to **10 tasks in total**, but no more than **1 video processing task** and **4 audio transcription tasks** at the same time.

```python
import asyncio
from avtomatika_worker import Worker

# 1. Configure limits during initialization
worker = Worker(
    worker_type="media-processor",
    max_concurrent_tasks=10,
    task_type_limits={
        "video_processing": 1,
        "audio_processing": 4,
    }
)

# 2. Assign a type to each task using the decorator
@worker.task("upscale_video", task_type="video_processing")
async def upscale_video(params: dict, **kwargs):
    # This task uses the 'video_processing' slot
    print("Upscaling video...")
    await asyncio.sleep(5)
    return {"status": "success"}

@worker.task("blur_video_faces", task_type="video_processing")
async def blur_video_faces(params: dict, **kwargs):
    # This task also uses the 'video_processing' slot
    print("Blurring faces in video...")
    await asyncio.sleep(5)
    return {"status": "success"}

@worker.task("transcribe_audio", task_type="audio_processing")
async def transcribe_audio(params: dict, **kwargs):
    # This task uses one of the four 'audio_processing' slots
    print("Transcribing audio...")
    await asyncio.sleep(2)
    return {"status": "success"}

@worker.task("generate_report")
async def generate_report(params: dict, **kwargs):
    # This task has no specific type and is only limited by the global limit
    print("Generating report...")
    await asyncio.sleep(1)
    return {"status": "success"}


if __name__ == "__main__":
    worker.run_with_health_check()
```
In this example, even though the global limit is 10, the orchestrator will only ever send one task (`upscale_video` or `blur_video_faces`) to this worker at a time, because they both share the single "video_processing" slot.

### 3. Returning Results and Handling Errors

The result returned by a handler directly influences the subsequent flow of the pipeline in the orchestrator.

#### Successful Execution

```python
return {
    "status": "success",
    "data": {"output": "some_value"}
}
```
- The orchestrator will receive this data and use the `"success"` key in the `transitions` dictionary to determine the next step.

#### Custom Statuses

You can return custom statuses to implement complex branching logic in the orchestrator.
```python
return {
    "status": "needs_manual_review",
    "data": {"reason": "Low confidence score"}
}
```
- The orchestrator will look for the `"needs_manual_review"` key in `transitions`.

#### Error Handling

To control the orchestrator's fault tolerance mechanism, you can return standardized error types.

- **Transient Error (`TRANSIENT_ERROR`)**: For issues that might be resolved on a retry (e.g., a network failure).
  ```python
  from avtomatika_worker.types import TRANSIENT_ERROR
  return {
      "status": "failure",
      "error": {
          "code": TRANSIENT_ERROR,
          "message": "External API timeout"
      }
  }
  ```
- **Permanent Error (`PERMANENT_ERROR`)**: For unresolvable problems (e.g., an invalid file format).
  ```python
  from avtomatika_worker.types import PERMANENT_ERROR
  return {
      "status": "failure",
      "error": {
          "code": PERMANENT_ERROR,
          "message": "Corrupted input file"
      }
  }
  ```

### 4. Failover and Load Balancing

The SDK supports connecting to multiple orchestrator instances to ensure high availability (`FAILOVER`) and load balancing (`ROUND_ROBIN`).

- **Configuration**: Set via the `ORCHESTRATORS_CONFIG` environment variable, which must contain a JSON string.
- **Mode**: Controlled by the `MULTI_ORCHESTRATOR_MODE` variable.

**Example `ORCHESTRATORS_CONFIG`:**
```json
[
    {"url": "http://orchestrator-1.my-domain.com:8080", "weight": 100},
    {"url": "http://orchestrator-2.my-domain.com:8080", "weight": 100}
]
```

- **`FAILOVER` (default):** The worker will connect to the first orchestrator. If it becomes unavailable, it will automatically switch to the next one in the list.
- **`ROUND_ROBIN`:** The worker will send requests to fetch tasks to each orchestrator in turn.
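
Note that `_get_orchestrators_config` in `config.py` above reads an optional `priority` field (missing values default to 10) and sorts the list by `(priority, url)`, and `FAILOVER` polling walks that sorted list in order. A sketch of setting the variable with explicit priorities, reusing the hostnames from the example above:

```python
from json import dumps
from os import environ

# Lower priority values are tried first in FAILOVER mode (per config.py).
environ["ORCHESTRATORS_CONFIG"] = dumps([
    {"url": "http://orchestrator-1.my-domain.com:8080", "priority": 1},
    {"url": "http://orchestrator-2.my-domain.com:8080", "priority": 2},
])
environ["MULTI_ORCHESTRATOR_MODE"] = "FAILOVER"
```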

### 5. Handling Large Files (S3 Payload Offloading)

The SDK supports working with large files "out of the box" via S3-compatible storage.

- **Automatic Download**: If a value in `params` is a URI of the form `s3://...`, the SDK will automatically download the file to the local disk and replace the URI in `params` with the local path.
- **Automatic Upload**: If your handler returns a local file path in `data` (located within the `WORKER_PAYLOAD_DIR` directory), the SDK will automatically upload this file to S3 and replace the path with an `s3://` URI in the final result.

This functionality is transparent to your code and only requires configuring environment variables for S3 access.
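
A minimal sketch of a handler written against this behaviour as described here; the task name, parameter names and file paths are hypothetical, and the actual download and upload are performed by the SDK around the handler, not by this code. S3 access itself is configured with the `S3_*` variables listed in the Configuration table below.

```python
import os
import shutil

from avtomatika_worker import Worker

PAYLOAD_DIR = os.getenv("WORKER_PAYLOAD_DIR", "/tmp/payloads")

worker = Worker(worker_type="media-processor")

@worker.task("convert_video")  # hypothetical task name
async def convert_video(params: dict, **kwargs):
    # By the time the handler runs, the SDK has already replaced any
    # "s3://..." values in params with local file paths.
    source_path = params["source"]

    # ... real conversion would go here; copying is just a stand-in ...
    output_path = os.path.join(PAYLOAD_DIR, "output.mp4")
    shutil.copyfile(source_path, output_path)

    # A local path under WORKER_PAYLOAD_DIR is uploaded by the SDK and
    # replaced with an s3:// URI in the final result.
    return {"status": "success", "data": {"output": output_path}}
```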

### 6. WebSocket Support

If enabled, the SDK establishes a persistent WebSocket connection with the orchestrator to receive real-time commands, such as canceling an ongoing task.
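
As `_listen_for_commands` and `_process_task` in `worker.py` above show, a `cancel_task` command cancels the handler's task, which surfaces as `asyncio.CancelledError` at the handler's next `await`, and the SDK then reports `{"status": "cancelled"}`. A sketch of a long-running handler that cleans up on cancellation, assuming `WORKER_ENABLE_WEBSOCKETS=true`:

```python
import asyncio

from avtomatika_worker import Worker

worker = Worker(worker_type="generic-worker")

@worker.task("long_render")  # hypothetical task name
async def long_render(params: dict, **kwargs):
    try:
        for _ in range(100):
            # Every await is a point where a "cancel_task" command from the
            # orchestrator can interrupt the handler.
            await asyncio.sleep(1)
    except asyncio.CancelledError:
        # Clean up partial output here, then re-raise so the SDK reports
        # {"status": "cancelled"} back to the orchestrator.
        raise
    return {"status": "success", "data": {}}
```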

## Advanced Features

### Reporting Skill & Model Dependencies

For more advanced scheduling, the worker can report detailed information about its skills and their dependencies on specific models. This allows the orchestrator to make smarter decisions, such as dispatching tasks to workers that already have the required models loaded in memory.

This is configured via the `skill_dependencies` argument in the `Worker` constructor.

- **`skill_dependencies`**: A dictionary where keys are skill names (as registered with `@worker.task`) and values are lists of model names required by that skill.

Based on this configuration and the current state of the worker's `hot_cache` (the set of models currently loaded in memory), the worker will automatically include two new fields in its heartbeat messages:

- **`skill_dependencies`**: The same dictionary provided during initialization.
- **`hot_skills`**: A dynamically calculated list of skills that are ready for immediate execution (i.e., all of their dependent models are in the `hot_cache`).

**Example:**

Consider a worker configured like this:
```python
worker = Worker(
    worker_type="ai-processor",
    skill_dependencies={
        "image_generation": ["stable_diffusion_v1.5", "vae-ft-mse"],
        "upscale": ["realesrgan_x4"],
    }
)
```

- Initially, `hot_cache` is empty. The worker's heartbeat will include `skill_dependencies` but not `hot_skills`.
- A task handler calls `add_to_hot_cache("stable_diffusion_v1.5")`. The next heartbeat will still not include `hot_skills` because the `image_generation` skill is only partially loaded.
- The handler then calls `add_to_hot_cache("vae-ft-mse")`. Now, all dependencies for `image_generation` are met. The next heartbeat will include:
```json
{
    "hot_skills": ["image_generation"],
    "skill_dependencies": {
        "image_generation": ["stable_diffusion_v1.5", "vae-ft-mse"],
        "upscale": ["realesrgan_x4"]
    }
}
```
This information is sent automatically. Your task handlers are only responsible for managing the `hot_cache` by calling `add_to_hot_cache()` and `remove_from_hot_cache()`, which are passed as arguments to the handler.
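
A minimal sketch of such a handler, managing the hot cache through the callback received in `**kwargs`; the model loading step itself is hypothetical:

```python
from avtomatika_worker import Worker

worker = Worker(
    worker_type="ai-processor",
    skill_dependencies={"image_generation": ["stable_diffusion_v1.5", "vae-ft-mse"]},
)

@worker.task("image_generation")
async def image_generation(params: dict, **kwargs):
    add_to_hot_cache = kwargs["add_to_hot_cache"]

    # Hypothetical model loading step; the names must match the ones
    # declared in skill_dependencies for hot_skills to be reported.
    for model_name in ("stable_diffusion_v1.5", "vae-ft-mse"):
        ...  # load the model into memory here
        add_to_hot_cache(model_name)

    # ... run inference ...
    return {"status": "success", "data": {"image_path": "/tmp/out.png"}}
```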

## Configuration

The worker is fully configured via environment variables.

| Variable | Description | Default |
| --- | --- | --- |
| `ORCHESTRATOR_URL` | The URL of a single orchestrator (used if `ORCHESTRATORS_CONFIG` is not set). | `http://localhost:8080` |
| `ORCHESTRATORS_CONFIG` | A JSON string with a list of orchestrators for `FAILOVER` or `ROUND_ROBIN` modes. | `[]` |
| `MULTI_ORCHESTRATOR_MODE` | The mode for handling multiple orchestrators. Possible values: `FAILOVER`, `ROUND_ROBIN`. | `FAILOVER` |
| `WORKER_ID` | **(Required)** A unique identifier for the worker. | - |
| `WORKER_TOKEN` | A common authentication token for all workers. | `default-token` |
| `WORKER_INDIVIDUAL_TOKEN` | An individual token for this worker (overrides `WORKER_TOKEN`). | - |
| `WORKER_ENABLE_WEBSOCKETS` | Enable (`true`) or disable (`false`) WebSocket support. | `false` |
| `WORKER_HEARTBEAT_DEBOUNCE_DELAY` | The delay in seconds for debouncing immediate heartbeats. | `0.1` |
| `WORKER_PAYLOAD_DIR` | The directory for temporarily storing files when working with S3. | `/tmp/payloads` |
| `S3_ENDPOINT_URL` | The URL of the S3-compatible storage. | - |
| `S3_ACCESS_KEY` | The access key for S3. | - |
| `S3_SECRET_KEY` | The secret key for S3. | - |
| `S3_DEFAULT_BUCKET` | The default bucket name for uploading results. | `avtomatika-payloads` |

## Development

To install the necessary dependencies for running tests, use the following command:

```bash
pip install .[test]
```

avtomatika_worker-1.0a2.dist-info/RECORD
@@ -0,0 +1,9 @@
avtomatika_worker/__init__.py,sha256=j0up34aVy7xyI67xg04TVbXSSSKGdO49vsBKhtH_D0M,287
avtomatika_worker/config.py,sha256=oEQMpmP4AkGKdgEE1BJxojdQkK7LrogmRKJ7ib-M9xs,4555
avtomatika_worker/types.py,sha256=2YL6MRG2LImCUKcb0G-B3757n7zWrrUc8NXnoCLKJlo,154
avtomatika_worker/worker.py,sha256=lyKvIPVcokQrd6qagit_BbMoZyyqivCdNyV4fwSJTY0,18421
avtomatika_worker-1.0a2.dist-info/licenses/LICENSE,sha256=tqCjw9Y1vbU-hLcWi__7wQstLbt2T1XWPdbQYqCxuWY,1072
avtomatika_worker-1.0a2.dist-info/METADATA,sha256=uJHXVdHzcJBdfQ-1rowBEYg488kbwifkvpDwvgocMqs,12288
avtomatika_worker-1.0a2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
avtomatika_worker-1.0a2.dist-info/top_level.txt,sha256=d3b5BUeUrHM1Cn-cbStz-hpucikEBlPOvtcmQ_j3qAs,18
avtomatika_worker-1.0a2.dist-info/RECORD,,

avtomatika_worker-1.0a2.dist-info/licenses/LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Dmitrii Gagarin

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

avtomatika_worker-1.0a2.dist-info/top_level.txt
@@ -0,0 +1 @@
avtomatika_worker