avtomatika 1.0b7__py3-none-any.whl → 1.0b8__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
avtomatika/s3.py ADDED
@@ -0,0 +1,323 @@
+from asyncio import Semaphore, gather, to_thread
+from logging import getLogger
+from os import sep, walk
+from pathlib import Path
+from shutil import rmtree
+from typing import Any, Tuple
+
+from orjson import dumps, loads
+
+from .config import Config
+from .history.base import HistoryStorageBase
+
+logger = getLogger(__name__)
+
+try:
+    # Optional S3 dependencies, provided by the 'avtomatika[s3]' extra.
+    # They are imported inside the try block so a missing extra is caught
+    # as ImportError here instead of failing at module import time.
+    from aiofiles import open as aiopen
+    from obstore import delete_async, get_async, put_async
+    from obstore import list as obstore_list
+    from obstore.store import S3Store
+
+    HAS_S3_LIBS = True
+except ImportError:
+    HAS_S3_LIBS = False
+    S3Store = Any
+
+
+class TaskFiles:
+    """
+    Manages files for a specific job, ensuring full compatibility with avtomatika-worker.
+    Supports recursive directory download/upload and non-blocking I/O.
+    """
+
+    def __init__(
+        self,
+        store: "S3Store",
+        bucket: str,
+        job_id: str,
+        base_local_dir: str | Path,
+        semaphore: Semaphore,
+        history: HistoryStorageBase | None = None,
+    ):
+        self._store = store
+        self._bucket = bucket
+        self._job_id = job_id
+        self._history = history
+        self._s3_prefix = f"jobs/{job_id}/"
+        self.local_dir = Path(base_local_dir) / job_id
+        self._semaphore = semaphore
+
+    def _ensure_local_dir(self) -> None:
+        if not self.local_dir.exists():
+            self.local_dir.mkdir(parents=True, exist_ok=True)
+
+    def path(self, filename: str) -> Path:
+        """Returns local path for a filename, ensuring the directory exists."""
+        self._ensure_local_dir()
+        clean_name = filename.split("/")[-1] if "://" in filename else filename.lstrip("/")
+        return self.local_dir / clean_name
+
+    def _parse_s3_uri(self, uri: str) -> Tuple[str, str, bool]:
+        """
+        Parses s3://bucket/key into (bucket, key, is_directory).
+        is_directory is True if uri ends with '/'.
+        """
+        is_dir = uri.endswith("/")
+
+        if not uri.startswith("s3://"):
+            key = f"{self._s3_prefix}{uri.lstrip('/')}"
+            return self._bucket, key, is_dir
+
+        parts = uri[5:].split("/", 1)
+        bucket = parts[0]
+        key = parts[1] if len(parts) > 1 else ""
+        return bucket, key, is_dir
+
+    async def _download_single_file(self, key: str, local_path: Path) -> None:
+        """Downloads a single file safely using semaphore and streaming to avoid OOM."""
+        if not local_path.parent.exists():
+            await to_thread(local_path.parent.mkdir, parents=True, exist_ok=True)
+
+        async with self._semaphore:
+            response = await get_async(self._store, key)
+            stream = response.stream()
+            async with aiopen(local_path, "wb") as f:
+                async for chunk in stream:
+                    await f.write(chunk)
+
+    async def download(self, name_or_uri: str, local_name: str | None = None) -> Path:
+        """
+        Downloads a file or directory (recursively).
+        If URI ends with '/', it treats it as a directory.
+        """
+        bucket, key, is_dir = self._parse_s3_uri(name_or_uri)
+
+        if local_name:
+            target_path = self.path(local_name)
+        else:
+            suffix = key.replace(self._s3_prefix, "", 1) if key.startswith(self._s3_prefix) else key.split("/")[-1]
+            target_path = self.local_dir / suffix
+
+        if is_dir:
+            logger.info(f"Recursive download: s3://{bucket}/{key} -> {target_path}")
+            entries = await to_thread(lambda: list(obstore_list(self._store, prefix=key)))
+
+            tasks = []
+            for entry in entries:
+                s3_key = entry["path"]
+                rel_path = s3_key[len(key) :]
+                if not rel_path:
+                    continue
+
+                local_file_path = target_path / rel_path
+                tasks.append(self._download_single_file(s3_key, local_file_path))
+
+            if tasks:
+                await gather(*tasks)
+
+            await self._log_event("download_dir", f"s3://{bucket}/{key}", str(target_path))
+            return target_path
+        else:
+            logger.debug(f"Downloading s3://{bucket}/{key} -> {target_path}")
+            await self._download_single_file(key, target_path)
+            await self._log_event("download", f"s3://{bucket}/{key}", str(target_path))
+            return target_path
+
+    async def _upload_single_file(self, local_path: Path, s3_key: str) -> None:
+        """Uploads a single file safely using semaphore."""
+        async with self._semaphore:
+            async with aiopen(local_path, "rb") as f:
+                content = await f.read()
+            await put_async(self._store, s3_key, content)
+
+    async def upload(self, local_name: str, remote_name: str | None = None) -> str:
+        """
+        Uploads a file or directory recursively.
+        If local_name points to a directory, it uploads all contents.
+        """
+        local_path = self.path(local_name)
+
+        if local_path.is_dir():
+            base_remote = (remote_name or local_name).lstrip("/")
+            if not base_remote.endswith("/"):
+                base_remote += "/"
+
+            target_prefix = f"{self._s3_prefix}{base_remote}"
+            logger.info(f"Recursive upload: {local_path} -> s3://{self._bucket}/{target_prefix}")
+
+            def collect_files():
+                files_to_upload = []
+                for root, _, files in walk(local_path):
+                    for file in files:
+                        abs_path = Path(root) / file
+                        rel_path = abs_path.relative_to(local_path)
+                        s3_key = f"{target_prefix}{str(rel_path).replace(sep, '/')}"
+                        files_to_upload.append((abs_path, s3_key))
+                return files_to_upload
+
+            files_map = await to_thread(collect_files)
+
+            tasks = [self._upload_single_file(lp, k) for lp, k in files_map]
+            if tasks:
+                await gather(*tasks)
+
+            uri = f"s3://{self._bucket}/{target_prefix}"
+            await self._log_event("upload_dir", uri, str(local_path))
+            return uri
+
+        elif local_path.exists():
+            target_key = f"{self._s3_prefix}{(remote_name or local_name).lstrip('/')}"
+            logger.debug(f"Uploading {local_path} -> s3://{self._bucket}/{target_key}")
+
+            await self._upload_single_file(local_path, target_key)
+
+            uri = f"s3://{self._bucket}/{target_key}"
+            await self._log_event("upload", uri, str(local_path))
+            return uri
+        else:
+            raise FileNotFoundError(f"Local file/dir not found: {local_path}")
+
+    async def read_text(self, name_or_uri: str) -> str:
+        bucket, key, _ = self._parse_s3_uri(name_or_uri)
+        filename = key.split("/")[-1]
+        local_path = self.path(filename)
+
+        if not local_path.exists():
+            await self.download(name_or_uri)
+
+        async with aiopen(local_path, "r", encoding="utf-8") as f:
+            return await f.read()
+
+    async def read_json(self, name_or_uri: str) -> Any:
+        bucket, key, _ = self._parse_s3_uri(name_or_uri)
+        filename = key.split("/")[-1]
+        local_path = self.path(filename)
+
+        if not local_path.exists():
+            await self.download(name_or_uri)
+
+        async with aiopen(local_path, "rb") as f:
+            content = await f.read()
+            return loads(content)
+
+    async def write_json(self, filename: str, data: Any, upload: bool = True) -> str:
+        """Writes JSON locally (binary mode) and optionally uploads to S3."""
+        local_path = self.path(filename)
+        json_bytes = dumps(data)
+
+        async with aiopen(local_path, "wb") as f:
+            await f.write(json_bytes)
+
+        if upload:
+            return await self.upload(filename)
+        return f"file://{local_path}"
+
+    async def write_text(self, filename: str, text: str, upload: bool = True) -> Path:
+        local_path = self.path(filename)
+        async with aiopen(local_path, "w", encoding="utf-8") as f:
+            await f.write(text)
+
+        if upload:
+            await self.upload(filename)
+
+        return local_path
+
+    async def cleanup(self) -> None:
+        """Full cleanup of S3 prefix and local job directory."""
+        logger.info(f"Cleanup for job {self._job_id}...")
+        try:
+            entries = await to_thread(lambda: list(obstore_list(self._store, prefix=self._s3_prefix)))
+            paths_to_delete = [entry["path"] for entry in entries]
+            if paths_to_delete:
+                await delete_async(self._store, paths_to_delete)
+        except Exception as e:
+            logger.error(f"S3 cleanup error: {e}")
+
+        if self.local_dir.exists():
+            await to_thread(rmtree, self.local_dir)
+
+    async def _log_event(self, operation: str, file_uri: str, local_path: str) -> None:
+        if not self._history:
+            return
+
+        try:
+            await self._history.log_job_event(
+                {
+                    "job_id": self._job_id,
+                    "event_type": "s3_operation",
+                    "state": "running",
+                    "context_snapshot": {
+                        "operation": operation,
+                        "s3_uri": file_uri,
+                        "local_path": str(local_path),
+                    },
+                }
+            )
+        except Exception as e:
+            logger.warning(f"Failed to log S3 event: {e}")
+
+
+class S3Service:
+    """
+    Central service for S3 operations.
+    Initializes the Store and provides TaskFiles instances.
+    """
+
+    def __init__(self, config: Config, history: HistoryStorageBase | None = None):
+        self.config = config
+        self._history = history
+        self._store: S3Store | None = None
+        self._semaphore: Semaphore | None = None
+
+        self._config_present = bool(config.S3_ENDPOINT_URL and config.S3_ACCESS_KEY and config.S3_SECRET_KEY)
+
+        if self._config_present:
+            if HAS_S3_LIBS:
+                self._enabled = True
+                self._initialize_store()
+            else:
+                logger.error(
+                    "S3 configuration found, but 'avtomatika[s3]' extra dependencies are not installed. "
+                    "S3 support will be disabled. Install with: pip install 'avtomatika[s3]'"
+                )
+                self._enabled = False
+        else:
+            self._enabled = False
+            if any([config.S3_ENDPOINT_URL, config.S3_ACCESS_KEY, config.S3_SECRET_KEY]):
+                logger.warning("Partial S3 configuration found. S3 support disabled.")
+
+    def _initialize_store(self) -> None:
+        try:
+            self._store = S3Store(
+                bucket=self.config.S3_DEFAULT_BUCKET,
+                access_key_id=self.config.S3_ACCESS_KEY,
+                secret_access_key=self.config.S3_SECRET_KEY,
+                region=self.config.S3_REGION,
+                endpoint=self.config.S3_ENDPOINT_URL,
+                allow_http="http://" in self.config.S3_ENDPOINT_URL,
+                force_path_style=True,
+            )
+            self._semaphore = Semaphore(self.config.S3_MAX_CONCURRENCY)
+            logger.info(
+                f"S3Service initialized (Endpoint: {self.config.S3_ENDPOINT_URL}, "
+                f"Bucket: {self.config.S3_DEFAULT_BUCKET}, "
+                f"Max Concurrency: {self.config.S3_MAX_CONCURRENCY})"
+            )
+        except Exception as e:
+            logger.error(f"Failed to initialize S3 Store: {e}")
+            self._enabled = False
+
+    def get_task_files(self, job_id: str) -> TaskFiles | None:
+        if not self._enabled or not self._store or not self._semaphore:
+            return None
+
+        return TaskFiles(
+            self._store,
+            self.config.S3_DEFAULT_BUCKET,
+            job_id,
+            self.config.TASK_FILES_DIR,
+            self._semaphore,
+            self._history,
+        )
+
+    async def close(self) -> None:
+        pass
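
Usage note (not part of the diff): a minimal sketch of how the new S3Service and TaskFiles classes could be wired together. It assumes a Config object carrying the S3_* and TASK_FILES_DIR settings referenced above, an installed 'avtomatika[s3]' extra, and that Config() can be built from environment settings; the job ID and file names are illustrative.

from asyncio import run

from avtomatika.config import Config
from avtomatika.s3 import S3Service


async def main() -> None:
    # Config() is assumed to pick up S3_* settings from the environment.
    service = S3Service(Config())
    files = service.get_task_files("job-123")
    if files is None:
        # get_task_files() returns None when S3 is disabled or misconfigured.
        return

    # Write a result locally and upload it under s3://<bucket>/jobs/job-123/.
    uri = await files.write_json("result.json", {"status": "ok"})
    print(uri)

    # A trailing '/' marks the remote name as a directory (recursive download).
    await files.download("inputs/")

    # Remove both the S3 prefix and the local job directory.
    await files.cleanup()


run(main())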
@@ -142,6 +142,37 @@ class StorageBackend(ABC):
         """
         raise NotImplementedError
 
+    @abstractmethod
+    async def get_active_worker_ids(self) -> list[str]:
+        """Returns a list of IDs for all currently active workers.
+
+        :return: A list of worker ID strings.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    async def cleanup_expired_workers(self) -> None:
+        """Maintenance task to clean up internal indexes from expired worker entries."""
+        raise NotImplementedError
+
+    @abstractmethod
+    async def get_workers(self, worker_ids: list[str]) -> list[dict[str, Any]]:
+        """Bulk retrieves worker info for a list of IDs.
+
+        :param worker_ids: List of worker identifiers.
+        :return: List of worker info dictionaries.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    async def find_workers_for_task(self, task_type: str) -> list[str]:
+        """Finds idle workers that support the given task.
+
+        :param task_type: The type of task to find workers for.
+        :return: A list of worker IDs that are idle and support the task.
+        """
+        raise NotImplementedError
+
     @abstractmethod
     async def add_job_to_watch(self, job_id: str, timeout_at: float) -> None:
         """Add a job to the list for timeout tracking.
@@ -152,9 +183,10 @@ class StorageBackend(ABC):
         raise NotImplementedError
 
     @abstractmethod
-    async def get_timed_out_jobs(self) -> list[str]:
+    async def get_timed_out_jobs(self, limit: int = 100) -> list[str]:
         """Get a list of job IDs that are overdue and remove them from the tracking list.
 
+        :param limit: Maximum number of jobs to retrieve.
         :return: A list of overdue job IDs.
         """
         raise NotImplementedError
@@ -165,9 +197,10 @@ class StorageBackend(ABC):
         raise NotImplementedError
 
    @abstractmethod
-    async def dequeue_job(self) -> tuple[str, str] | None:
+    async def dequeue_job(self, block: int | None = None) -> tuple[str, str] | None:
         """Retrieve a job ID and its message ID from the execution queue.
 
+        :param block: Milliseconds to block if no message is available. None for non-blocking.
         :return: A tuple of (job_id, message_id) or None if the timeout has expired.
         """
         raise NotImplementedError
@@ -250,7 +283,7 @@ class StorageBackend(ABC):
         raise NotImplementedError
 
     @abstractmethod
-    async def set_worker_token(self, worker_id: str, token: str):
+    async def set_worker_token(self, worker_id: str, token: str) -> None:
         """Saves an individual token for a specific worker."""
         raise NotImplementedError
 
@@ -265,7 +298,7 @@ class StorageBackend(ABC):
         raise NotImplementedError
 
     @abstractmethod
-    async def flush_all(self):
+    async def flush_all(self) -> None:
         """Completely clears the storage. Used mainly for tests."""
         raise NotImplementedError
 
@@ -312,3 +345,11 @@ class StorageBackend(ABC):
         :return: True if the lock was successfully released, False otherwise.
         """
         raise NotImplementedError
+
+    @abstractmethod
+    async def ping(self) -> bool:
+        """Checks connection to the storage backend.
+
+        :return: True if storage is accessible, False otherwise.
+        """
+        raise NotImplementedError
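
For context (not part of the diff), here is an illustrative consumer of the new abstract methods: a dispatcher-style helper that checks connectivity with ping(), looks up idle workers for a task type, and bulk-fetches their info. The helper name and task type are hypothetical, and `storage` is any StorageBackend implementation (the module path is not shown in this diff, so the parameter is left untyped).

from typing import Any


async def pick_worker(storage, task_type: str) -> dict[str, Any] | None:
    # Bail out early if the backend is unreachable.
    if not await storage.ping():
        return None

    # Idle workers that advertise support for this task type.
    worker_ids = await storage.find_workers_for_task(task_type)
    if not worker_ids:
        return None

    # Bulk fetch of worker info dicts; pick the first candidate.
    workers = await storage.get_workers(worker_ids)
    return workers[0] if workers else None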
@@ -154,6 +154,33 @@ class MemoryStorage(StorageBackend):
                 )
             return active_workers
 
+    async def get_active_worker_ids(self) -> list[str]:
+        async with self._lock:
+            now = monotonic()
+            return [worker_id for worker_id, ttl in self._worker_ttls.items() if ttl > now]
+
+    async def cleanup_expired_workers(self) -> None:
+        async with self._lock:
+            await self._clean_expired()
+
+    async def get_workers(self, worker_ids: list[str]) -> list[dict[str, Any]]:
+        async with self._lock:
+            return [self._workers[wid] for wid in worker_ids if wid in self._workers]
+
+    async def find_workers_for_task(self, task_type: str) -> list[str]:
+        """Finds idle workers supporting the task (O(N) for memory storage)."""
+        async with self._lock:
+            now = monotonic()
+            candidates = []
+            for worker_id, info in self._workers.items():
+                if self._worker_ttls.get(worker_id, 0) <= now:
+                    continue
+                if info.get("status", "idle") != "idle":
+                    continue
+                if task_type in info.get("supported_tasks", []):
+                    candidates.append(worker_id)
+            return candidates
+
     async def add_job_to_watch(self, job_id: str, timeout_at: float) -> None:
         async with self._lock:
             self._watched_jobs[job_id] = timeout_at
@@ -173,13 +200,21 @@ class MemoryStorage(StorageBackend):
     async def enqueue_job(self, job_id: str) -> None:
         await self._job_queue.put(job_id)
 
-    async def dequeue_job(self) -> tuple[str, str] | None:
-        """Waits indefinitely for a job ID from the queue and returns it.
-        Returns a tuple of (job_id, message_id). In MemoryStorage, message_id is dummy.
+    async def dequeue_job(self, block: int | None = None) -> tuple[str, str] | None:
+        """Waits for a job ID from the queue.
+        If block is None, waits indefinitely.
+        If block is int, waits for that many milliseconds.
         """
-        job_id = await self._job_queue.get()
-        self._job_queue.task_done()
-        return job_id, "memory-msg-id"
+        try:
+            if block is None:
+                job_id = await self._job_queue.get()
+            else:
+                job_id = await wait_for(self._job_queue.get(), timeout=block / 1000.0)
+
+            self._job_queue.task_done()
+            return job_id, "memory-msg-id"
+        except AsyncTimeoutError:
+            return None
 
     async def ack_job(self, message_id: str) -> None:
         """No-op for MemoryStorage as it doesn't support persistent streams."""
@@ -334,3 +369,6 @@ class MemoryStorage(StorageBackend):
                 del self._locks[key]
                 return True
             return False
+
+    async def ping(self) -> bool:
+        return True
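
A small check of the new MemoryStorage behaviour (illustrative, not from the package): with an item queued, dequeue_job returns immediately; on an empty queue with block=100 it returns None after roughly 100 ms. Pass in a MemoryStorage instance; its import path is not shown in this diff, so it is left as a parameter.

async def demo(storage) -> None:
    # Health check added in this release.
    assert await storage.ping() is True

    # Non-empty queue: returned immediately with the dummy message ID.
    await storage.enqueue_job("job-1")
    print(await storage.dequeue_job())          # ('job-1', 'memory-msg-id')

    # Empty queue, block for 100 ms: times out and returns None.
    print(await storage.dequeue_job(block=100))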