shared-tensor 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ """
2
+ Shared Tensor Library
3
+
4
+ A library for sharing GPU memory objects across processes using IPC mechanisms.
5
+ Enables model and inference engine separation architecture using JSON-RPC 2.0 protocol.
6
+ """
7
+
8
+ from shared_tensor.provider import SharedTensorProvider
9
+ from shared_tensor.client import SharedTensorClient
10
+ from shared_tensor.server import SharedTensorServer
11
+ from shared_tensor.async_provider import AsyncSharedTensorProvider
12
+ from shared_tensor.async_client import AsyncSharedTensorClient
13
+ from shared_tensor.async_task import TaskStatus, TaskInfo
14
+
15
+ __version__ = "0.1.0"
16
+ __author__ = "Athena Team"
17
+
18
+ # Export main functionality
19
+ __all__ = [
20
+ "SharedTensorProvider",
21
+ "SharedTensorClient",
22
+ "SharedTensorServer",
23
+ "AsyncSharedTensorProvider",
24
+ "AsyncSharedTensorClient",
25
+ "TaskStatus",
26
+ "TaskInfo",
27
+ ]
@@ -0,0 +1,302 @@
1
+ """
2
+ Async Shared Tensor Client
3
+
4
+ Supports long-running task execution without HTTP timeout limitations.
5
+ """
6
+
7
+ import time
8
+ import logging
9
+ from typing import Any, Dict, Optional, Callable
10
+
11
+ import torch
12
+
13
+ from shared_tensor.errors import SharedTensorServerError
14
+ from shared_tensor.client import SharedTensorClient
15
+ from shared_tensor.async_task import TaskStatus, TaskInfo
16
+ from shared_tensor.utils import serialize_result
17
+
18
+
19
# Public API of this module.
__all__ = ["AsyncSharedTensorClient", "execute_remote_function_async"]


# Module-level logger; handlers/levels are configured by the host application.
logger = logging.getLogger(__name__)
23
+
24
+
25
class AsyncSharedTensorClient:
    """
    Async client for shared tensor operations.

    Submits long-running tasks to the shared tensor server and polls for
    their results, so callers are not limited by a single HTTP request
    timeout.
    """

    def __init__(self, server_port: int = 2537, verbose_debug: bool = False, poll_interval: float = 1.0):
        """
        Initialize async client.

        Args:
            server_port: Port of the shared tensor server (on localhost)
            verbose_debug: Whether to enable verbose debug logging
            poll_interval: Interval in seconds between task-status polls
        """
        self.server_url = f"http://localhost:{server_port}"
        self.verbose_debug = verbose_debug
        self.poll_interval = poll_interval
        # Reuse the synchronous JSON-RPC client as the transport layer.
        self._client = SharedTensorClient(server_port, verbose_debug=verbose_debug)

    def submit_task(self, function_path: str, args: tuple = (), kwargs: Dict[str, Any] = None, options: Dict[str, Any] = None) -> str:
        """
        Submit a task for async execution.

        Args:
            function_path: Function path in format "module.submodule:function_name"
            args: Positional arguments
            kwargs: Keyword arguments
            options: Options for the task

        Returns:
            Task ID for tracking the execution

        Raises:
            SharedTensorServerError: If the server reports an error or does
                not return a task ID.
        """
        if kwargs is None:
            kwargs = {}

        # Arguments travel over JSON-RPC as pickled bytes rendered to hex;
        # empty args/kwargs are sent as an empty string.
        args_hex = serialize_result(args).hex() if args else ""
        kwargs_hex = serialize_result(kwargs).hex() if kwargs else ""

        if self.verbose_debug:
            logger.debug(f"Submitting task with function path {function_path}, args {args}, kwargs {kwargs}, and options {options}")
        else:
            logger.debug(f"Submitting task with function path {function_path}")

        response = self._client._send_request(
            self._client._create_request("submit_task", {
                "function_path": function_path,
                "args": args_hex,
                "kwargs": kwargs_hex,
                "options": options,
                "encoding": "pickle_hex",
            })
        )

        if response.error:
            raise SharedTensorServerError(f"Failed to submit task: {response.error}")

        task_id = response.result.get("task_id")
        if not task_id:
            raise SharedTensorServerError("Server did not return task ID")

        logger.debug(f"Task submitted: {task_id}")
        return task_id

    def get_task_status(self, task_id: str) -> TaskInfo:
        """
        Get current status of a task.

        Args:
            task_id: Task ID returned by submit_task

        Returns:
            TaskInfo object with current status

        Raises:
            SharedTensorServerError: If the server reports an error or the
                task is unknown.
        """
        logger.debug(f"Getting task status for task {task_id}")
        response = self._client._send_request(
            self._client._create_request("get_task_status", {"task_id": task_id})
        )

        if response.error:
            logger.debug(f"Failed to get task status: {response.error}")
            raise SharedTensorServerError(f"Failed to get task status: {response.error}")

        task_data = response.result
        if not task_data:
            raise SharedTensorServerError(f"Task {task_id} not found")

        return TaskInfo.from_dict(task_data)

    def get_task_result(self, task_id: str) -> Any:
        """
        Get result of a completed task.

        Args:
            task_id: Task ID

        Returns:
            Task result (deserialized), or None if the task produced no
            result payload.

        Raises:
            SharedTensorServerError: If the task failed or is not yet
                completed.
        """
        task_info = self.get_task_status(task_id)

        if task_info.status == TaskStatus.FAILED:
            raise SharedTensorServerError(f"Task failed: {task_info.error_message}")

        if task_info.status != TaskStatus.COMPLETED:
            raise SharedTensorServerError(f"Task not completed, current status: {task_info.status.value}")

        if task_info.result_hex:
            result_bytes = bytes.fromhex(task_info.result_hex)
            # Deserialize with torch's multiprocessing pickler — presumably the
            # counterpart of serialize_result, so shared-tensor IPC handles are
            # rebuilt; verify against the server-side encoder.
            # NOTE(review): unpickling is code execution — only poll servers
            # you trust.
            return torch.multiprocessing.reducer.ForkingPickler.loads(result_bytes)
        return None

    def wait_for_task(self, task_id: str, timeout: Optional[float] = None,
                      callback: Optional[Callable[[TaskInfo], None]] = None) -> Any:
        """
        Wait for a task to complete and return its result.

        Args:
            task_id: Task ID
            timeout: Maximum time to wait in seconds. None waits forever;
                0 raises right after the first status poll.
            callback: Optional callback invoked with the TaskInfo on each poll

        Returns:
            Task result

        Raises:
            SharedTensorServerError: If the task fails, is cancelled, or the
                timeout elapses.
        """
        start_time = time.time()

        while True:
            task_info = self.get_task_status(task_id)

            # Callback failures are logged but never abort the wait.
            if callback:
                try:
                    callback(task_info)
                except Exception as e:
                    logger.warning(f"Callback error: {e}")

            if task_info.status == TaskStatus.COMPLETED:
                return self.get_task_result(task_id)

            if task_info.status == TaskStatus.FAILED:
                raise SharedTensorServerError(f"Task {task_id} failed: {task_info.error_message}")

            if task_info.status == TaskStatus.CANCELLED:
                raise SharedTensorServerError("Task was cancelled")

            # BUGFIX: compare against None explicitly so timeout=0 means
            # "time out immediately" rather than "wait forever" (0 is falsy,
            # so the previous `if timeout and ...` skipped the check).
            if timeout is not None and (time.time() - start_time) > timeout:
                raise SharedTensorServerError(f"Task {task_id} did not complete within {timeout} seconds")

            # Sleep before the next poll.
            time.sleep(self.poll_interval)

    def execute_function_async(self, function_path: str, args: tuple = (),
                               kwargs: Dict[str, Any] = None, options: Dict[str, Any] = None, wait: bool = True,
                               timeout: Optional[float] = None,
                               callback: Optional[Callable[[TaskInfo], None]] = None) -> Any:
        """
        Execute a function asynchronously.

        Args:
            function_path: Function path
            args: Positional arguments
            kwargs: Keyword arguments
            options: Options for the task
            wait: Whether to block until completion
            timeout: Maximum time to wait if wait=True
            callback: Status update callback

        Returns:
            Function result if wait=True, otherwise the task ID.
        """
        task_id = self.submit_task(function_path, args, kwargs, options)

        if wait:
            return self.wait_for_task(task_id, timeout, callback)
        return task_id

    def cancel_task(self, task_id: str) -> bool:
        """
        Cancel a task.

        Args:
            task_id: Task ID

        Returns:
            True if successfully cancelled; False on any server error
            (best-effort — errors are logged, not raised).
        """
        response = self._client._send_request(
            self._client._create_request("cancel_task", {"task_id": task_id})
        )

        if response.error:
            logger.error(f"Failed to cancel task: {response.error}")
            return False

        return response.result.get("cancelled", False)

    def list_tasks(self, status: Optional[str] = None) -> Dict[str, TaskInfo]:
        """
        List tasks on the server.

        Args:
            status: Optional status filter

        Returns:
            Dictionary mapping task ID -> TaskInfo

        Raises:
            SharedTensorServerError: If the server reports an error.
        """
        params = {}
        if status:
            params["status"] = status

        response = self._client._send_request(
            self._client._create_request("list_tasks", params)
        )

        if response.error:
            raise SharedTensorServerError(f"Failed to list tasks: {response.error}")

        return {task_id: TaskInfo.from_dict(task_data)
                for task_id, task_data in response.result.items()}

    def close(self):
        """Close the underlying transport client."""
        self._client.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Never suppress exceptions; just release the connection.
        self.close()
269
+
270
+
271
def execute_remote_function_async(
    function_path: str,
    args: tuple = (),
    kwargs: Dict[str, Any] = None,
    options: Dict[str, Any] = None,
    server_port: int = 2537,
    verbose_debug: bool = False,
    poll_interval: float = 1.0,
    wait: bool = True,
    timeout: Optional[float] = None,
    callback: Optional[Callable[[TaskInfo], None]] = None
) -> Any:
    """
    Convenience wrapper: run one remote function call through a short-lived
    AsyncSharedTensorClient.

    Args:
        function_path: Function path in "module.submodule:function_name" form
        args: Positional arguments
        kwargs: Keyword arguments
        options: Options for the task
        server_port: Port of the shared tensor server
        verbose_debug: Whether to enable verbose debug logging
        poll_interval: Interval in seconds between task-status polls
        wait: Whether to block until completion
        timeout: Maximum time to wait
        callback: Status update callback

    Returns:
        Function result if wait=True, task ID if wait=False.
    """
    client = AsyncSharedTensorClient(server_port, verbose_debug, poll_interval)
    try:
        return client.execute_function_async(
            function_path, args, kwargs, options, wait, timeout, callback
        )
    finally:
        # Mirrors the context-manager protocol: always release the client.
        client.close()
@@ -0,0 +1,173 @@
1
+ """
2
+ Async Provider for Shared Tensor
3
+
4
+ Extends the provider pattern to support async task execution
5
+ """
6
+
7
+ import os
8
+ import logging
9
+ from functools import wraps
10
+ from typing import Any, Dict, Callable, Optional
11
+
12
+ from shared_tensor.errors import SharedTensorProviderError
13
+ from shared_tensor.provider import SharedTensorProvider
14
+ from shared_tensor.async_client import AsyncSharedTensorClient
15
+ from shared_tensor.async_task import TaskInfo
16
+
17
+
18
# Public API of this module.
__all__ = ["AsyncSharedTensorProvider"]

# Module-level logger; handlers/levels are configured by the host application.
logger = logging.getLogger(__name__)
# Rank of this process, read once at import time from the RANK environment
# variable (0 when unset); used to offset the default server port per rank.
global_rank = int(os.getenv("RANK", 0))
22
+
23
+
24
class AsyncSharedTensorProvider(SharedTensorProvider):
    """
    Async provider for shared tensor operations.

    Extends SharedTensorProvider with async task submission, status polling,
    waiting, and cancellation through a lazily created AsyncSharedTensorClient.
    """

    def __init__(self, server_port: int = 2537 + global_rank, verbose_debug: bool = False, poll_interval: float = 1.0):
        """
        Initialize the async provider.

        Args:
            server_port: Server port. The default is offset by the process
                rank; note it is computed once at import time from the RANK
                environment variable.
            verbose_debug: Whether to enable verbose debug logging
            poll_interval: Interval in seconds between task-status polls
        """
        super().__init__(server_port=server_port, verbose_debug=verbose_debug)
        self.poll_interval = poll_interval
        logger.debug(f"AsyncSharedTensorProvider initialized with server port {server_port}, verbose debug {verbose_debug}, and poll interval {poll_interval}")
        # Created lazily on first async operation.
        self._async_client = None

    def _get_async_client(self) -> AsyncSharedTensorClient:
        """Get or lazily create the async client."""
        if self._async_client is None:
            logger.debug(f"Creating new async client with server port {self.server_port} and poll interval {self.poll_interval}")
            self._async_client = AsyncSharedTensorClient(self.server_port, self.verbose_debug, self.poll_interval)
            logger.debug(f"Async client created with server port {self.server_port} and poll interval {self.poll_interval}")
        return self._async_client

    def share_async(self, name: Optional[str] = None, wait: bool = True, singleton: bool = True, singleton_key_formatter: Optional[str] = None):
        """
        Decorator to register a function for async remote sharing.

        Args:
            name: Optional custom name for the function
            wait: Whether to wait for completion by default
            singleton: Whether to use a singleton instance of the function result
            singleton_key_formatter: Formatter for cached results

        Returns:
            A decorator that replaces the function with a remote-executing
            wrapper exposing `submit_async` and `execute_async` attributes.
        """
        def decorator(func: Callable):
            func_name = name or func.__name__

            # NOTE(review): server_mode appears to be a string flag inherited
            # from SharedTensorProvider; in server mode the function runs
            # locally and is returned unwrapped — confirm against the parent.
            if self.server_mode == "true":
                logger.debug(f"Server mode is true, returning function {func_name} without registering")
                return func

            logger.debug(f"Server mode is false, registering function {func_name}")

            function_path = self._get_function_path(func)
            logger.debug(f"Function {func_name} registered with function path {function_path}")

            options = {
                'name': func_name,
                'singleton': singleton,
                'singleton_key_formatter': singleton_key_formatter,
            }

            function_info = {
                'name': func_name,
                'function_path': function_path,
                'options': options,
                'async_default_wait': wait
            }

            self._registered_functions[func_name] = function_info

            @wraps(func)
            def wrapper(*args, **kwargs):
                return self._execute_async_function(func_name, args, kwargs, options)

            # Extra entry points: fire-and-forget submission, and execution
            # with explicit wait/timeout/callback overrides. `wait=wait` binds
            # the decorator's default now, avoiding late-binding surprises.
            wrapper.submit_async = lambda *args, **kwargs: self._submit_async_function(func_name, args, kwargs, options)
            wrapper.execute_async = lambda *args, wait=wait, timeout=None, callback=None, **kwargs: \
                self._execute_async_function_with_options(func_name, args, kwargs, options, wait, timeout, callback)

            return wrapper
        return decorator

    def _submit_async_function(self, func_name: str, args: tuple, kwargs: dict, options: dict) -> str:
        """Submit function for async execution, return the task ID."""
        # BUGFIX: the registration check sits outside the try block so an
        # unregistered function raises a clean "not registered" error instead
        # of being caught below and double-wrapped.
        if func_name not in self._registered_functions:
            raise SharedTensorProviderError(f"Function {func_name} not registered")

        function_info = self._registered_functions[func_name]
        function_path = function_info['function_path']

        try:
            async_client = self._get_async_client()
            logger.debug(f"Submitting async function {func_name} with function path {function_path} and options {options}")
            return async_client.submit_task(function_path, args, kwargs, options)
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise SharedTensorProviderError(f"Failed to submit async function {func_name}: {str(e)}") from e

    def _execute_async_function(self, func_name: str, args: tuple, kwargs: dict, options: dict) -> Any:
        """Execute function using the wait mode chosen at registration time."""
        function_info = self._registered_functions[func_name]
        wait = function_info.get('async_default_wait', True)
        if wait:
            return self._execute_async_function_with_options(func_name, args, kwargs, options, True, None, None)
        return self._submit_async_function(func_name, args, kwargs, options)

    def _execute_async_function_with_options(self, func_name: str, args: tuple, kwargs: dict, options: dict,
                                             wait: bool, timeout: Optional[float],
                                             callback: Optional[Callable[[TaskInfo], None]]) -> Any:
        """Execute function with explicit wait/timeout/callback options."""
        # Same rationale as _submit_async_function: fail fast, un-wrapped.
        if func_name not in self._registered_functions:
            raise SharedTensorProviderError(f"Function {func_name} not registered")

        function_info = self._registered_functions[func_name]
        function_path = function_info['function_path']

        try:
            async_client = self._get_async_client()
            logger.debug(f"Executing async function {func_name} with function path {function_path} and options {options}")
            return async_client.execute_function_async(function_path, args, kwargs, options, wait, timeout, callback)
        except Exception as e:
            raise SharedTensorProviderError(f"Failed to execute async function {func_name}: {str(e)}") from e

    def get_task_status(self, task_id: str) -> TaskInfo:
        """Get status of a task."""
        async_client = self._get_async_client()
        logger.debug(f"Getting status of task {task_id}")
        return async_client.get_task_status(task_id)

    def get_task_result(self, task_id: str) -> Any:
        """Get result of a completed task."""
        async_client = self._get_async_client()
        logger.debug(f"Getting result of task {task_id}")
        return async_client.get_task_result(task_id)

    def wait_for_task(self, task_id: str, timeout: Optional[float] = None,
                      callback: Optional[Callable[[TaskInfo], None]] = None) -> Any:
        """Wait for a task to complete and return its result."""
        async_client = self._get_async_client()
        logger.debug(f"Waiting for task {task_id} with timeout {timeout} and callback {callback}")
        return async_client.wait_for_task(task_id, timeout, callback)

    def cancel_task(self, task_id: str) -> bool:
        """Cancel a task."""
        async_client = self._get_async_client()
        logger.debug(f"Cancelling task {task_id}")
        return async_client.cancel_task(task_id)

    def list_tasks(self, status: Optional[str] = None) -> Dict[str, TaskInfo]:
        """List tasks on the server."""
        async_client = self._get_async_client()
        logger.debug(f"Listing tasks with status {status}")
        return async_client.list_tasks(status)

    def close(self):
        """Close the provider and its clients."""
        super().close()
        if self._async_client:
            logger.debug("Closing async client")
            self._async_client.close()
            logger.debug("Async client closed")
            self._async_client = None