indexify 0.2.40__py3-none-any.whl → 0.2.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/cli.py +92 -52
- indexify/executor/agent.py +99 -187
- indexify/executor/api_objects.py +2 -8
- indexify/executor/downloader.py +129 -90
- indexify/executor/executor_tasks.py +15 -30
- indexify/executor/function_executor/function_executor.py +32 -0
- indexify/executor/function_executor/function_executor_factory.py +26 -0
- indexify/executor/function_executor/function_executor_map.py +91 -0
- indexify/executor/function_executor/process_function_executor.py +64 -0
- indexify/executor/function_executor/process_function_executor_factory.py +102 -0
- indexify/executor/function_worker.py +227 -184
- indexify/executor/runtime_probes.py +9 -8
- indexify/executor/task_fetcher.py +80 -0
- indexify/executor/task_reporter.py +18 -25
- indexify/executor/task_store.py +35 -16
- indexify/function_executor/function_executor_service.py +86 -0
- indexify/function_executor/handlers/run_function/function_inputs_loader.py +54 -0
- indexify/function_executor/handlers/run_function/handler.py +149 -0
- indexify/function_executor/handlers/run_function/request_validator.py +24 -0
- indexify/function_executor/handlers/run_function/response_helper.py +98 -0
- indexify/function_executor/initialize_request_validator.py +22 -0
- indexify/function_executor/proto/configuration.py +13 -0
- indexify/function_executor/proto/function_executor.proto +70 -0
- indexify/function_executor/proto/function_executor_pb2.py +53 -0
- indexify/function_executor/proto/function_executor_pb2.pyi +125 -0
- indexify/function_executor/proto/function_executor_pb2_grpc.py +163 -0
- indexify/function_executor/proto/message_validator.py +38 -0
- indexify/function_executor/server.py +31 -0
- indexify/functions_sdk/data_objects.py +0 -9
- indexify/functions_sdk/graph.py +10 -11
- indexify/functions_sdk/graph_definition.py +2 -2
- indexify/functions_sdk/image.py +35 -30
- indexify/functions_sdk/indexify_functions.py +5 -5
- indexify/http_client.py +15 -23
- indexify/logging.py +32 -0
- {indexify-0.2.40.dist-info → indexify-0.2.41.dist-info}/METADATA +3 -1
- indexify-0.2.41.dist-info/RECORD +53 -0
- indexify/executor/indexify_executor.py +0 -32
- indexify-0.2.40.dist-info/RECORD +0 -34
- {indexify-0.2.40.dist-info → indexify-0.2.41.dist-info}/LICENSE.txt +0 -0
- {indexify-0.2.40.dist-info → indexify-0.2.41.dist-info}/WHEEL +0 -0
- {indexify-0.2.40.dist-info → indexify-0.2.41.dist-info}/entry_points.txt +0 -0
indexify/executor/api_objects.py
CHANGED
@@ -1,8 +1,6 @@
 from typing import Any, Dict, List, Optional

-from pydantic import BaseModel
-
-from indexify.functions_sdk.data_objects import IndexifyData
+from pydantic import BaseModel


 class Task(BaseModel):
@@ -21,7 +19,7 @@ class ExecutorMetadata(BaseModel):
     executor_version: str
     addr: str
     image_name: str
-
+    image_hash: str
     labels: Dict[str, Any]


@@ -29,10 +27,6 @@ class RouterOutput(BaseModel):
     edges: List[str]


-class FnOutput(BaseModel):
-    payload: Json
-
-
 class TaskResult(BaseModel):
     router_output: Optional[RouterOutput] = None
     outcome: str
indexify/executor/downloader.py
CHANGED
@@ -1,22 +1,22 @@
+import asyncio
 import os
-from typing import Optional
+from typing import Any, Optional

 import httpx
 import structlog
-from pydantic import BaseModel

-from indexify.
+from indexify.function_executor.proto.function_executor_pb2 import (
+    SerializedObject,
+)

 from ..common_util import get_httpx_client
-from ..functions_sdk.object_serializer import JsonSerializer, get_serializer
 from .api_objects import Task

-logger = structlog.get_logger(module=__name__)

-
-
-
-
+class DownloadedInputs:
+    def __init__(self, input: SerializedObject, init_value: Optional[SerializedObject]):
+        self.input = input
+        self.init_value = init_value


 class Downloader:
@@ -24,103 +24,142 @@ class Downloader:
         self, code_path: str, base_url: str, config_path: Optional[str] = None
     ):
         self.code_path = code_path
-        self.
-        self._client = get_httpx_client(config_path)
+        self._base_url = base_url
+        self._client = get_httpx_client(config_path, make_async=True)
+
+    async def download_graph(self, task: Task) -> SerializedObject:
+        # Cache graph to reduce load on the server.
+        graph_path = os.path.join(
+            self.code_path,
+            "graph_cache",
+            task.namespace,
+            f"{task.compute_graph}.{task.graph_version}",
+        )
+        # Filesystem operations are synchronous.
+        # Run in a separate thread to not block the main event loop.
+        graph: Optional[SerializedObject] = await asyncio.to_thread(
+            self._read_cached_graph, graph_path
+        )
+        if graph is not None:
+            return graph
+
+        logger = self._task_logger(task)
+        graph: SerializedObject = await self._fetch_graph(task, logger)
+        # Filesystem operations are synchronous.
+        # Run in a separate thread to not block the main event loop.
+        # We don't need to wait for the write completion so we use create_task.
+        asyncio.create_task(
+            asyncio.to_thread(self._write_cached_graph, task, graph_path, graph)
+        )

-
-
+        return graph
+
+    def _read_cached_graph(self, path: str) -> Optional[SerializedObject]:
+        if not os.path.exists(path):
+            return None
+
+        with open(path, "rb") as f:
+            return SerializedObject.FromString(f.read())
+
+    def _write_cached_graph(
+        self, task: Task, path: str, graph: SerializedObject
+    ) -> None:
         if os.path.exists(path):
-
+            # Another task already cached the graph.
+            return None
+
+        tmp_path = os.path.join(self.code_path, "task_graph_cache", task.id)
+        os.makedirs(os.path.dirname(tmp_path), exist_ok=True)
+        with open(tmp_path, "wb") as f:
+            f.write(graph.SerializeToString())
+        os.makedirs(os.path.dirname(path), exist_ok=True)
+        # Atomically rename the fully written file at tmp path.
+        # This allows us to not use any locking because file link/unlink
+        # are atomic operations at filesystem level.
+        os.replace(tmp_path, path)
+
+    async def download_inputs(self, task: Task) -> DownloadedInputs:
+        logger = self._task_logger(task)
+
+        input: SerializedObject
+        first_function_in_graph = task.invocation_id == task.input_key.split("|")[-1]
+        if first_function_in_graph:
+            # The first function in Graph gets its input from graph invocation payload.
+            input = await self._fetch_graph_invocation_payload(task, logger)
+        else:
+            input = await self._fetch_function_input(task, logger)
+
+        init_value: Optional[SerializedObject] = None
+        if task.reducer_output_id is not None:
+            init_value = await self._fetch_function_init_value(task, logger)
+
+        return DownloadedInputs(input=input, init_value=init_value)

-
-
+    def _task_logger(self, task: Task) -> Any:
+        return structlog.get_logger(
+            module=__name__,
+            namespace=task.namespace,
+            name=task.compute_graph,
+            version=task.graph_version,
+            task_id=task.id,
         )
-
-
+
+    async def _fetch_graph(self, task: Task, logger: Any) -> SerializedObject:
+        """Downloads the compute graph for the task and returns it."""
+        return await self._fetch_url(
+            url=f"{self._base_url}/internal/namespaces/{task.namespace}/compute_graphs/{task.compute_graph}/versions/{task.graph_version}/code",
+            resource_description=f"compute graph: {task.compute_graph}",
+            logger=logger,
         )
-        try:
-            response.raise_for_status()
-        except httpx.HTTPStatusError as e:
-            logger.error(
-                "failed to download graph",
-                namespace=namespace,
-                name=name,
-                version=version,
-                error=response.text,
-            )
-            raise

-
-
-
-        return
-
-
-
-
-            url = f"{self.base_url}/namespaces/{task.namespace}/compute_graphs/{task.compute_graph}/invocations/{task.invocation_id}/payload"
-        else:
-            url = f"{self.base_url}/internal/fn_outputs/{task.input_key}"
+    async def _fetch_graph_invocation_payload(
+        self, task: Task, logger: Any
+    ) -> SerializedObject:
+        return await self._fetch_url(
+            url=f"{self._base_url}/namespaces/{task.namespace}/compute_graphs/{task.compute_graph}/invocations/{task.invocation_id}/payload",
+            resource_description=f"graph invocation payload: {task.invocation_id}",
+            logger=logger,
+        )

-
-
-
+    async def _fetch_function_input(self, task: Task, logger: Any) -> SerializedObject:
+        return await self._fetch_url(
+            url=f"{self._base_url}/internal/fn_outputs/{task.input_key}",
+            resource_description=f"function input: {task.input_key}",
+            logger=logger,
+        )

-
-
+    async def _fetch_function_init_value(
+        self, task: Task, logger: Any
+    ) -> SerializedObject:
+        return await self._fetch_url(
+            url=f"{self._base_url}/namespaces/{task.namespace}/compute_graphs/{task.compute_graph}"
+            f"/invocations/{task.invocation_id}/fn/{task.compute_fn}/output/{task.reducer_output_id}",
+            resource_description=f"reducer output: {task.reducer_output_id}",
+            logger=logger,
+        )

+    async def _fetch_url(
+        self, url: str, resource_description: str, logger: Any
+    ) -> SerializedObject:
+        logger.info(f"fetching {resource_description}", url=url)
+        response = await self._client.get(url)
         try:
             response.raise_for_status()
         except httpx.HTTPStatusError as e:
             logger.error(
-                "failed to download 
-                url=url,
-                reducer_url=reducer_url,
+                f"failed to download {resource_description}",
                 error=response.text,
+                exc_info=e,
             )
             raise

-
-
-
-
-
-        if task.invocation_id == input_id:
-            return DownloadedInputs(
-                input=IndexifyData(
-                    payload=response.content, id=input_id, encoder=encoder
-                ),
-            )
-
-        input_payload = response.content
-
-        if reducer_url:
-            response = self._client.get(reducer_url)
-            try:
-                response.raise_for_status()
-                init_value = response.content
-            except httpx.HTTPStatusError as e:
-                logger.error(
-                    "failed to download reducer output",
-                    url=reducer_url,
-                    error=response.text,
-                )
-                raise
-            return DownloadedInputs(
-                input=IndexifyData(
-                    input_id=task.invocation_id,
-                    payload=input_payload,
-                    encoder=encoder,
-                ),
-                init_value=IndexifyData(
-                    input_id=task.invocation_id, payload=init_value, encoder=encoder
-                ),
+        # We're hardcoding the content type currently used by Python SDK. It might change in the future.
+        # There's no other way for now to determine if the response is a bytes or string.
+        if response.headers["content-type"] == "application/octet-stream":
+            return SerializedObject(
+                bytes=response.content, content_type=response.headers["content-type"]
             )
-
-
-
-            input_id=task.invocation_id,
-            payload=input_payload,
-            encoder=encoder,
+        else:
+            return SerializedObject(
+                string=response.text, content_type=response.headers["content-type"]
             )
-        )
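
The new cache write path above relies on the write-to-temp-then-rename idiom instead of file locking. Below is a minimal illustrative sketch of that idiom, not part of the diff; the helper name cache_atomically and its arguments are hypothetical, and atomicity assumes both paths sit on the same filesystem, as they do under code_path in the downloader.

import asyncio
import os


async def cache_atomically(tmp_path: str, final_path: str, data: bytes) -> None:
    def _write() -> None:
        # Blocking filesystem work, pushed off the event loop like the downloader does.
        os.makedirs(os.path.dirname(tmp_path) or ".", exist_ok=True)
        with open(tmp_path, "wb") as f:
            f.write(data)
        os.makedirs(os.path.dirname(final_path) or ".", exist_ok=True)
        # os.replace() publishes the fully written file atomically, so readers
        # see either no file at all or the complete new file, never a partial one.
        os.replace(tmp_path, final_path)

    await asyncio.to_thread(_write)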
indexify/executor/executor_tasks.py
CHANGED
@@ -1,73 +1,58 @@
 import asyncio
-from typing import Optional

-from 
+from pydantic import BaseModel

 from .api_objects import Task
 from .downloader import Downloader
-from .function_worker import FunctionWorker
+from .function_worker import FunctionWorker, FunctionWorkerInput


 class DownloadGraphTask(asyncio.Task):
     def __init__(
         self,
         *,
-
+        function_worker_input: FunctionWorkerInput,
         downloader: Downloader,
         **kwargs,
     ):
         kwargs["name"] = "download_graph"
         kwargs["loop"] = asyncio.get_event_loop()
         super().__init__(
-            downloader.download_graph(
-                task.namespace, task.compute_graph, task.graph_version
-            ),
+            downloader.download_graph(function_worker_input.task),
             **kwargs,
         )
-        self.
+        self.function_worker_input = function_worker_input


-class 
+class DownloadInputsTask(asyncio.Task):
     def __init__(
         self,
         *,
-
+        function_worker_input: FunctionWorkerInput,
         downloader: Downloader,
         **kwargs,
     ):
-        kwargs["name"] = "
+        kwargs["name"] = "download_inputs"
         kwargs["loop"] = asyncio.get_event_loop()
         super().__init__(
-            downloader.
+            downloader.download_inputs(function_worker_input.task),
             **kwargs,
         )
-        self.
+        self.function_worker_input = function_worker_input


-class 
+class RunTask(asyncio.Task):
     def __init__(
         self,
         *,
         function_worker: FunctionWorker,
-
-        input: IndexifyData,
-        init_value: Optional[IndexifyData] = None,
-        code_path: str,
+        function_worker_input: FunctionWorkerInput,
         **kwargs,
     ):
-        kwargs["name"] = "
+        kwargs["name"] = "run_task"
         kwargs["loop"] = asyncio.get_event_loop()
         super().__init__(
-            function_worker.
-                namespace=task.namespace,
-                graph_name=task.compute_graph,
-                fn_name=task.compute_fn,
-                input=input,
-                init_value=init_value,
-                code_path=code_path,
-                version=task.graph_version,
-                invocation_id=task.invocation_id,
-            ),
+            function_worker.run(function_worker_input),
             **kwargs,
         )
-        self.
+        self.function_worker_input = function_worker_input
indexify/executor/function_executor/function_executor.py
ADDED
@@ -0,0 +1,32 @@
+from typing import Any, Optional
+
+import grpc
+
+# Timeout for Function Executor startup in seconds.
+# The timeout is counted from the moment when the Function Executor environment
+# is fully prepared and the Function Executor gets started.
+FUNCTION_EXECUTOR_READY_TIMEOUT_SEC = 5
+
+
+class FunctionExecutor:
+    """Abstract interface for a FunctionExecutor.
+
+    FunctionExecutor is a class that executes tasks for a particular function.
+    FunctionExecutor implements the gRPC server that listens for incoming tasks.
+    """
+
+    async def channel(self) -> grpc.aio.Channel:
+        """Returns a async gRPC channel to the Function Executor.
+
+        The channel is in ready state and can be used for all gRPC communication with the Function Executor
+        and can be shared among coroutines running in the same event loop in the same thread. Users should
+        not close the channel as it's reused for all requests.
+        Raises Exception if an error occurred."""
+        raise NotImplementedError
+
+    def state(self) -> Optional[Any]:
+        """Returns optional state object.
+
+        The state object can be used to associate any data with the Function Executor.
+        """
+        raise NotImplementedError
indexify/executor/function_executor/function_executor_factory.py
ADDED
@@ -0,0 +1,26 @@
+from typing import Any, Optional
+
+from .function_executor import FunctionExecutor
+
+
+class FunctionExecutorFactory:
+    """Abstract class for creating function executors."""
+
+    async def create(
+        self, logger: Any, state: Optional[Any] = None
+    ) -> FunctionExecutor:
+        """Creates a new FunctionExecutor.
+
+        Args:
+            logger: logger to be used during the function.
+            state: state to be stored in the FunctionExecutor."""
+        raise NotImplementedError()
+
+    async def destroy(self, executor: FunctionExecutor, logger: Any) -> None:
+        """Destroys the FunctionExecutor and release all its resources.
+
+        Args:
+            logger: logger to be used during the function.
+        FunctionExecutor and customer code running inside of it are not notified about the destruction.
+        Never raises any Exceptions."""
+        raise NotImplementedError
indexify/executor/function_executor/function_executor_map.py
ADDED
@@ -0,0 +1,91 @@
+import asyncio
+from typing import Any, Dict, Optional
+
+import grpc
+
+from indexify.function_executor.proto.function_executor_pb2 import (
+    InitializeRequest,
+    InitializeResponse,
+)
+from indexify.function_executor.proto.function_executor_pb2_grpc import (
+    FunctionExecutorStub,
+)
+
+from .function_executor import FunctionExecutor
+from .function_executor_factory import FunctionExecutorFactory
+
+
+class FunctionExecutorMap:
+    """A map of ID => FunctionExecutor.
+
+    The map is safe to use by multiple couroutines running in event loop on the same thread
+    but it's not thread safe (can't be used from different threads concurrently)."""
+
+    def __init__(self, factory: FunctionExecutorFactory):
+        self._factory = factory
+        # Map of initialized Function executors ready to run tasks.
+        # function ID -> FunctionExecutor
+        self._executors: Dict[str, FunctionExecutor] = {}
+        # We have to do all operations under this lock because we need to ensure
+        # that we don't create more Function Executors than required. This is important
+        # e.g. when a Function Executor is using the only available GPU on the machine.
+        # We can get rid of this locking in the future once we assing GPUs explicitly to Function Executors.
+        # Running the full test suite with all this locking removed doesn't make it run faster,
+        # so it looks like this full locking doesn't really result in any performance penalty so far.
+        self._executors_lock = asyncio.Lock()
+
+    async def get_or_create(
+        self,
+        id: str,
+        initialize_request: InitializeRequest,
+        initial_state: Any,
+        logger: Any,
+    ) -> FunctionExecutor:
+        """Returns a FunctionExecutor for the given ID.
+
+        If the FunctionExecutor for the given ID doesn't exist then it will be created and initialized.
+        Raises an exception if the FunctionExecutor creation or initialization failed.
+        """
+        async with self._executors_lock:
+            # Use existing Function Executor if it's already initialized.
+            if id in self._executors:
+                return self._executors[id]
+
+            executor: Optional[FunctionExecutor] = None
+            try:
+                executor = await self._factory.create(logger, state=initial_state)
+                channel: grpc.aio.Channel = await executor.channel()
+                stub: FunctionExecutorStub = FunctionExecutorStub(channel)
+                initialize_response: InitializeResponse = await stub.initialize(
+                    initialize_request
+                )
+                if not initialize_response.success:
+                    raise Exception("initialize RPC failed at function executor")
+            except Exception:
+                if executor is not None:
+                    await self._factory.destroy(executor=executor, logger=logger)
+                # Function Executor creation or initialization failed.
+                raise
+
+            self._executors[id] = executor
+            return executor
+
+    async def delete(
+        self, id: str, function_executor: FunctionExecutor, logger: Any
+    ) -> None:
+        """Deletes the FunctionExecutor for the given ID.
+
+        Does nothing if the FunctionExecutor for the given ID doesn't exist or was already deleted.
+        """
+        async with self._executors_lock:
+            if self._executors[id] != function_executor:
+                # Function Executor was already deleted or replaced and the caller is not aware of this.
+                return
+            del self._executors[id]
+            await self._factory.destroy(executor=function_executor, logger=logger)
+
+    async def clear(self, logger):
+        async with self._executors_lock:
+            while self._executors:
+                id, function_executor = self._executors.popitem()
+                await self._factory.destroy(function_executor, logger)
indexify/executor/function_executor/process_function_executor.py
ADDED
@@ -0,0 +1,64 @@
+import asyncio
+from typing import Any, Optional
+
+import grpc
+
+from indexify.function_executor.proto.configuration import GRPC_CHANNEL_OPTIONS
+
+from .function_executor import (
+    FUNCTION_EXECUTOR_READY_TIMEOUT_SEC,
+    FunctionExecutor,
+)
+
+
+class ProcessFunctionExecutor(FunctionExecutor):
+    """A FunctionExecutor that runs in a separate host process."""
+
+    def __init__(
+        self,
+        process: asyncio.subprocess.Process,
+        port: int,
+        address: str,
+        logger: Any,
+        state: Optional[Any] = None,
+    ):
+        self._proc = process
+        self._port = port
+        self._address = address
+        self._logger = logger.bind(module=__name__)
+        self._channel: Optional[grpc.aio.Channel] = None
+        self._state: Optional[Any] = state
+
+    async def channel(self) -> grpc.aio.Channel:
+        # Not thread safe but async safe because we don't await.
+        if self._channel is not None:
+            return self._channel
+
+        channel: Optional[grpc.aio.Channel] = None
+        try:
+            channel = grpc.aio.insecure_channel(
+                self._address, options=GRPC_CHANNEL_OPTIONS
+            )
+            await asyncio.wait_for(
+                channel.channel_ready(),
+                timeout=FUNCTION_EXECUTOR_READY_TIMEOUT_SEC,
+            )
+            # Check if another channel was created by a concurrent coroutine.
+            # Not thread safe but async safe because we never overwrite non-None self._channel.
+            if self._channel is not None:
+                # Don't close and overwrite existing channel because it might be used for RPCs already.
+                await channel.close()
+                return self._channel
+            else:
+                self._channel = channel
+                return channel
+        except Exception:
+            if channel is not None:
+                await channel.close()
+            self._logger.error(
+                f"failed to connect to the gRPC server at {self._address} within {FUNCTION_EXECUTOR_READY_TIMEOUT_SEC} seconds"
+            )
+            raise
+
+    def state(self) -> Optional[Any]:
+        return self._state