indexify-0.2.44-py3-none-any.whl → indexify-0.2.46-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/__init__.py +2 -0
- indexify/cli.py +41 -80
- indexify/executor/api_objects.py +2 -0
- indexify/executor/downloader.py +23 -25
- indexify/executor/executor.py +29 -35
- indexify/executor/function_executor/function_executor.py +120 -19
- indexify/executor/function_executor/function_executor_state.py +75 -0
- indexify/executor/function_executor/invocation_state_client.py +232 -0
- indexify/executor/function_executor/server/function_executor_server.py +24 -0
- indexify/executor/function_executor/server/function_executor_server_factory.py +43 -0
- indexify/executor/function_executor/server/subprocess_function_executor_server.py +25 -0
- indexify/executor/function_executor/{process_function_executor_factory.py → server/subprocess_function_executor_server_factory.py} +21 -21
- indexify/executor/function_executor/single_task_runner.py +160 -0
- indexify/executor/function_executor/task_input.py +23 -0
- indexify/executor/function_executor/task_output.py +36 -0
- indexify/executor/task_reporter.py +10 -17
- indexify/executor/task_runner.py +104 -0
- indexify/function_executor/function_executor_service.py +22 -7
- indexify/function_executor/handlers/run_function/handler.py +13 -12
- indexify/function_executor/invocation_state/invocation_state_proxy_server.py +170 -0
- indexify/function_executor/invocation_state/proxied_invocation_state.py +24 -0
- indexify/function_executor/invocation_state/response_validator.py +29 -0
- indexify/function_executor/proto/function_executor.proto +47 -0
- indexify/function_executor/proto/function_executor_pb2.py +23 -11
- indexify/function_executor/proto/function_executor_pb2.pyi +70 -0
- indexify/function_executor/proto/function_executor_pb2_grpc.py +50 -0
- indexify/functions_sdk/graph.py +3 -3
- indexify/functions_sdk/image.py +142 -9
- indexify/functions_sdk/indexify_functions.py +45 -79
- indexify/functions_sdk/invocation_state/invocation_state.py +22 -0
- indexify/functions_sdk/invocation_state/local_invocation_state.py +30 -0
- indexify/http_client.py +0 -17
- {indexify-0.2.44.dist-info → indexify-0.2.46.dist-info}/METADATA +2 -2
- indexify-0.2.46.dist-info/RECORD +60 -0
- indexify/executor/function_executor/function_executor_factory.py +0 -26
- indexify/executor/function_executor/function_executor_map.py +0 -91
- indexify/executor/function_executor/process_function_executor.py +0 -64
- indexify/executor/function_worker.py +0 -253
- indexify-0.2.44.dist-info/RECORD +0 -50
- {indexify-0.2.44.dist-info → indexify-0.2.46.dist-info}/LICENSE.txt +0 -0
- {indexify-0.2.44.dist-info → indexify-0.2.46.dist-info}/WHEEL +0 -0
- {indexify-0.2.44.dist-info → indexify-0.2.46.dist-info}/entry_points.txt +0 -0
indexify/executor/function_executor/invocation_state_client.py
@@ -0,0 +1,232 @@
+import asyncio
+from typing import Any, AsyncGenerator, Optional, Union
+
+import grpc
+import httpx
+
+from indexify.executor.downloader import serialized_object_from_http_response
+from indexify.function_executor.proto.function_executor_pb2 import (
+    GetInvocationStateResponse,
+    InvocationStateRequest,
+    InvocationStateResponse,
+    SerializedObject,
+    SetInvocationStateResponse,
+)
+from indexify.function_executor.proto.function_executor_pb2_grpc import (
+    FunctionExecutorStub,
+)
+from indexify.function_executor.proto.message_validator import MessageValidator
+
+
+class InvocationStateClient:
+    """InvocationStateClient is a client for the invocation state server of a Function Executor.
+
+    The client initializes the Function Executor's invocation state server and executes the requests
+    it sends to the client.
+    """
+
+    def __init__(
+        self,
+        stub: FunctionExecutorStub,
+        base_url: str,
+        http_client: httpx.AsyncClient,
+        graph: str,
+        namespace: str,
+        logger: Any,
+    ):
+        self._stub: FunctionExecutorStub = stub
+        self._base_url: str = base_url
+        self._http_client: httpx.AsyncClient = http_client
+        self._graph: str = graph
+        self._namespace: str = namespace
+        self._logger: Any = logger.bind(
+            module=__name__, graph=graph, namespace=namespace
+        )
+        self._client_response_queue: asyncio.Queue[
+            Union[InvocationStateResponse, str]
+        ] = asyncio.Queue()
+        self._task_id_to_invocation_id: dict[str, str] = {}
+        self._request_loop_task: Optional[asyncio.Task] = None
+
+    async def start(self) -> None:
+        """Starts the invocation state client.
+
+        This method initializes the Function Executor's invocation state server first.
+        This is why this method needs to be awaited before executing any tasks on the Function Executor
+        that might use the invocation state feature."""
+        server_requests = self._stub.initialize_invocation_state_server(
+            self._response_generator()
+        )
+        self._request_loop_task = asyncio.create_task(
+            self._request_loop(server_requests)
+        )
+
+    def add_task_to_invocation_id_entry(self, task_id: str, invocation_id: str) -> None:
+        """Adds a task ID to invocation ID entry to the client's internal state.
+
+        This allows the client to authorize requests to the invocation state server.
+        If a request doesn't come from a task ID that was added here, it will
+        be rejected. It's the caller's responsibility to only add task IDs that are being
+        executed by the Function Executor, so the Function Executor can't get access to
+        the invocation state of tasks it doesn't run."""
+        self._task_id_to_invocation_id[task_id] = invocation_id
+
+    def remove_task_to_invocation_id_entry(self, task_id: str) -> None:
+        del self._task_id_to_invocation_id[task_id]
+
+    async def destroy(self) -> None:
+        if self._request_loop_task is not None:
+            self._request_loop_task.cancel()
+        await self._client_response_queue.put("shutdown")
+
+    async def _request_loop(
+        self, server_requests: AsyncGenerator[InvocationStateRequest, None]
+    ) -> None:
+        try:
+            async for request in server_requests:
+                await self._process_request_no_raise(request)
+        except grpc.aio.AioRpcError:
+            # Reading from the stream failed.
+            # This is a normal situation when the server is shutting down.
+            pass
+        except asyncio.CancelledError:
+            # This async task was cancelled by destroy(). Normal situation too.
+            pass
+
+    async def _process_request_no_raise(self, request: InvocationStateRequest) -> None:
+        try:
+            await self._process_request(request)
+        except Exception as e:
+            try:
+                await self._client_response_queue.put(
+                    InvocationStateResponse(
+                        request_id=request.request_id,
+                        success=False,
+                    )
+                )
+            except Exception as ee:
+                self._logger.error("failed to send error response", exc_info=ee)
+
+            self._logger.error(
+                "failed to process request",
+                exc_info=e,
+                request_id=request.request_id,
+            )
+
+    async def _process_request(
+        self, request: InvocationStateRequest
+    ) -> InvocationStateResponse:
+        self._validate_request(request)
+        # This is a very important check. We don't trust the invocation ID and task ID
+        # supplied by the Function Executor. If a task ID entry doesn't exist then it's
+        # a privilege escalation attempt.
+        invocation_id: str = self._task_id_to_invocation_id[request.task_id]
+        if request.HasField("get"):
+            value: Optional[SerializedObject] = await self._get_server_state(
+                invocation_id, request.get.key
+            )
+            await self._client_response_queue.put(
+                InvocationStateResponse(
+                    request_id=request.request_id,
+                    success=True,
+                    get=GetInvocationStateResponse(
+                        key=request.get.key,
+                        value=value,
+                    ),
+                )
+            )
+        elif request.HasField("set"):
+            await self._set_server_state(
+                invocation_id, request.set.key, request.set.value
+            )
+            await self._client_response_queue.put(
+                InvocationStateResponse(
+                    request_id=request.request_id,
+                    success=True,
+                    set=SetInvocationStateResponse(),
+                )
+            )
+
+    async def _response_generator(
+        self,
+    ) -> AsyncGenerator[InvocationStateResponse, None]:
+        while True:
+            response = await self._client_response_queue.get()
+            # Hacky cancellation of the generator.
+            if response == "shutdown":
+                break
+            yield response
+
+    async def _set_server_state(
+        self, invocation_id: str, key: str, value: SerializedObject
+    ) -> None:
+        url: str = (
+            f"{self._base_url}/internal/namespaces/{self._namespace}/compute_graphs/{self._graph}/invocations/{invocation_id}/ctx/{key}"
+        )
+        payload = value.bytes if value.HasField("bytes") else value.string
+
+        response = await self._http_client.post(
+            url=url,
+            files=[
+                (
+                    "value",
+                    ("value", payload, value.content_type),
+                ),
+            ],
+        )
+
+        try:
+            response.raise_for_status()
+        except Exception as e:
+            self._logger.error(
+                "failed to set graph invocation state",
+                invocation_id=invocation_id,
+                key=key,
+                status_code=response.status_code,
+                error=response.text,
+                exc_info=e,
+            )
+            raise
+
+    async def _get_server_state(
+        self, invocation_id: str, key: str
+    ) -> Optional[SerializedObject]:
+        url: str = (
+            f"{self._base_url}/internal/namespaces/{self._namespace}/compute_graphs/{self._graph}/invocations/{invocation_id}/ctx/{key}"
+        )
+
+        response: httpx.Response = await self._http_client.get(url)
+        if response.status_code == 404:
+            return None
+
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError as e:
+            self._logger.error(
+                "failed to download graph invocation state value",
+                invocation_id=invocation_id,
+                key=key,
+                status_code=response.status_code,
+                error=response.text,
+                exc_info=e,
+            )
+            raise
+
+        return serialized_object_from_http_response(response)
+
+    def _validate_request(self, request: InvocationStateRequest) -> None:
+        (
+            MessageValidator(request)
+            .required_field("request_id")
+            .required_field("task_id")
+        )
+        if request.HasField("get"):
+            (MessageValidator(request.get).required_field("key"))
+        elif request.HasField("set"):
+            (
+                MessageValidator(request.set)
+                .required_field("key")
+                .required_serialized_object("value")
+            )
+        else:
+            raise ValueError("unknown request type")
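The executor-side wiring of this client is not part of this hunk. As a rough sketch of the intended call sequence, assuming an already-connected FunctionExecutorStub, an httpx.AsyncClient pointed at the Indexify server, and a structlog-style logger (all names other than InvocationStateClient's own API are placeholders, not taken from the diff):

    import httpx
    from indexify.executor.function_executor.invocation_state_client import (
        InvocationStateClient,
    )

    async def run_with_invocation_state(stub, task_id, invocation_id, logger):
        # Hypothetical wiring; URLs, graph and namespace names are placeholders.
        client = InvocationStateClient(
            stub=stub,
            base_url="http://localhost:8900",
            http_client=httpx.AsyncClient(),
            graph="my_graph",
            namespace="default",
            logger=logger,
        )
        await client.start()  # initializes the FE's invocation state server first
        client.add_task_to_invocation_id_entry(task_id, invocation_id)
        try:
            ...  # run the task on the Function Executor
        finally:
            client.remove_task_to_invocation_id_entry(task_id)
            await client.destroy()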
indexify/executor/function_executor/server/function_executor_server.py
@@ -0,0 +1,24 @@
+from typing import Any
+
+import grpc
+
+# Timeout for Function Executor Server startup in seconds. The timeout is counted from
+# the moment the server process is started.
+FUNCTION_EXECUTOR_SERVER_READY_TIMEOUT_SEC = 5
+
+
+class FunctionExecutorServer:
+    """Abstract interface for a Function Executor Server.
+
+    A FunctionExecutorServer executes tasks for a particular function.
+    Communication with a FunctionExecutorServer is typically done via gRPC.
+    """
+
+    async def create_channel(self, logger: Any) -> grpc.aio.Channel:
+        """Creates a new async gRPC channel to the Function Executor Server.
+
+        The channel is in ready state. It can only be used in the same thread where the
+        function was called. The caller should close the channel when it's no longer needed.
+
+        Raises an Exception if an error occurred."""
+        raise NotImplementedError
indexify/executor/function_executor/server/function_executor_server_factory.py
@@ -0,0 +1,43 @@
+from typing import Any, Optional
+
+from .function_executor_server import FunctionExecutorServer
+
+
+class FunctionExecutorServerConfiguration:
+    """Configuration for creating a FunctionExecutorServer.
+
+    This configuration only includes data that must be known
+    during creation of the FunctionExecutorServer. If some data
+    is not required during the creation then it shouldn't be here.
+
+    A particular factory implementation might ignore certain
+    configuration parameters or raise an exception if it can't implement
+    them."""
+
+    def __init__(self, image_uri: Optional[str]):
+        # Container image URI of the Function Executor Server.
+        self.image_uri: Optional[str] = image_uri
+
+
+class FunctionExecutorServerFactory:
+    """Abstract class for creating FunctionExecutorServers."""
+
+    async def create(
+        self, config: FunctionExecutorServerConfiguration, logger: Any
+    ) -> FunctionExecutorServer:
+        """Creates a new FunctionExecutorServer.
+
+        Raises an exception if the creation failed or the configuration is not supported.
+        Args:
+            config: configuration of the FunctionExecutorServer.
+            logger: logger to be used during the call."""
+        raise NotImplementedError()
+
+    async def destroy(self, server: FunctionExecutorServer, logger: Any) -> None:
+        """Destroys the FunctionExecutorServer and releases all its resources.
+
+        Args:
+            logger: logger to be used during the call.
+        The FunctionExecutorServer and the customer code it's running are not notified about the destruction.
+        Never raises any exceptions."""
+        raise NotImplementedError
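A minimal sketch of how a FunctionExecutorServerFactory implementation is meant to be driven, based only on the interfaces above (the factory and logger instances are assumed to already exist and are not taken from the diff):

    from indexify.executor.function_executor.server.function_executor_server_factory import (
        FunctionExecutorServerConfiguration,
    )

    async def with_function_executor_server(factory, logger):
        # image_uri=None because, for example, the subprocess factory rejects container images.
        config = FunctionExecutorServerConfiguration(image_uri=None)
        server = await factory.create(config, logger=logger)
        try:
            channel = await server.create_channel(logger)  # ready-to-use grpc.aio.Channel
            ...  # talk to the Function Executor over the channel
        finally:
            await factory.destroy(server, logger)  # documented to never raise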
indexify/executor/function_executor/server/subprocess_function_executor_server.py
@@ -0,0 +1,25 @@
+import asyncio
+from typing import Any
+
+import grpc
+
+from indexify.function_executor.proto.configuration import GRPC_CHANNEL_OPTIONS
+
+from .function_executor_server import FunctionExecutorServer
+
+
+class SubprocessFunctionExecutorServer(FunctionExecutorServer):
+    """A FunctionExecutorServer that runs in a child process."""
+
+    def __init__(
+        self,
+        process: asyncio.subprocess.Process,
+        port: int,
+        address: str,
+    ):
+        self._proc = process
+        self._port = port
+        self._address = address
+
+    async def create_channel(self, logger: Any) -> grpc.aio.Channel:
+        return grpc.aio.insecure_channel(self._address, options=GRPC_CHANNEL_OPTIONS)
indexify/executor/function_executor/{process_function_executor_factory.py → server/subprocess_function_executor_server_factory.py}
@@ -1,26 +1,32 @@
 import asyncio
 from typing import Any, Optional
 
-from .
-
+from .function_executor_server_factory import (
+    FunctionExecutorServerConfiguration,
+    FunctionExecutorServerFactory,
+)
+from .subprocess_function_executor_server import (
+    SubprocessFunctionExecutorServer,
+)
 
 
-class ProcessFunctionExecutorFactory(FunctionExecutorFactory):
+class SubprocessFunctionExecutorServerFactory(FunctionExecutorServerFactory):
     def __init__(
         self,
-        indexify_server_address: str,
         development_mode: bool,
-        config_path: Optional[str],
     ):
-        self._indexify_server_address: str = indexify_server_address
         self._development_mode: bool = development_mode
-        self._config_path: Optional[str] = config_path
         # Registered ports range ends at 49151. We start from 50000 to hopefully avoid conflicts.
         self._free_ports = set(range(50000, 51000))
 
     async def create(
-        self,
-    ) ->
+        self, config: FunctionExecutorServerConfiguration, logger: Any
+    ) -> SubprocessFunctionExecutorServer:
+        if config.image_uri is not None:
+            raise ValueError(
+                "SubprocessFunctionExecutorServerFactory doesn't support container images"
+            )
+
         logger = logger.bind(module=__name__)
         port: Optional[int] = None
 
@@ -30,13 +36,9 @@ class ProcessFunctionExecutorFactory(FunctionExecutorFactory):
                 "function-executor",
                 "--function-executor-server-address",
                 _server_address(port),
-                "--indexify-server-address",
-                self._indexify_server_address,
             ]
             if self._development_mode:
                 args.append("--dev")
-            if self._config_path is not None:
-                args.extend(["--config-path", self._config_path])
            # Run the process with our stdout, stderr. We want to see process logs and exceptions in our process output.
            # This is useful for debugging. Customer function stdout and stderr is captured and returned in the response
            # so we won't see it in our process outputs. This is the right behavior as customer function stdout and stderr
@@ -45,12 +47,10 @@ class ProcessFunctionExecutorFactory(FunctionExecutorFactory):
                 "indexify-cli",
                 *args,
             )
-            return
+            return SubprocessFunctionExecutorServer(
                 process=proc,
                 port=port,
                 address=_server_address(port),
-                logger=logger,
-                state=state,
             )
         except Exception as e:
             if port is not None:
@@ -61,9 +61,11 @@ class ProcessFunctionExecutorFactory(FunctionExecutorFactory):
             )
             raise
 
-    async def destroy(
-
-
+    async def destroy(
+        self, server: SubprocessFunctionExecutorServer, logger: Any
+    ) -> None:
+        proc: asyncio.subprocess.Process = server._proc
+        port: int = server._port
         logger = logger.bind(
             module=__name__,
             pid=proc.pid,
@@ -84,8 +86,6 @@ class ProcessFunctionExecutorFactory(FunctionExecutorFactory):
             )
         finally:
             self._release_port(port)
-            if executor._channel is not None:
-                await executor._channel.close()
 
     def _allocate_port(self) -> int:
         # No asyncio.Lock is required here because this operation never awaits
indexify/executor/function_executor/single_task_runner.py
@@ -0,0 +1,160 @@
+from typing import Any, Optional
+
+import grpc
+
+from indexify.function_executor.proto.function_executor_pb2 import (
+    InitializeRequest,
+    RunTaskRequest,
+    RunTaskResponse,
+)
+from indexify.function_executor.proto.function_executor_pb2_grpc import (
+    FunctionExecutorStub,
+)
+
+from ..api_objects import Task
+from .function_executor import FunctionExecutor
+from .function_executor_state import FunctionExecutorState
+from .server.function_executor_server_factory import (
+    FunctionExecutorServerConfiguration,
+    FunctionExecutorServerFactory,
+)
+from .task_input import TaskInput
+from .task_output import TaskOutput
+
+
+class SingleTaskRunner:
+    def __init__(
+        self,
+        function_executor_state: FunctionExecutorState,
+        task_input: TaskInput,
+        function_executor_server_factory: FunctionExecutorServerFactory,
+        base_url: str,
+        config_path: Optional[str],
+        logger: Any,
+    ):
+        self._state: FunctionExecutorState = function_executor_state
+        self._task_input: TaskInput = task_input
+        self._factory: FunctionExecutorServerFactory = function_executor_server_factory
+        self._base_url: str = base_url
+        self._config_path: Optional[str] = config_path
+        self._logger = logger.bind(module=__name__)
+
+    async def run(self) -> TaskOutput:
+        """Runs the task in the Function Executor.
+
+        The FunctionExecutorState must be locked by the caller.
+        The lock is released during the actual task run in the server.
+        The lock is re-acquired on return.
+
+        Raises an exception if an error occurred."""
+        self._state.check_locked()
+
+        if self._state.function_executor is None:
+            self._state.function_executor = await self._create_function_executor()
+
+        return await self._run()
+
+    async def _create_function_executor(self) -> FunctionExecutor:
+        function_executor: FunctionExecutor = FunctionExecutor(
+            server_factory=self._factory, logger=self._logger
+        )
+        try:
+            config: FunctionExecutorServerConfiguration = (
+                FunctionExecutorServerConfiguration(
+                    image_uri=self._task_input.task.image_uri,
+                )
+            )
+            initialize_request: InitializeRequest = InitializeRequest(
+                namespace=self._task_input.task.namespace,
+                graph_name=self._task_input.task.compute_graph,
+                graph_version=self._task_input.task.graph_version,
+                function_name=self._task_input.task.compute_fn,
+                graph=self._task_input.graph,
+            )
+            await function_executor.initialize(
+                config=config,
+                initialize_request=initialize_request,
+                base_url=self._base_url,
+                config_path=self._config_path,
+            )
+            return function_executor
+        except Exception as e:
+            self._logger.error(
+                "failed to initialize function executor",
+                exc_info=e,
+            )
+            await function_executor.destroy()
+            raise
+
+    async def _run(self) -> TaskOutput:
+        request: RunTaskRequest = RunTaskRequest(
+            graph_invocation_id=self._task_input.task.invocation_id,
+            task_id=self._task_input.task.id,
+            function_input=self._task_input.input,
+        )
+        if self._task_input.init_value is not None:
+            request.function_init_value.CopyFrom(self._task_input.init_value)
+        channel: grpc.aio.Channel = self._state.function_executor.channel()
+
+        async with _RunningTaskContextManager(
+            task_input=self._task_input, function_executor_state=self._state
+        ):
+            response: RunTaskResponse = await FunctionExecutorStub(channel).run_task(
+                request
+            )
+            return _task_output(task=self._task_input.task, response=response)
+
+
+class _RunningTaskContextManager:
+    """Performs all the actions required before and after running a task."""
+
+    def __init__(
+        self, task_input: TaskInput, function_executor_state: FunctionExecutorState
+    ):
+        self._task_input: TaskInput = task_input
+        self._state: FunctionExecutorState = function_executor_state
+
+    async def __aenter__(self):
+        self._state.increment_running_tasks()
+        self._state.function_executor.invocation_state_client().add_task_to_invocation_id_entry(
+            task_id=self._task_input.task.id,
+            invocation_id=self._task_input.task.invocation_id,
+        )
+        # Unlock the state so other tasks can act depending on it.
+        self._state.lock.release()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        await self._state.lock.acquire()
+        self._state.decrement_running_tasks()
+        self._state.function_executor.invocation_state_client().remove_task_to_invocation_id_entry(
+            task_id=self._task_input.task.id
+        )
+
+
+def _task_output(task: Task, response: RunTaskResponse) -> TaskOutput:
+    required_fields = [
+        "stdout",
+        "stderr",
+        "is_reducer",
+        "success",
+    ]
+
+    for field in required_fields:
+        if not response.HasField(field):
+            raise ValueError(f"Response is missing required field: {field}")
+
+    output = TaskOutput(
+        task=task,
+        stdout=response.stdout,
+        stderr=response.stderr,
+        reducer=response.is_reducer,
+        success=response.success,
+    )
+
+    if response.HasField("function_output"):
+        output.function_output = response.function_output
+    if response.HasField("router_output"):
+        output.router_output = response.router_output
+
+    return output
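The locking protocol described in run()'s docstring implies caller code roughly like the following sketch; the real call site presumably lives in the new task_runner.py, which is not reproduced in this excerpt, and FunctionExecutorState.lock is assumed to be an asyncio-style lock based on the acquire()/release() calls above.

    from indexify.executor.function_executor.single_task_runner import SingleTaskRunner

    async def run_one_task(state, task_input, factory, base_url, logger):
        # Hypothetical caller; argument objects are assumed to already exist.
        async with state.lock:
            runner = SingleTaskRunner(
                function_executor_state=state,
                task_input=task_input,
                function_executor_server_factory=factory,
                base_url=base_url,
                config_path=None,
                logger=logger,
            )
            # run() releases the lock while the task executes on the server
            # and re-acquires it before returning.
            return await runner.run()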
indexify/executor/function_executor/task_input.py
@@ -0,0 +1,23 @@
+from typing import Optional
+
+from indexify.function_executor.proto.function_executor_pb2 import (
+    SerializedObject,
+)
+
+from ..api_objects import Task
+
+
+class TaskInput:
+    """Task with all the resources required to run it."""
+
+    def __init__(
+        self,
+        task: Task,
+        graph: SerializedObject,
+        input: SerializedObject,
+        init_value: Optional[SerializedObject],
+    ):
+        self.task: Task = task
+        self.graph: SerializedObject = graph
+        self.input: SerializedObject = input
+        self.init_value: Optional[SerializedObject] = init_value
indexify/executor/function_executor/task_output.py
@@ -0,0 +1,36 @@
+from typing import Optional
+
+from indexify.function_executor.proto.function_executor_pb2 import (
+    FunctionOutput,
+    RouterOutput,
+)
+
+from ..api_objects import Task
+
+
+class TaskOutput:
+    """Result of running a task."""
+
+    def __init__(
+        self,
+        task: Task,
+        function_output: Optional[FunctionOutput] = None,
+        router_output: Optional[RouterOutput] = None,
+        stdout: Optional[str] = None,
+        stderr: Optional[str] = None,
+        reducer: bool = False,
+        success: bool = False,
+    ):
+        self.task = task
+        self.function_output = function_output
+        self.router_output = router_output
+        self.stdout = stdout
+        self.stderr = stderr
+        self.reducer = reducer
+        self.success = success
+
+    @classmethod
+    def internal_error(cls, task: Task) -> "TaskOutput":
+        """Creates a TaskOutput for an internal error."""
+        # We are not sharing internal error messages with the customer.
+        return TaskOutput(task=task, stderr="Platform failed to execute the function.")
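TaskOutput.internal_error() gives the executor a uniform way to report platform-side failures without exposing internal details to the customer. A sketch of the presumed usage pattern (hypothetical error-handling code, not taken from the diff):

    from indexify.executor.function_executor.task_output import TaskOutput

    async def run_task_reporting_errors(runner, task):
        try:
            return await runner.run()
        except Exception:
            # The customer only sees the generic "Platform failed to execute the function." message.
            return TaskOutput.internal_error(task)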
|