indexify 0.4.16__tar.gz → 0.4.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. {indexify-0.4.16 → indexify-0.4.18}/PKG-INFO +3 -3
  2. {indexify-0.4.16 → indexify-0.4.18}/pyproject.toml +3 -3
  3. indexify-0.4.18/src/indexify/executor/channel_manager.py +167 -0
  4. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/metrics/state_reporter.py +2 -2
  5. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +4 -0
  6. indexify-0.4.18/src/indexify/executor/monitoring/health_checker/metrics/health_checker.py +5 -0
  7. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/state_reconciler.py +37 -9
  8. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/state_reporter.py +66 -48
  9. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/proto/executor_api.proto +0 -2
  10. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/proto/executor_api_pb2_grpc.py +0 -2
  11. indexify-0.4.16/src/indexify/executor/channel_manager.py +0 -195
  12. {indexify-0.4.16 → indexify-0.4.18}/README.md +0 -0
  13. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/cli/__init__.py +0 -0
  14. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/cli/build_image.py +0 -0
  15. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/cli/deploy.py +0 -0
  16. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/cli/executor.py +0 -0
  17. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/README.md +0 -0
  18. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/blob_store/blob_store.py +0 -0
  19. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/blob_store/local_fs_blob_store.py +0 -0
  20. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/blob_store/metrics/blob_store.py +0 -0
  21. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/blob_store/s3_blob_store.py +0 -0
  22. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/executor.py +1 -1
  23. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_allowlist.py +0 -0
  24. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor/function_executor.py +0 -0
  25. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor/health_checker.py +0 -0
  26. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor/invocation_state_client.py +0 -0
  27. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
  28. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
  29. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
  30. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
  31. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
  32. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -0
  33. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
  34. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +0 -0
  35. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/__init__.py +0 -0
  36. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/completed_task_metrics.py +0 -0
  37. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/create_function_executor.py +0 -0
  38. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/debug_event_loop.py +0 -0
  39. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/downloads.py +0 -0
  40. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/events.py +0 -0
  41. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/function_executor_controller.py +0 -0
  42. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/function_executor_startup_output.py +0 -0
  43. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/loggers.py +0 -0
  44. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/message_validators.py +0 -0
  45. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +0 -0
  46. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/metrics/downloads.py +0 -0
  47. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/metrics/function_executor_controller.py +0 -0
  48. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/metrics/run_task.py +0 -0
  49. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/metrics/upload_task_output.py +0 -0
  50. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/prepare_task.py +0 -0
  51. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/run_task.py +0 -0
  52. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/task_info.py +0 -0
  53. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/task_output.py +0 -0
  54. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/terminate_function_executor.py +0 -0
  55. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/function_executor_controller/upload_task_output.py +0 -0
  56. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/host_resources/host_resources.py +0 -0
  57. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/host_resources/nvidia_gpu.py +0 -0
  58. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py +0 -0
  59. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/metrics/channel_manager.py +0 -0
  60. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/metrics/executor.py +0 -0
  61. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/metrics/state_reconciler.py +0 -0
  62. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/monitoring/handler.py +0 -0
  63. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
  64. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
  65. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/monitoring/metrics.py +0 -0
  66. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
  67. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/monitoring/server.py +0 -0
  68. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
  69. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/proto/executor_api_pb2.py +0 -0
  70. {indexify-0.4.16 → indexify-0.4.18}/src/indexify/proto/executor_api_pb2.pyi +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: indexify
3
- Version: 0.4.16
3
+ Version: 0.4.18
4
4
  Summary: Open Source Indexify components and helper tools
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -14,10 +14,10 @@ Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
16
  Requires-Dist: aiohttp (>=3.12.14,<4.0.0)
17
- Requires-Dist: boto3 (>=1.39.6,<2.0.0)
17
+ Requires-Dist: boto3 (>=1.39.8,<2.0.0)
18
18
  Requires-Dist: prometheus-client (>=0.22.1,<0.23.0)
19
19
  Requires-Dist: psutil (>=7.0.0,<8.0.0)
20
- Requires-Dist: tensorlake (==0.2.25)
20
+ Requires-Dist: tensorlake (==0.2.27)
21
21
  Project-URL: Repository, https://github.com/tensorlakeai/indexify
22
22
  Description-Content-Type: text/markdown
23
23
 
@@ -1,7 +1,7 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
3
  # Incremented if any of the components provided in this packages are updated.
4
- version = "0.4.16"
4
+ version = "0.4.18"
5
5
  description = "Open Source Indexify components and helper tools"
6
6
  authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
7
7
  license = "Apache 2.0"
@@ -23,10 +23,10 @@ python = "^3.10"
23
23
  aiohttp = "^3.12.14"
24
24
  prometheus-client = "^0.22.1"
25
25
  psutil = "^7.0.0"
26
- boto3 = "^1.39.6"
26
+ boto3 = "^1.39.8"
27
27
  # Adds function-executor binary, utils lib, sdk used in indexify-cli commands.
28
28
  # We need to specify the tensorlake version exactly because pip install doesn't respect poetry.lock files.
29
- tensorlake = "0.2.25"
29
+ tensorlake = "0.2.27"
30
30
  # Uncomment the next line to use local tensorlake package (only for development!)
31
31
  # tensorlake = { path = "../tensorlake", develop = true }
32
32
  # pydantic is provided by tensorlake
@@ -0,0 +1,167 @@
1
+ import asyncio
2
+ import time
3
+ from typing import Any, Dict, Optional
4
+
5
+ import grpc.aio
6
+ import yaml
7
+
8
+ from .metrics.channel_manager import (
9
+ metric_grpc_server_channel_creation_latency,
10
+ metric_grpc_server_channel_creation_retries,
11
+ metric_grpc_server_channel_creations,
12
+ )
13
+
14
+ _RETRY_INTERVAL_SEC = 5
15
+
16
+
17
+ class ChannelManager:
18
+ def __init__(
19
+ self,
20
+ server_address: str,
21
+ config_path: Optional[str],
22
+ logger: Any,
23
+ ):
24
+ self._logger: Any = logger.bind(module=__name__, server_address=server_address)
25
+ self._server_address: str = server_address
26
+ self._channel_credentials: Optional[grpc.ChannelCredentials] = None
27
+ # Shared channel used by different Executor components to communicate with Server.
28
+ self._shared_channel_lock = asyncio.Lock()
29
+ self._shared_channel: Optional[grpc.aio.Channel] = None
30
+
31
+ self._init_tls(config_path)
32
+
33
+ def _init_tls(self, config_path: Optional[str]):
34
+ if config_path is None:
35
+ return
36
+
37
+ # The same config file format as in Tensorlake SDK HTTP client, see:
38
+ # https://github.com/tensorlakeai/tensorlake/blob/main/src/tensorlake/utils/http_client.py
39
+ with open(config_path, "r") as config_file:
40
+ config = yaml.safe_load(config_file)
41
+
42
+ if not config.get("use_tls", False):
43
+ return
44
+
45
+ tls_config: Dict[str, str] = config["tls_config"]
46
+ cert_path: Optional[str] = tls_config.get("cert_path", None)
47
+ key_path: Optional[str] = tls_config.get("key_path", None)
48
+ ca_bundle_path: Optional[str] = tls_config.get("ca_bundle_path", None)
49
+
50
+ self._logger = self._logger.bind(
51
+ cert_path=cert_path,
52
+ key_path=key_path,
53
+ ca_bundle_path=ca_bundle_path,
54
+ )
55
+ self._logger.info("TLS is enabled for grpc channels to server")
56
+
57
+ private_key: Optional[bytes] = None
58
+ certificate_chain: Optional[bytes] = None
59
+ root_certificates: Optional[bytes] = None
60
+
61
+ if cert_path is not None:
62
+ with open(cert_path, "rb") as cert_file:
63
+ certificate_chain = cert_file.read()
64
+ if key_path is not None:
65
+ with open(key_path, "rb") as key_file:
66
+ private_key = key_file.read()
67
+ if ca_bundle_path is not None:
68
+ with open(ca_bundle_path, "rb") as ca_bundle_file:
69
+ root_certificates = ca_bundle_file.read()
70
+
71
+ self._channel_credentials = grpc.ssl_channel_credentials(
72
+ root_certificates=root_certificates,
73
+ private_key=private_key,
74
+ certificate_chain=certificate_chain,
75
+ )
76
+
77
+ async def destroy(self):
78
+ # Okay to not hold the lock here as we're destroying the server channel forever.
79
+ if self._shared_channel is not None:
80
+ await self._destroy_shared_channel()
81
+
82
+ async def fail_shared_channel(self) -> None:
83
+ """Marks the shared channel as unhealthy and creates a new one.
84
+
85
+ Doesn't raise any exceptions.
86
+ """
87
+ async with self._shared_channel_lock:
88
+ if self._shared_channel is None:
89
+ self._logger.error(
90
+ "grpc server channel doesn't exist, can't mark it unhealthy"
91
+ )
92
+ return
93
+
94
+ self._logger.info("marking grpc server channel as unhealthy")
95
+ # All the channel users will see it failing because we destroyed it, and they will call get_shared_channel() again.
96
+ await self._destroy_shared_channel()
97
+
98
+ async def get_shared_channel(self) -> grpc.aio.Channel:
99
+ """Returns shared channel to the gRPC server.
100
+
101
+ The health of the shared channel is constantly monitored so it's more reliable than using a
102
+ standalone channel created for a particular short term need. Doesn't raise any exceptions.
103
+ """
104
+ # Use the lock to ensure that we only create one channel without race conditions.
105
+ async with self._shared_channel_lock:
106
+ if self._shared_channel is None:
107
+ await self._create_shared_channel()
108
+
109
+ return self._shared_channel
110
+
111
+ def create_standalone_channel(self) -> grpc.aio.Channel:
112
+ """Creates a new channel to the gRPC server.
113
+
114
+ Used for one-off RPCs where we don't need to monitor channel health or retry its creation indefinitely.
115
+ Raises an exception on failure.
116
+ """
117
+ with (
118
+ metric_grpc_server_channel_creation_retries.count_exceptions(),
119
+ metric_grpc_server_channel_creation_latency.time(),
120
+ ):
121
+ metric_grpc_server_channel_creations.inc()
122
+ if self._channel_credentials is None:
123
+ return grpc.aio.insecure_channel(target=self._server_address)
124
+ else:
125
+ return grpc.aio.secure_channel(
126
+ target=self._server_address,
127
+ credentials=self._channel_credentials,
128
+ )
129
+
130
+ async def _create_shared_channel(self) -> None:
131
+ """Creates new shared channel.
132
+
133
+ self._shared_channel_lock must be acquired before calling this method.
134
+ Never raises any exceptions.
135
+ """
136
+ while True:
137
+ try:
138
+ create_channel_start = time.monotonic()
139
+ self._logger.info("creating new grpc channel to server")
140
+ self._shared_channel = self.create_standalone_channel()
141
+ # Trigger a connection attempt so callers don't see "channel closed" errors before any connection was ever attempted.
142
+ self._shared_channel.get_state(try_to_connect=True)
143
+ self._logger.info(
144
+ "created new grpc channel to server",
145
+ duration_sec=time.monotonic() - create_channel_start,
146
+ )
147
+ break
148
+ except Exception as e:
149
+ self._logger.error(
150
+ f"failed creating grpc channel to server, retrying in {_RETRY_INTERVAL_SEC} seconds",
151
+ exc_info=e,
152
+ )
153
+ await asyncio.sleep(_RETRY_INTERVAL_SEC)
154
+
155
+ async def _destroy_shared_channel(self) -> None:
156
+ """Closes the existing shared channel.
157
+
158
+ self._shared_channel_lock must be acquired before calling this method.
159
+ Never raises any exceptions.
160
+ """
161
+ try:
162
+ self._logger.info("closing grpc channel to server")
163
+ await self._shared_channel.close()
164
+ self._logger.info("closed grpc channel to server")
165
+ except Exception as e:
166
+ self._logger.error("failed closing grpc channel to server", exc_info=e)
167
+ self._shared_channel = None
@@ -6,11 +6,11 @@ metric_state_report_rpcs = prometheus_client.Counter(
6
6
  "state_report_rpcs",
7
7
  "Number of Executor state report RPCs to Server",
8
8
  )
9
- metric_state_report_errors = prometheus_client.Counter(
9
+ metric_state_report_rpc_errors = prometheus_client.Counter(
10
10
  "state_report_rpc_errors",
11
11
  "Number of Executor state report RPC errors",
12
12
  )
13
- metric_state_report_latency: prometheus_client.Histogram = (
13
+ metric_state_report_rpc_latency: prometheus_client.Histogram = (
14
14
  latency_metric_for_fast_operation(
15
15
  "state_report_rpc", "Executor state report rpc to Server"
16
16
  )
@@ -1,6 +1,7 @@
1
1
  from typing import Optional
2
2
 
3
3
  from .health_checker import HealthChecker, HealthCheckResult
4
+ from .metrics.health_checker import metric_healthy
4
5
 
5
6
  _HEALTH_CHECKER_NAME = "GenericHealthChecker"
6
7
 
@@ -13,13 +14,16 @@ class GenericHealthChecker(HealthChecker):
13
14
 
14
15
  def __init__(self):
15
16
  self._server_connection_unhealthy_status_message: Optional[str] = None
17
+ metric_healthy.set(1)
16
18
 
17
19
  def server_connection_state_changed(self, is_healthy: bool, status_message: str):
18
20
  """Handle changes in server connection state."""
19
21
  if is_healthy:
20
22
  self._server_connection_unhealthy_status_message = None
23
+ metric_healthy.set(1)
21
24
  else:
22
25
  self._server_connection_unhealthy_status_message = status_message
26
+ metric_healthy.set(0)
23
27
 
24
28
  async def check(self) -> HealthCheckResult:
25
29
  if self._server_connection_unhealthy_status_message is not None:
@@ -0,0 +1,5 @@
1
+ import prometheus_client
2
+
3
+ metric_healthy = prometheus_client.Gauge(
4
+ "healthy", "1 if the executor is healthy, 0 otherwise"
5
+ )
@@ -1,6 +1,15 @@
1
1
  import asyncio
2
2
  from pathlib import Path
3
- from typing import Any, AsyncGenerator, Dict, Iterable, List, Optional, Set
3
+ from typing import (
4
+ Any,
5
+ AsyncIterable,
6
+ AsyncIterator,
7
+ Dict,
8
+ Iterable,
9
+ List,
10
+ Optional,
11
+ Set,
12
+ )
4
13
 
5
14
  from tensorlake.function_executor.proto.message_validator import MessageValidator
6
15
 
@@ -33,6 +42,10 @@ from .state_reporter import ExecutorStateReporter
33
42
 
34
43
  _RECONCILE_STREAM_BACKOFF_INTERVAL_SEC = 5
35
44
  _RECONCILIATION_RETRIES = 3
45
+ # If we didn't get a new desired state from the stream within this timeout then the stream might
46
+ # not be healthy due to network disruption. In this case we need to recreate the stream to make
47
+ # sure that Server really doesn't want to send us a new state.
48
+ _DESIRED_EXECUTOR_STATES_TIMEOUT_SEC = 5 * 60 # 5 minutes
36
49
 
37
50
 
38
51
  class ExecutorStateReconciler:
@@ -141,16 +154,15 @@ class ExecutorStateReconciler:
141
154
  Never raises any exceptions. Get cancelled via aio task cancellation.
142
155
  """
143
156
  while True:
157
+ desired_states_stream: Optional[AsyncIterable[DesiredExecutorState]] = None
144
158
  try:
145
- stub = ExecutorAPIStub(await self._channel_manager.get_channel())
159
+ stub = ExecutorAPIStub(await self._channel_manager.get_shared_channel())
146
160
  # Report state once before starting the stream so Server
147
161
  # doesn't use stale state it knew about this Executor in the past.
148
162
  await self._state_reporter.report_state_and_wait_for_completion()
149
163
 
150
- desired_states_stream: AsyncGenerator[DesiredExecutorState, None] = (
151
- stub.get_desired_executor_states(
152
- GetDesiredExecutorStatesRequest(executor_id=self._executor_id)
153
- )
164
+ desired_states_stream = stub.get_desired_executor_states(
165
+ GetDesiredExecutorStatesRequest(executor_id=self._executor_id)
154
166
  )
155
167
  self._logger.info("created new desired states stream")
156
168
  await self._process_desired_states_stream(desired_states_stream)
@@ -159,6 +171,11 @@ class ExecutorStateReconciler:
159
171
  f"error while processing desired states stream",
160
172
  exc_info=e,
161
173
  )
174
+ finally:
175
+ # Cleanly signal Server that the stream is closed by client.
176
+ # See https://stackoverflow.com/questions/72207914/how-to-stop-listening-on-a-stream-in-python-grpc-client
177
+ if desired_states_stream is not None:
178
+ desired_states_stream.cancel()
162
179
 
163
180
  self._logger.info(
164
181
  f"desired states stream closed, reconnecting in {self._server_backoff_interval_sec} sec"
@@ -166,10 +183,21 @@ class ExecutorStateReconciler:
166
183
  await asyncio.sleep(self._server_backoff_interval_sec)
167
184
 
168
185
  async def _process_desired_states_stream(
169
- self, desired_states: AsyncGenerator[DesiredExecutorState, None]
186
+ self, desired_states: AsyncIterable[DesiredExecutorState]
170
187
  ):
171
- async for new_state in desired_states:
172
- new_state: DesiredExecutorState
188
+ desired_states_iter: AsyncIterator[DesiredExecutorState] = aiter(desired_states)
189
+ while True:
190
+ try:
191
+ new_state: DesiredExecutorState = await asyncio.wait_for(
192
+ anext(desired_states_iter),
193
+ timeout=_DESIRED_EXECUTOR_STATES_TIMEOUT_SEC,
194
+ )
195
+ except asyncio.TimeoutError:
196
+ self._logger.info(
197
+ f"No desired state received from Server within {_DESIRED_EXECUTOR_STATES_TIMEOUT_SEC} sec, recreating the stream to ensure it is healthy"
198
+ )
199
+ break # Timeout reached, stream might be unhealthy, exit the loop to recreate the stream.
200
+
173
201
  validator: MessageValidator = MessageValidator(new_state)
174
202
  try:
175
203
  validator.required_field("clock")
@@ -30,10 +30,11 @@ from .function_executor_controller.loggers import task_result_logger
30
30
  from .host_resources.host_resources import HostResources, HostResourcesProvider
31
31
  from .host_resources.nvidia_gpu import NVIDIA_GPU_MODEL
32
32
  from .metrics.state_reporter import (
33
- metric_state_report_errors,
34
- metric_state_report_latency,
33
+ metric_state_report_rpc_errors,
34
+ metric_state_report_rpc_latency,
35
35
  metric_state_report_rpcs,
36
36
  )
37
+ from .monitoring.health_checker.health_checker import HealthChecker
37
38
 
38
39
  _REPORTING_INTERVAL_SEC = 5
39
40
  _REPORTING_BACKOFF_SEC = 5
@@ -49,6 +50,7 @@ class ExecutorStateReporter:
49
50
  function_allowlist: List[FunctionURI],
50
51
  channel_manager: ChannelManager,
51
52
  host_resources_provider: HostResourcesProvider,
53
+ health_checker: HealthChecker,
52
54
  logger: Any,
53
55
  ):
54
56
  self._executor_id: str = executor_id
@@ -57,6 +59,7 @@ class ExecutorStateReporter:
57
59
  self._labels.update(_executor_labels())
58
60
  self._hostname: str = gethostname()
59
61
  self._channel_manager = channel_manager
62
+ self._health_checker: HealthChecker = health_checker
60
63
  self._logger: Any = logger.bind(module=__name__)
61
64
  self._allowed_functions: List[AllowedFunction] = _to_allowed_function_protos(
62
65
  function_allowlist
@@ -167,10 +170,15 @@ class ExecutorStateReporter:
167
170
  # Don't retry state report if it failed during shutdown.
168
171
  # We only do best effort last state report and Server might not be available.
169
172
  try:
170
- async with self._channel_manager.create_channel() as channel:
171
- stub = ExecutorAPIStub(channel)
172
- await self._report_state(stub)
173
- except BaseException as e:
173
+ async with self._channel_manager.create_standalone_channel() as channel:
174
+ await ExecutorAPIStub(channel).report_executor_state(
175
+ ReportExecutorStateRequest(
176
+ executor_state=self._current_executor_state(),
177
+ executor_update=self._remove_pending_update(),
178
+ ),
179
+ timeout=_REPORT_RPC_TIMEOUT_SEC,
180
+ )
181
+ except Exception as e:
174
182
  self._logger.error(
175
183
  "failed to report state during shutdown",
176
184
  exc_info=e,
@@ -187,60 +195,48 @@ class ExecutorStateReporter:
187
195
  Never raises any exceptions.
188
196
  """
189
197
  while True:
190
- stub = ExecutorAPIStub(await self._channel_manager.get_channel())
198
+ stub = ExecutorAPIStub(await self._channel_manager.get_shared_channel())
191
199
  while True:
192
200
  await self._state_report_scheduled_event.wait()
193
201
  # Clear the event immediately to report again asap if needed. This reduces latency in the system.
194
202
  self._state_report_scheduled_event.clear()
195
203
  try:
196
- # The periodic state reports serve as channel health monitoring requests
197
- # (same as TCP keep-alive). Channel Manager returns the same healthy channel
198
- # for all RPCs that we do from Executor to Server. So all the RPCs benefit
199
- # from this channel health monitoring.
200
- await self._report_state(stub)
204
+ state: ExecutorState = self._current_executor_state()
205
+ update: ExecutorUpdate = self._remove_pending_update()
206
+ _log_reported_executor_update(update, self._logger)
207
+
208
+ with (
209
+ metric_state_report_rpc_errors.count_exceptions(),
210
+ metric_state_report_rpc_latency.time(),
211
+ ):
212
+ metric_state_report_rpcs.inc()
213
+ await stub.report_executor_state(
214
+ ReportExecutorStateRequest(
215
+ executor_state=state, executor_update=update
216
+ ),
217
+ timeout=_REPORT_RPC_TIMEOUT_SEC,
218
+ )
201
219
  self._state_reported_event.set()
220
+ self._health_checker.server_connection_state_changed(
221
+ is_healthy=True, status_message="grpc server channel is healthy"
222
+ )
202
223
  except Exception as e:
224
+ self._add_to_pending_update(update)
203
225
  self._logger.error(
204
226
  f"failed to report state to the server, backing-off for {_REPORTING_BACKOFF_SEC} sec.",
205
227
  exc_info=e,
206
228
  )
229
+ # The periodic state reports serve as channel health monitoring requests
230
+ # (same as TCP keep-alive). Channel Manager returns the same healthy channel
231
+ # for all RPCs that we do from Executor to Server. So all the RPCs benefit
232
+ # from this channel health monitoring.
233
+ self._health_checker.server_connection_state_changed(
234
+ is_healthy=False,
235
+ status_message="grpc server channel is unhealthy",
236
+ )
237
+ await self._channel_manager.fail_shared_channel()
207
238
  await asyncio.sleep(_REPORTING_BACKOFF_SEC)
208
- break # exit the inner loop to recreate the channel if needed
209
-
210
- async def _report_state(self, stub: ExecutorAPIStub):
211
- """Reports the current state to the server represented by the supplied stub.
212
-
213
- Raises an exception on failure.
214
- """
215
- with (
216
- metric_state_report_errors.count_exceptions(),
217
- metric_state_report_latency.time(),
218
- ):
219
- metric_state_report_rpcs.inc()
220
- state: ExecutorState = self._current_executor_state()
221
- update: ExecutorUpdate = self._remove_pending_update()
222
-
223
- for task_result in update.task_results:
224
- task_result_logger(task_result, self._logger).info(
225
- "reporting task outcome",
226
- outcome_code=TaskOutcomeCode.Name(task_result.outcome_code),
227
- failure_reason=(
228
- TaskFailureReason.Name(task_result.failure_reason)
229
- if task_result.HasField("failure_reason")
230
- else "None"
231
- ),
232
- )
233
-
234
- try:
235
- await stub.report_executor_state(
236
- ReportExecutorStateRequest(
237
- executor_state=state, executor_update=update
238
- ),
239
- timeout=_REPORT_RPC_TIMEOUT_SEC,
240
- )
241
- except Exception as e:
242
- self._add_to_pending_update(update)
243
- raise
239
+ break # exit the inner loop to use the recreated channel
244
240
 
245
241
  def _current_executor_state(self) -> ExecutorState:
246
242
  """Returns the current executor state."""
@@ -284,6 +280,28 @@ class ExecutorStateReporter:
284
280
  self.add_function_executor_update(function_executor_update)
285
281
 
286
282
 
283
+ def _log_reported_executor_update(update: ExecutorUpdate, logger: Any) -> None:
284
+ """Logs the reported executor update.
285
+
286
+ Doesn't raise any exceptions."""
287
+ try:
288
+ for task_result in update.task_results:
289
+ task_result_logger(task_result, logger).info(
290
+ "reporting task outcome",
291
+ outcome_code=TaskOutcomeCode.Name(task_result.outcome_code),
292
+ failure_reason=(
293
+ TaskFailureReason.Name(task_result.failure_reason)
294
+ if task_result.HasField("failure_reason")
295
+ else "None"
296
+ ),
297
+ )
298
+ except Exception as e:
299
+ logger.error(
300
+ "failed to log reported executor update",
301
+ exc_info=e,
302
+ )
303
+
304
+
287
305
  def _to_allowed_function_protos(
288
306
  function_allowlist: List[FunctionURI],
289
307
  ) -> List[AllowedFunction]:
@@ -288,7 +288,5 @@ service ExecutorAPI {
288
288
 
289
289
  // Called by Executor to open a stream of its desired states. When Server wants Executor to change something
290
290
  // it puts a message on the stream with the new desired state of the Executor.
291
- //
292
- // Deprecated HTTP API is used to download the serialized graph and task inputs.
293
291
  rpc get_desired_executor_states(GetDesiredExecutorStatesRequest) returns (stream DesiredExecutorState) {}
294
292
  }
@@ -79,8 +79,6 @@ class ExecutorAPIServicer(object):
79
79
  def get_desired_executor_states(self, request, context):
80
80
  """Called by Executor to open a stream of its desired states. When Server wants Executor to change something
81
81
  it puts a message on the stream with the new desired state of the Executor.
82
-
83
- Deprecated HTTP API is used to download the serialized graph and task inputs.
84
82
  """
85
83
  context.set_code(grpc.StatusCode.UNIMPLEMENTED)
86
84
  context.set_details("Method not implemented!")
@@ -1,195 +0,0 @@
1
- import asyncio
2
- import time
3
- from typing import Any, Dict, Optional
4
-
5
- import grpc.aio
6
- import yaml
7
-
8
- from .metrics.channel_manager import (
9
- metric_grpc_server_channel_creation_latency,
10
- metric_grpc_server_channel_creation_retries,
11
- metric_grpc_server_channel_creations,
12
- )
13
- from .monitoring.health_checker.health_checker import HealthChecker
14
-
15
- _RETRY_INTERVAL_SEC = 5
16
- _CONNECT_TIMEOUT_SEC = 5
17
-
18
-
19
- class ChannelManager:
20
- def __init__(
21
- self,
22
- server_address: str,
23
- config_path: Optional[str],
24
- health_checker: HealthChecker,
25
- logger: Any,
26
- ):
27
- self._logger: Any = logger.bind(module=__name__, server_address=server_address)
28
- self._server_address: str = server_address
29
- self._health_checker: HealthChecker = health_checker
30
- self._channel_credentials: Optional[grpc.ChannelCredentials] = None
31
- # This lock protects the fields below.
32
- self._lock = asyncio.Lock()
33
- self._channel: Optional[grpc.aio.Channel] = None
34
-
35
- self._init_tls(config_path)
36
-
37
- def _init_tls(self, config_path: Optional[str]):
38
- if config_path is None:
39
- return
40
-
41
- # The same config file format as in Tensorlake SDK HTTP client, see:
42
- # https://github.com/tensorlakeai/tensorlake/blob/main/src/tensorlake/utils/http_client.py
43
- with open(config_path, "r") as config_file:
44
- config = yaml.safe_load(config_file)
45
-
46
- if not config.get("use_tls", False):
47
- return
48
-
49
- tls_config: Dict[str, str] = config["tls_config"]
50
- cert_path: Optional[str] = tls_config.get("cert_path", None)
51
- key_path: Optional[str] = tls_config.get("key_path", None)
52
- ca_bundle_path: Optional[str] = tls_config.get("ca_bundle_path", None)
53
-
54
- self._logger = self._logger.bind(
55
- cert_path=cert_path,
56
- key_path=key_path,
57
- ca_bundle_path=ca_bundle_path,
58
- )
59
- self._logger.info("TLS is enabled for grpc channels to server")
60
-
61
- private_key: Optional[bytes] = None
62
- certificate_chain: Optional[bytes] = None
63
- root_certificates: Optional[bytes] = None
64
-
65
- if cert_path is not None:
66
- with open(cert_path, "rb") as cert_file:
67
- certificate_chain = cert_file.read()
68
- if key_path is not None:
69
- with open(key_path, "rb") as key_file:
70
- private_key = key_file.read()
71
- if ca_bundle_path is not None:
72
- with open(ca_bundle_path, "rb") as ca_bundle_file:
73
- root_certificates = ca_bundle_file.read()
74
-
75
- self._channel_credentials = grpc.ssl_channel_credentials(
76
- root_certificates=root_certificates,
77
- private_key=private_key,
78
- certificate_chain=certificate_chain,
79
- )
80
-
81
- async def destroy(self):
82
- if self._channel is not None:
83
- await self._destroy_locked_channel()
84
-
85
- async def get_channel(self) -> grpc.aio.Channel:
86
- """Returns a channel to the gRPC server.
87
-
88
- Returns a ready to use channel. Blocks until the channel is ready,
89
- never raises any exceptions.
90
- If previously returned channel is healthy then returns it again.
91
- Otherwise, returns a new channel but closes the previously returned one.
92
- """
93
- # Use the lock to ensure that we only create one channel without race conditions.
94
- async with self._lock:
95
- if self._channel is None:
96
- # Only called on Executor startup when we establish the channel for the first time.
97
- self._channel = await self._create_ready_channel()
98
- elif not await self._locked_channel_is_healthy():
99
- self._logger.info("grpc channel to server is unhealthy")
100
- self._health_checker.server_connection_state_changed(
101
- is_healthy=False,
102
- status_message="grpc channel to server is unhealthy",
103
- )
104
- await self._destroy_locked_channel()
105
- self._channel = await self._create_ready_channel()
106
- self._health_checker.server_connection_state_changed(
107
- is_healthy=True, status_message="grpc channel to server is healthy"
108
- )
109
-
110
- return self._channel
111
-
112
- def create_channel(self) -> grpc.aio.Channel:
113
- """Creates a new channel to the gRPC server.
114
-
115
- The channel is not ready to use. Raises an exception on failure.
116
- """
117
- if self._channel_credentials is None:
118
- return grpc.aio.insecure_channel(target=self._server_address)
119
- else:
120
- return grpc.aio.secure_channel(
121
- target=self._server_address,
122
- credentials=self._channel_credentials,
123
- )
124
-
125
- async def _create_ready_channel(self) -> grpc.aio.Channel:
126
- """Creates a new channel to the gRPC server."
127
-
128
- Returns a ready to use channel. Blocks until the channel
129
- is ready, never raises any exceptions.
130
- """
131
- with metric_grpc_server_channel_creation_latency.time():
132
- metric_grpc_server_channel_creations.inc()
133
- while True:
134
- try:
135
- self._logger.info("creating new grpc server channel")
136
- create_channel_start = time.monotonic()
137
- channel: grpc.Channel = self.create_channel()
138
- self._logger.info(
139
- "grpc server channel created",
140
- duration_sec=time.monotonic() - create_channel_start,
141
- )
142
-
143
- channel_ready_start = time.monotonic()
144
- await asyncio.wait_for(
145
- channel.channel_ready(),
146
- timeout=_CONNECT_TIMEOUT_SEC,
147
- )
148
- self._logger.info(
149
- "grpc server channel is established (ready)",
150
- duration_sec=time.monotonic() - channel_ready_start,
151
- )
152
-
153
- return channel
154
- except BaseException:
155
- self._logger.error(
156
- f"failed establishing grpc server channel in {_CONNECT_TIMEOUT_SEC} sec, retrying in {_RETRY_INTERVAL_SEC} sec"
157
- )
158
- try:
159
- await channel.close()
160
- except BaseException as e:
161
- self._logger.error(
162
- "failed closing not established channel", exc_info=e
163
- )
164
-
165
- metric_grpc_server_channel_creation_retries.inc()
166
- await asyncio.sleep(_RETRY_INTERVAL_SEC)
167
-
168
- async def _locked_channel_is_healthy(self) -> bool:
169
- """Checks if the channel is healthy.
170
-
171
- Returns True if the channel is healthy, False otherwise.
172
- self._lock must be acquired before calling this method.
173
- Never raises any exceptions.
174
- """
175
- try:
176
- return self._channel.get_state() == grpc.ChannelConnectivity.READY
177
- except Exception as e:
178
- # Assume that the channel is healthy because get_state() method is marked as experimental
179
- # so we can't fully trust it.
180
- self._logger.error(
181
- "failed getting channel state, assuming channel is healthy", exc_info=e
182
- )
183
- return True
184
-
185
- async def _destroy_locked_channel(self):
186
- """Closes the existing channel.
187
-
188
- self._lock must be acquired before calling this method.
189
- Never raises any exceptions.
190
- """
191
- try:
192
- await self._channel.close()
193
- except Exception as e:
194
- self._logger.error("failed closing channel", exc_info=e)
195
- self._channel = None
File without changes
@@ -69,7 +69,6 @@ class Executor:
69
69
  self._channel_manager = ChannelManager(
70
70
  server_address=grpc_server_addr,
71
71
  config_path=config_path,
72
- health_checker=health_checker,
73
72
  logger=self._logger,
74
73
  )
75
74
  function_allowlist: List[FunctionURI] = parse_function_uris(function_uris)
@@ -80,6 +79,7 @@ class Executor:
80
79
  function_allowlist=function_allowlist,
81
80
  channel_manager=self._channel_manager,
82
81
  host_resources_provider=host_resources_provider,
82
+ health_checker=health_checker,
83
83
  logger=self._logger,
84
84
  )
85
85
  self._state_reporter.update_executor_status(