indexify 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. indexify/cli/cli.py +38 -78
  2. indexify/executor/api_objects.py +4 -0
  3. indexify/executor/downloader.py +45 -5
  4. indexify/executor/executor.py +103 -16
  5. indexify/executor/function_executor/function_executor.py +174 -55
  6. indexify/executor/function_executor/function_executor_state.py +6 -0
  7. indexify/executor/function_executor/function_executor_states_container.py +64 -0
  8. indexify/executor/function_executor/health_checker.py +20 -10
  9. indexify/executor/function_executor/invocation_state_client.py +31 -6
  10. indexify/executor/function_executor/metrics/function_executor.py +142 -0
  11. indexify/executor/function_executor/metrics/function_executor_state.py +10 -0
  12. indexify/executor/function_executor/metrics/function_executor_state_container.py +10 -0
  13. indexify/executor/function_executor/metrics/health_checker.py +14 -0
  14. indexify/executor/function_executor/metrics/invocation_state_client.py +45 -0
  15. indexify/executor/function_executor/metrics/single_task_runner.py +22 -0
  16. indexify/executor/function_executor/single_task_runner.py +44 -15
  17. indexify/executor/function_executor/task_output.py +7 -1
  18. indexify/executor/metrics/downloader.py +69 -0
  19. indexify/executor/metrics/executor.py +51 -0
  20. indexify/executor/metrics/task_fetcher.py +21 -0
  21. indexify/executor/metrics/task_reporter.py +22 -0
  22. indexify/executor/metrics/task_runner.py +45 -0
  23. indexify/executor/monitoring/function_allowlist.py +25 -0
  24. indexify/executor/monitoring/handler.py +8 -0
  25. indexify/executor/monitoring/health_check_handler.py +20 -0
  26. indexify/executor/monitoring/health_checker/generic_health_checker.py +58 -0
  27. indexify/executor/monitoring/health_checker/health_checker.py +23 -0
  28. indexify/executor/monitoring/metrics.py +245 -0
  29. indexify/executor/monitoring/prometheus_metrics_handler.py +18 -0
  30. indexify/executor/monitoring/server.py +41 -0
  31. indexify/executor/monitoring/startup_probe_handler.py +17 -0
  32. indexify/executor/task_fetcher.py +15 -1
  33. indexify/executor/task_reporter.py +24 -7
  34. indexify/executor/task_runner.py +64 -46
  35. {indexify-0.3.8.dist-info → indexify-0.3.10.dist-info}/METADATA +4 -2
  36. indexify-0.3.10.dist-info/RECORD +46 -0
  37. indexify-0.3.8.dist-info/RECORD +0 -25
  38. {indexify-0.3.8.dist-info → indexify-0.3.10.dist-info}/WHEEL +0 -0
  39. {indexify-0.3.8.dist-info → indexify-0.3.10.dist-info}/entry_points.txt +0 -0
@@ -13,6 +13,33 @@ from tensorlake.utils.http_client import get_httpx_client
13
13
 
14
14
  from .health_checker import HealthChecker
15
15
  from .invocation_state_client import InvocationStateClient
16
+ from .metrics.function_executor import (
17
+ metric_create_errors,
18
+ metric_create_health_checker_errors,
19
+ metric_create_health_checker_latency,
20
+ metric_create_invocation_state_client_errors,
21
+ metric_create_invocation_state_client_latency,
22
+ metric_create_latency,
23
+ metric_create_server_errors,
24
+ metric_create_server_latency,
25
+ metric_creations,
26
+ metric_destroy_channel_errors,
27
+ metric_destroy_channel_latency,
28
+ metric_destroy_errors,
29
+ metric_destroy_health_checker_errors,
30
+ metric_destroy_health_checker_latency,
31
+ metric_destroy_invocation_state_client_errors,
32
+ metric_destroy_invocation_state_client_latency,
33
+ metric_destroy_latency,
34
+ metric_destroy_server_errors,
35
+ metric_destroy_server_latency,
36
+ metric_destroys,
37
+ metric_establish_channel_errors,
38
+ metric_establish_channel_latency,
39
+ metric_function_executors_count,
40
+ metric_initialize_rpc_errors,
41
+ metric_initialize_rpc_latency,
42
+ )
16
43
  from .server.function_executor_server import (
17
44
  FUNCTION_EXECUTOR_SERVER_READY_TIMEOUT_SEC,
18
45
  FunctionExecutorServer,
@@ -47,6 +74,7 @@ class FunctionExecutor:
47
74
  self._invocation_state_client: Optional[InvocationStateClient] = None
48
75
  self._health_checker: Optional[HealthChecker] = None
49
76
  self._initialized = False
77
+ metric_function_executors_count.inc()
50
78
 
51
79
  async def initialize(
52
80
  self,
@@ -60,31 +88,23 @@ class FunctionExecutor:
60
88
  Raises CustomerError if the server failed to initialize due to an error in customer owned code or data.
61
89
  Raises an Exception if an internal error occurred."""
62
90
  try:
63
- self._server = await self._server_factory.create(
64
- config=config, logger=self._logger
65
- )
66
- self._channel = await self._server.create_channel(self._logger)
67
- await _channel_ready(self._channel)
68
-
69
- stub: FunctionExecutorStub = FunctionExecutorStub(self._channel)
70
- await _initialize_server(stub, initialize_request)
71
-
72
- self._invocation_state_client = InvocationStateClient(
73
- stub=stub,
74
- base_url=base_url,
75
- http_client=get_httpx_client(config_path=config_path, make_async=True),
76
- graph=initialize_request.graph_name,
77
- namespace=initialize_request.namespace,
78
- logger=self._logger,
79
- )
80
- await self._invocation_state_client.start()
81
-
82
- self._health_checker = HealthChecker(
83
- stub=stub,
84
- logger=self._logger,
85
- )
86
-
87
- self._initialized = True
91
+ with (
92
+ metric_create_errors.count_exceptions(),
93
+ metric_create_latency.time(),
94
+ ):
95
+ metric_creations.inc()
96
+ await self._create_server(config)
97
+ await self._establish_channel()
98
+ stub: FunctionExecutorStub = FunctionExecutorStub(self._channel)
99
+ await _initialize_server(stub, initialize_request)
100
+ await self._create_invocation_state_client(
101
+ stub=stub,
102
+ base_url=base_url,
103
+ config_path=config_path,
104
+ initialize_request=initialize_request,
105
+ )
106
+ await self._create_health_checker(stub)
107
+ self._initialized = True
88
108
  except Exception:
89
109
  await self.destroy()
90
110
  raise
@@ -106,56 +126,155 @@ class FunctionExecutor:
106
126
 
107
127
  Never raises any exceptions but logs them."""
108
128
  try:
109
- if self._health_checker is not None:
110
- self._health_checker.stop()
111
- self._health_checker = None
129
+ with (
130
+ metric_destroy_errors.count_exceptions(),
131
+ metric_destroy_latency.time(),
132
+ ):
133
+ metric_function_executors_count.dec()
134
+ metric_destroys.inc()
135
+ await self._destroy_health_checker()
136
+ await self._destroy_invocation_state_client()
137
+ await self._destroy_channel()
138
+ await self._destroy_server()
112
139
  except Exception as e:
113
- self._logger.error("failed to stop HealthChecker", exc_info=e)
140
+ self._logger.error(
141
+ "exception from a Function Executor destroy step, some destroy steps are not executed, this is a resource leak",
142
+ exc_info=e,
143
+ )
144
+
145
+ def _check_initialized(self) -> None:
146
+ if not self._initialized:
147
+ raise RuntimeError("FunctionExecutor is not initialized")
148
+
149
+ async def _create_server(self, config: FunctionExecutorServerConfiguration) -> None:
150
+ with (
151
+ metric_create_server_errors.count_exceptions(),
152
+ metric_create_server_latency.time(),
153
+ ):
154
+ self._server = await self._server_factory.create(
155
+ config=config, logger=self._logger
156
+ )
157
+
158
+ async def _destroy_server(self) -> None:
159
+ if self._server is None:
160
+ return
114
161
 
115
162
  try:
116
- if self._invocation_state_client is not None:
117
- await self._invocation_state_client.destroy()
118
- self._invocation_state_client = None
163
+ with (
164
+ metric_destroy_server_errors.count_exceptions(),
165
+ metric_destroy_server_latency.time(),
166
+ ):
167
+ await self._server_factory.destroy(self._server, self._logger)
119
168
  except Exception as e:
120
- self._logger.error(
121
- "failed to destroy FunctionExecutor invocation state client", exc_info=e
169
+ self._logger.error("failed to destroy FunctionExecutorServer", exc_info=e)
170
+ finally:
171
+ self._server = None
172
+
173
+ async def _establish_channel(self) -> None:
174
+ with (
175
+ metric_establish_channel_errors.count_exceptions(),
176
+ metric_establish_channel_latency.time(),
177
+ ):
178
+ self._channel = await self._server.create_channel(self._logger)
179
+ await asyncio.wait_for(
180
+ self._channel.channel_ready(),
181
+ timeout=FUNCTION_EXECUTOR_SERVER_READY_TIMEOUT_SEC,
122
182
  )
123
183
 
184
+ async def _destroy_channel(self) -> None:
185
+ if self._channel is None:
186
+ return
187
+
124
188
  try:
125
- if self._channel is not None:
189
+ with (
190
+ metric_destroy_channel_errors.count_exceptions(),
191
+ metric_destroy_channel_latency.time(),
192
+ ):
126
193
  await self._channel.close()
127
- self._channel = None
128
194
  except Exception as e:
129
195
  self._logger.error(
130
196
  "failed to close FunctionExecutorServer channel", exc_info=e
131
197
  )
198
+ finally:
199
+ self._channel = None
200
+
201
+ async def _create_invocation_state_client(
202
+ self,
203
+ stub: FunctionExecutorStub,
204
+ base_url: str,
205
+ config_path: Optional[str],
206
+ initialize_request: InitializeRequest,
207
+ ) -> None:
208
+ with (
209
+ metric_create_invocation_state_client_errors.count_exceptions(),
210
+ metric_create_invocation_state_client_latency.time(),
211
+ ):
212
+ self._invocation_state_client = InvocationStateClient(
213
+ stub=stub,
214
+ base_url=base_url,
215
+ http_client=get_httpx_client(config_path=config_path, make_async=True),
216
+ graph=initialize_request.graph_name,
217
+ namespace=initialize_request.namespace,
218
+ logger=self._logger,
219
+ )
220
+ await self._invocation_state_client.start()
221
+
222
+ async def _destroy_invocation_state_client(self) -> None:
223
+ if self._invocation_state_client is None:
224
+ return
132
225
 
133
226
  try:
134
- if self._server is not None:
135
- await self._server_factory.destroy(self._server, self._logger)
136
- self._server = None
227
+ with (
228
+ metric_destroy_invocation_state_client_errors.count_exceptions(),
229
+ metric_destroy_invocation_state_client_latency.time(),
230
+ ):
231
+ await self._invocation_state_client.destroy()
137
232
  except Exception as e:
138
- self._logger.error("failed to destroy FunctionExecutorServer", exc_info=e)
233
+ self._logger.error(
234
+ "failed to destroy FunctionExecutor invocation state client", exc_info=e
235
+ )
236
+ finally:
237
+ self._invocation_state_client = None
139
238
 
140
- def _check_initialized(self):
141
- if not self._initialized:
142
- raise RuntimeError("FunctionExecutor is not initialized")
239
+ async def _create_health_checker(self, stub: FunctionExecutorStub) -> None:
240
+ with (
241
+ metric_create_health_checker_errors.count_exceptions(),
242
+ metric_create_health_checker_latency.time(),
243
+ ):
244
+ self._health_checker = HealthChecker(
245
+ stub=stub,
246
+ logger=self._logger,
247
+ )
143
248
 
249
+ async def _destroy_health_checker(self) -> None:
250
+ if self._health_checker is None:
251
+ return
144
252
 
145
- async def _channel_ready(channel: grpc.aio.Channel):
146
- await asyncio.wait_for(
147
- channel.channel_ready(),
148
- timeout=FUNCTION_EXECUTOR_SERVER_READY_TIMEOUT_SEC,
149
- )
253
+ try:
254
+ with (
255
+ metric_destroy_health_checker_errors.count_exceptions(),
256
+ metric_destroy_health_checker_latency.time(),
257
+ ):
258
+ self._health_checker.stop()
259
+ except Exception as e:
260
+ self._logger.error("failed to stop HealthChecker", exc_info=e)
261
+ finally:
262
+ self._health_checker = None
150
263
 
151
264
 
152
265
  async def _initialize_server(
153
266
  stub: FunctionExecutorStub, initialize_request: InitializeRequest
154
267
  ):
155
- initialize_response: InitializeResponse = await stub.initialize(initialize_request)
156
- if initialize_response.success:
157
- return
158
- if initialize_response.HasField("customer_error"):
159
- raise CustomerError(initialize_response.customer_error)
160
- else:
161
- raise Exception("initialize RPC failed at function executor server")
268
+ with (
269
+ metric_initialize_rpc_errors.count_exceptions(),
270
+ metric_initialize_rpc_latency.time(),
271
+ ):
272
+ initialize_response: InitializeResponse = await stub.initialize(
273
+ initialize_request
274
+ )
275
+ if initialize_response.success:
276
+ return
277
+ if initialize_response.HasField("customer_error"):
278
+ raise CustomerError(initialize_response.customer_error)
279
+ else:
280
+ raise Exception("initialize RPC failed at function executor server")
@@ -2,6 +2,9 @@ import asyncio
2
2
  from typing import Optional
3
3
 
4
4
  from .function_executor import FunctionExecutor
5
+ from .metrics.function_executor_state import (
6
+ metric_function_executor_state_not_locked_errors,
7
+ )
5
8
 
6
9
 
7
10
  class FunctionExecutorState:
@@ -18,6 +21,8 @@ class FunctionExecutorState:
18
21
  # All the fields below are protected by the lock.
19
22
  self.lock: asyncio.Lock = asyncio.Lock()
20
23
  self.is_shutdown: bool = False
24
+ # Set to True if a Function Executor health check ever failed.
25
+ self.health_check_failed: bool = False
21
26
  self.function_executor: Optional[FunctionExecutor] = None
22
27
  self.running_tasks: int = 0
23
28
  self.running_tasks_change_notifier: asyncio.Condition = asyncio.Condition(
@@ -75,4 +80,5 @@ class FunctionExecutorState:
75
80
  def check_locked(self) -> None:
76
81
  """Raises an exception if the lock is not held."""
77
82
  if not self.lock.locked():
83
+ metric_function_executor_state_not_locked_errors.inc()
78
84
  raise RuntimeError("The FunctionExecutorState lock must be held.")
@@ -0,0 +1,64 @@
1
+ import asyncio
2
+ from typing import AsyncGenerator, Dict
3
+
4
+ from ..api_objects import Task
5
+ from .function_executor_state import FunctionExecutorState
6
+ from .metrics.function_executor_state_container import (
7
+ metric_function_executor_states_count,
8
+ )
9
+
10
+
11
+ class FunctionExecutorStatesContainer:
12
+ """An asyncio concurrent container for the function executor states."""
13
+
14
+ def __init__(self):
15
+ # The fields below are protected by the lock.
16
+ self._lock: asyncio.Lock = asyncio.Lock()
17
+ self._states: Dict[str, FunctionExecutorState] = {}
18
+ self._is_shutdown: bool = False
19
+
20
+ async def get_or_create_state(self, task: Task) -> FunctionExecutorState:
21
+ """Get or create a function executor state for the given task.
22
+
23
+ Raises Exception if it's not possible to create a new state at this time."""
24
+ async with self._lock:
25
+ if self._is_shutdown:
26
+ raise RuntimeError("Task runner is shutting down.")
27
+
28
+ id = function_id_without_version(task)
29
+ if id not in self._states:
30
+ state = FunctionExecutorState(
31
+ function_id_with_version=function_id_with_version(task),
32
+ function_id_without_version=id,
33
+ )
34
+ self._states[id] = state
35
+ metric_function_executor_states_count.set(len(self._states))
36
+
37
+ return self._states[id]
38
+
39
+ async def __aiter__(self) -> AsyncGenerator[FunctionExecutorState, None]:
40
+ async with self._lock:
41
+ for state in self._states.values():
42
+ yield state
43
+
44
+ async def shutdown(self):
45
+ # Function Executors are outside the Executor process
46
+ # so they need to get cleaned up explicitly and reliably.
47
+ async with self._lock:
48
+ self._is_shutdown = True # No new Function Executor States can be created.
49
+ while self._states:
50
+ id, state = self._states.popitem()
51
+ metric_function_executor_states_count.set(len(self._states))
52
 + # Only ongoing tasks that have a reference to the state already can see it.
53
+ # The state is unlocked while a task is running inside Function Executor.
54
+ async with state.lock:
55
+ await state.shutdown()
56
+ # The task running inside the Function Executor will fail because it's destroyed.
57
+
58
+
59
+ def function_id_with_version(task: Task) -> str:
60
+ return f"versioned/{task.namespace}/{task.compute_graph}/{task.graph_version}/{task.compute_fn}"
61
+
62
+
63
+ def function_id_without_version(task: Task) -> str:
64
+ return f"not_versioned/{task.namespace}/{task.compute_graph}/{task.compute_fn}"
@@ -11,6 +11,10 @@ from tensorlake.function_executor.proto.function_executor_pb2_grpc import (
11
11
  FunctionExecutorStub,
12
12
  )
13
13
 
14
+ from .metrics.health_checker import (
15
+ metric_failed_health_checks,
16
+ metric_health_check_latency,
17
+ )
14
18
  from .server.client_configuration import HEALTH_CHECK_TIMEOUT_SEC
15
19
 
16
20
  HEALTH_CHECK_POLL_PERIOD_SEC = 10
@@ -29,16 +33,22 @@ class HealthChecker:
29
33
  """Runs the health check once and returns the result.
30
34
 
31
35
  Does not raise any exceptions."""
32
- try:
33
- response: HealthCheckResponse = await self._stub.check_health(
34
- HealthCheckRequest(), timeout=HEALTH_CHECK_TIMEOUT_SEC
35
- )
36
- return response.healthy
37
- except AioRpcError:
38
- return False
39
- except Exception as e:
40
- self._logger.warning("Got unexpected exception, ignoring", exc_info=e)
41
- return False
36
+ with metric_health_check_latency.time():
37
+ try:
38
+ response: HealthCheckResponse = await self._stub.check_health(
39
+ HealthCheckRequest(), timeout=HEALTH_CHECK_TIMEOUT_SEC
40
+ )
41
+ if not response.healthy:
42
+ metric_failed_health_checks.inc()
43
+ return response.healthy
44
+ except AioRpcError:
45
+ metric_failed_health_checks.inc()
46
+ # Expected exception when there are problems with communication because e.g. the server is unhealthy.
47
+ return False
48
+ except Exception as e:
49
+ metric_failed_health_checks.inc()
50
+ self._logger.warning("Got unexpected exception, ignoring", exc_info=e)
51
+ return False
42
52
 
43
53
  def start(self, callback: Callable[[], Awaitable[None]]) -> None:
44
54
  """Starts periodic health checks.
@@ -16,6 +16,15 @@ from tensorlake.function_executor.proto.function_executor_pb2_grpc import (
16
16
  from tensorlake.function_executor.proto.message_validator import MessageValidator
17
17
 
18
18
  from ..downloader import serialized_object_from_http_response
19
+ from .metrics.invocation_state_client import (
20
+ metric_request_read_errors,
21
+ metric_server_get_state_request_errors,
22
+ metric_server_get_state_request_latency,
23
+ metric_server_get_state_requests,
24
+ metric_server_set_state_request_errors,
25
+ metric_server_set_state_request_latency,
26
+ metric_server_set_state_requests,
27
+ )
19
28
 
20
29
 
21
30
  class InvocationStateClient:
@@ -92,6 +101,12 @@ class InvocationStateClient:
92
101
  except asyncio.CancelledError:
93
102
  # This async task was cancelled by destroy(). Normal situation too.
94
103
  pass
104
+ except Exception as e:
105
+ metric_request_read_errors.inc()
106
+ self._logger.error(
107
+ "failed to read request from server, shutting down invocation state client",
108
+ exc_info=e,
109
+ )
95
110
 
96
111
  async def _process_request_no_raise(self, request: InvocationStateRequest) -> None:
97
112
  try:
@@ -122,9 +137,14 @@ class InvocationStateClient:
122
137
  # a privilege escalation attempt.
123
138
  invocation_id: str = self._task_id_to_invocation_id[request.task_id]
124
139
  if request.HasField("get"):
125
- value: Optional[SerializedObject] = await self._get_server_state(
126
- invocation_id, request.get.key
127
- )
140
+ with (
141
+ metric_server_get_state_request_errors.count_exceptions(),
142
+ metric_server_get_state_request_latency.time(),
143
+ ):
144
+ metric_server_get_state_requests.inc()
145
+ value: Optional[SerializedObject] = await self._get_server_state(
146
+ invocation_id, request.get.key
147
+ )
128
148
  await self._client_response_queue.put(
129
149
  InvocationStateResponse(
130
150
  request_id=request.request_id,
@@ -136,9 +156,14 @@ class InvocationStateClient:
136
156
  )
137
157
  )
138
158
  elif request.HasField("set"):
139
- await self._set_server_state(
140
- invocation_id, request.set.key, request.set.value
141
- )
159
+ with (
160
+ metric_server_set_state_request_errors.count_exceptions(),
161
+ metric_server_set_state_request_latency.time(),
162
+ ):
163
+ metric_server_set_state_requests.inc()
164
+ await self._set_server_state(
165
+ invocation_id, request.set.key, request.set.value
166
+ )
142
167
  await self._client_response_queue.put(
143
168
  InvocationStateResponse(
144
169
  request_id=request.request_id,
@@ -0,0 +1,142 @@
1
+ import prometheus_client
2
+
3
+ from ...monitoring.metrics import (
4
+ latency_metric_for_customer_controlled_operation,
5
+ latency_metric_for_fast_operation,
6
+ latency_metric_for_slow_operation,
7
+ )
8
+
9
+ # This file contains all metrics used by FunctionExecutor.
10
+
11
+ metric_function_executors_count = prometheus_client.Gauge(
12
+ "function_executors_count", "Number of existing Function Executors"
13
+ )
14
+
15
+ # Metrics about whole FE creation workflow.
16
+ metric_creations: prometheus_client.Counter = prometheus_client.Counter(
17
+ "function_executor_creates",
18
+ "Number of Function Executor creations",
19
+ )
20
+ metric_create_latency: prometheus_client.Histogram = (
21
+ latency_metric_for_customer_controlled_operation(
22
+ "function_executor_create", "Function Executor creation (aka cold start)"
23
+ )
24
+ )
25
+ metric_create_errors: prometheus_client.Counter = prometheus_client.Counter(
26
+ "function_executor_create_errors", "Number of Function Executor creation errors"
27
+ )
28
+
29
+ # Metrics about whole FE destroy workflow.
30
+ metric_destroys: prometheus_client.Counter = prometheus_client.Counter(
31
+ "function_executor_destroys", "Number of Function Executor destructions"
32
+ )
33
+ metric_destroy_latency: prometheus_client.Histogram = latency_metric_for_slow_operation(
34
+ "function_executor_destroy", "Function Executor destruction"
35
+ )
36
+ metric_destroy_errors: prometheus_client.Counter = prometheus_client.Counter(
37
+ "function_executor_destroy_errors",
38
+ "Number of Function Executor destruction errors, results in a resource leak",
39
+ )
40
+
41
+ # FE server create and destruction metrics.
42
+ metric_create_server_latency: prometheus_client.Histogram = (
43
+ latency_metric_for_slow_operation(
44
+ "function_executor_create_server", "Function Executor server creation"
45
+ )
46
+ )
47
+ metric_create_server_errors: prometheus_client.Counter = prometheus_client.Counter(
48
+ "function_executor_create_server_errors",
49
+ "Number of Function Executor server creation errors",
50
+ )
51
+ metric_destroy_server_latency: prometheus_client.Histogram = (
52
+ latency_metric_for_slow_operation(
53
+ "function_executor_destroy_server", "Function Executor server destruction"
54
+ )
55
+ )
56
+ metric_destroy_server_errors: prometheus_client.Counter = prometheus_client.Counter(
57
+ "function_executor_destroy_server_errors",
58
+ "Number of Function Executor server destruction errors",
59
+ )
60
+
61
+ # FE channel creation and destruction metrics.
62
+ metric_establish_channel_latency: prometheus_client.Histogram = (
63
+ latency_metric_for_fast_operation(
64
+ "function_executor_establish_channel", "Function Executor channel establishment"
65
+ )
66
+ )
67
+ metric_establish_channel_errors: prometheus_client.Counter = prometheus_client.Counter(
68
+ "function_executor_establish_channel_errors",
69
+ "Number of Function Executor channel establishment errors",
70
+ )
71
+ metric_destroy_channel_latency: prometheus_client.Histogram = (
72
+ latency_metric_for_fast_operation(
73
+ "function_executor_destroy_channel", "Function Executor channel destruction"
74
+ )
75
+ )
76
+ metric_destroy_channel_errors: prometheus_client.Counter = prometheus_client.Counter(
77
+ "function_executor_destroy_channel_errors",
78
+ "Number of Function Executor channel destruction errors",
79
+ )
80
+
81
+ # FE initialization RPC metrics.
82
+ metric_initialize_rpc_latency: prometheus_client.Histogram = (
83
+ latency_metric_for_customer_controlled_operation(
84
+ "function_executor_initialize_rpc", "Function Executor initialize RPC"
85
+ )
86
+ )
87
+ metric_initialize_rpc_errors: prometheus_client.Counter = prometheus_client.Counter(
88
+ "function_executor_initialize_rpc_errors",
89
+ "Number of Function Executor initialize RPC errors",
90
+ )
91
+
92
+ # FE invocation state client creation and destruction metrics.
93
+ metric_create_invocation_state_client_latency: prometheus_client.Histogram = (
94
+ latency_metric_for_fast_operation(
95
+ "function_executor_create_invocation_state_client",
96
+ "Function Executor invocation state client creation",
97
+ )
98
+ )
99
+ metric_create_invocation_state_client_errors: prometheus_client.Counter = (
100
+ prometheus_client.Counter(
101
+ "function_executor_create_invocation_state_client_errors",
102
+ "Number of Function Executor invocation state client creation errors",
103
+ )
104
+ )
105
+ metric_destroy_invocation_state_client_latency: prometheus_client.Histogram = (
106
+ latency_metric_for_fast_operation(
107
+ "function_executor_destroy_invocation_state_client",
108
+ "Function Executor invocation state client destruction",
109
+ )
110
+ )
111
+ metric_destroy_invocation_state_client_errors: prometheus_client.Counter = (
112
+ prometheus_client.Counter(
113
+ "function_executor_destroy_invocation_state_client_errors",
114
+ "Number of Function Executor invocation state client destruction errors",
115
+ )
116
+ )
117
+
118
+ # FE health checker creation and destruction metrics.
119
+ metric_create_health_checker_latency: prometheus_client.Histogram = (
120
+ latency_metric_for_fast_operation(
121
+ "function_executor_create_health_checker",
122
+ "Function Executor health checker creation",
123
+ )
124
+ )
125
+ metric_create_health_checker_errors: prometheus_client.Counter = (
126
+ prometheus_client.Counter(
127
+ "function_executor_create_health_checker_errors",
128
+ "Number of Function Executor health checker creation errors",
129
+ )
130
+ )
131
+ metric_destroy_health_checker_latency: prometheus_client.Histogram = (
132
+ latency_metric_for_fast_operation(
133
+ "function_executor_destroy_health_checker",
134
+ "Function Executor health checker destruction",
135
+ )
136
+ )
137
+ metric_destroy_health_checker_errors: prometheus_client.Counter = (
138
+ prometheus_client.Counter(
139
+ "function_executor_destroy_health_checker_errors",
140
+ "Number of Function Executor health checker destruction errors",
141
+ )
142
+ )
@@ -0,0 +1,10 @@
1
+ import prometheus_client
2
+
3
+ # This file contains all metrics used by FunctionExecutorState.
4
+
5
+ metric_function_executor_state_not_locked_errors: prometheus_client.Counter = (
6
+ prometheus_client.Counter(
7
+ "function_executor_state_not_locked_errors",
8
+ "Number of times a Function Executor state was used without acquiring its lock",
9
+ )
10
+ )
@@ -0,0 +1,10 @@
1
+ import prometheus_client
2
+
3
+ # This file contains all metrics used by FunctionExecutorStatesContainer.
4
+
5
+ metric_function_executor_states_count: prometheus_client.Gauge = (
6
+ prometheus_client.Gauge(
7
+ "function_executor_states_count",
8
+ "Number of existing Function Executor states",
9
+ )
10
+ )
@@ -0,0 +1,14 @@
1
+ import prometheus_client
2
+
3
+ from ...monitoring.metrics import latency_metric_for_fast_operation
4
+
5
+ # This file contains all metrics used by HealthChecker.
6
+
7
+ metric_failed_health_checks = prometheus_client.Counter(
8
+ "function_executor_failed_health_checks",
9
+ "Number of health checks that were not successful",
10
+ )
11
+ metric_health_check_latency = latency_metric_for_fast_operation(
12
+ "function_executor_health_check",
13
+ "Function Executor health check",
14
+ )