indexify 0.4.15__py3-none-any.whl → 0.4.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  import asyncio
2
- import os
2
+ import time
3
3
  from typing import Any, Dict, Optional
4
4
 
5
5
  import grpc.aio
@@ -10,16 +10,23 @@ from .metrics.channel_manager import (
10
10
  metric_grpc_server_channel_creation_retries,
11
11
  metric_grpc_server_channel_creations,
12
12
  )
13
+ from .monitoring.health_checker.health_checker import HealthChecker
13
14
 
14
15
  _RETRY_INTERVAL_SEC = 5
15
16
  _CONNECT_TIMEOUT_SEC = 5
16
17
 
17
18
 
18
19
  class ChannelManager:
19
- def __init__(self, server_address: str, config_path: Optional[str], logger: Any):
20
+ def __init__(
21
+ self,
22
+ server_address: str,
23
+ config_path: Optional[str],
24
+ health_checker: HealthChecker,
25
+ logger: Any,
26
+ ):
20
27
  self._logger: Any = logger.bind(module=__name__, server_address=server_address)
21
- self._keep_alive_period_sec: int = _keep_alive_period_sec_from_env(logger)
22
28
  self._server_address: str = server_address
29
+ self._health_checker: HealthChecker = health_checker
23
30
  self._channel_credentials: Optional[grpc.ChannelCredentials] = None
24
31
  # This lock protects the fields below.
25
32
  self._lock = asyncio.Lock()
@@ -86,31 +93,33 @@ class ChannelManager:
86
93
  # Use the lock to ensure that we only create one channel without race conditions.
87
94
  async with self._lock:
88
95
  if self._channel is None:
96
+ # Only called on Executor startup when we establish the channel for the first time.
89
97
  self._channel = await self._create_ready_channel()
90
98
  elif not await self._locked_channel_is_healthy():
91
99
  self._logger.info("grpc channel to server is unhealthy")
100
+ self._health_checker.server_connection_state_changed(
101
+ is_healthy=False,
102
+ status_message="grpc channel to server is unhealthy",
103
+ )
92
104
  await self._destroy_locked_channel()
93
105
  self._channel = await self._create_ready_channel()
106
+ self._health_checker.server_connection_state_changed(
107
+ is_healthy=True, status_message="grpc channel to server is healthy"
108
+ )
94
109
 
95
110
  return self._channel
96
111
 
97
112
  def create_channel(self) -> grpc.aio.Channel:
98
113
  """Creates a new channel to the gRPC server.
99
114
 
100
- The channel is not be ready to use. Raises an exception on failure.
115
+ The channel is not ready to use. Raises an exception on failure.
101
116
  """
102
- channel_options: list[tuple[str, int]] = _channel_options(
103
- self._keep_alive_period_sec
104
- )
105
117
  if self._channel_credentials is None:
106
- return grpc.aio.insecure_channel(
107
- target=self._server_address, options=channel_options
108
- )
118
+ return grpc.aio.insecure_channel(target=self._server_address)
109
119
  else:
110
120
  return grpc.aio.secure_channel(
111
121
  target=self._server_address,
112
122
  credentials=self._channel_credentials,
113
- options=channel_options,
114
123
  )
115
124
 
116
125
  async def _create_ready_channel(self) -> grpc.aio.Channel:
@@ -119,25 +128,36 @@ class ChannelManager:
119
128
  Returns a ready to use channel. Blocks until the channel
120
129
  is ready, never raises any exceptions.
121
130
  """
122
- self._logger.info("creating new grpc server channel")
123
-
124
131
  with metric_grpc_server_channel_creation_latency.time():
125
132
  metric_grpc_server_channel_creations.inc()
126
133
  while True:
127
134
  try:
128
- channel = self.create_channel()
135
+ self._logger.info("creating new grpc server channel")
136
+ create_channel_start = time.monotonic()
137
+ channel: grpc.Channel = self.create_channel()
138
+ self._logger.info(
139
+ "grpc server channel created",
140
+ duration_sec=time.monotonic() - create_channel_start,
141
+ )
142
+
143
+ channel_ready_start = time.monotonic()
129
144
  await asyncio.wait_for(
130
145
  channel.channel_ready(),
131
146
  timeout=_CONNECT_TIMEOUT_SEC,
132
147
  )
148
+ self._logger.info(
149
+ "grpc server channel is established (ready)",
150
+ duration_sec=time.monotonic() - channel_ready_start,
151
+ )
152
+
133
153
  return channel
134
- except Exception:
154
+ except BaseException:
135
155
  self._logger.error(
136
156
  f"failed establishing grpc server channel in {_CONNECT_TIMEOUT_SEC} sec, retrying in {_RETRY_INTERVAL_SEC} sec"
137
157
  )
138
158
  try:
139
159
  await channel.close()
140
- except Exception as e:
160
+ except BaseException as e:
141
161
  self._logger.error(
142
162
  "failed closing not established channel", exc_info=e
143
163
  )
@@ -173,45 +193,3 @@ class ChannelManager:
173
193
  except Exception as e:
174
194
  self._logger.error("failed closing channel", exc_info=e)
175
195
  self._channel = None
176
-
177
-
178
- def _channel_options(keep_alive_period_sec: int) -> list[tuple[str, int]]:
179
- """Returns the gRPC channel options."""
180
- # See https://grpc.io/docs/guides/keepalive/.
181
- #
182
- # NB: Rust Tonic framework that we're using in Server is not using gRPC core and doesn't support
183
- # these options. From https://github.com/hyperium/tonic/issues/258 it supports gRPC PINGs when
184
- # there are in-flight RPCs (and streams) without any extra configuration.
185
- return [
186
- ("grpc.keepalive_time_ms", keep_alive_period_sec * 1000),
187
- (
188
- "grpc.http2.max_pings_without_data",
189
- -1,
190
- ), # Allow any number of empty PING messages
191
- (
192
- "grpc.keepalive_permit_without_calls",
193
- 0,
194
- ), # Don't send PINGs when there are no in-flight RPCs (and streams)
195
- ]
196
-
197
-
198
- def _keep_alive_period_sec_from_env(logger: Any) -> int:
199
- """Returns the keep alive period in seconds."""
200
- # We have to use gRPC keep alive (PING) to prevent proxies/load-balancers from closing underlying HTTP/2
201
- # (TCP) connections due to periods of idleness in gRPC streams that we use between Executor and Server.
202
- # If a proxy/load-balancer closes the connection, then we see it as gRPC stream errors which results in
203
- # a lot of error logs noise.
204
- #
205
- # The default period of 50 sec is used for one of the standard proxy/load-balancer timeouts of 1 minute.
206
- DEFAULT_KEEP_ALIVE_PERIOD_SEC = "50"
207
- keep_alive_period_sec = int(
208
- os.getenv(
209
- "INDEXIFY_EXECUTOR_GRPC_KEEP_ALIVE_PERIOD_SEC",
210
- DEFAULT_KEEP_ALIVE_PERIOD_SEC,
211
- )
212
- )
213
- if keep_alive_period_sec != int(DEFAULT_KEEP_ALIVE_PERIOD_SEC):
214
- logger.info(
215
- f"gRPC keep alive (PING) period is set to {keep_alive_period_sec} sec"
216
- )
217
- return keep_alive_period_sec
@@ -69,6 +69,7 @@ class Executor:
69
69
  self._channel_manager = ChannelManager(
70
70
  server_address=grpc_server_addr,
71
71
  config_path=config_path,
72
+ health_checker=health_checker,
72
73
  logger=self._logger,
73
74
  )
74
75
  function_allowlist: List[FunctionURI] = parse_function_uris(function_uris)
@@ -1,6 +1,8 @@
1
+ from typing import Optional
2
+
1
3
  from .health_checker import HealthChecker, HealthCheckResult
2
4
 
3
- HEALTH_CHECKER_NAME = "GenericHealthChecker"
5
+ _HEALTH_CHECKER_NAME = "GenericHealthChecker"
4
6
 
5
7
 
6
8
  class GenericHealthChecker(HealthChecker):
@@ -10,11 +12,25 @@ class GenericHealthChecker(HealthChecker):
10
12
  """
11
13
 
12
14
  def __init__(self):
13
- pass
15
+ self._server_connection_unhealthy_status_message: Optional[str] = None
16
+
17
+ def server_connection_state_changed(self, is_healthy: bool, status_message: str):
18
+ """Handle changes in server connection state."""
19
+ if is_healthy:
20
+ self._server_connection_unhealthy_status_message = None
21
+ else:
22
+ self._server_connection_unhealthy_status_message = status_message
14
23
 
15
24
  async def check(self) -> HealthCheckResult:
25
+ if self._server_connection_unhealthy_status_message is not None:
26
+ return HealthCheckResult(
27
+ is_success=False,
28
+ status_message=self._server_connection_unhealthy_status_message,
29
+ checker_name=_HEALTH_CHECKER_NAME,
30
+ )
31
+
16
32
  return HealthCheckResult(
17
33
  is_success=True,
18
- status_message="The health check is always successful",
19
- checker_name=HEALTH_CHECKER_NAME,
34
+ status_message="Successful",
35
+ checker_name=_HEALTH_CHECKER_NAME,
20
36
  )
@@ -1,12 +1,16 @@
1
1
  class HealthCheckResult:
2
2
  def __init__(self, checker_name: str, is_success: bool, status_message: str):
3
- self.checker_name = checker_name
4
- self.is_success = is_success
5
- self.status_message = status_message
3
+ self.checker_name: str = checker_name
4
+ self.is_success: bool = is_success
5
+ self.status_message: str = status_message
6
6
 
7
7
 
8
8
  class HealthChecker:
9
9
  """Abstract base class for health checkers."""
10
10
 
11
+ def server_connection_state_changed(self, is_healthy: bool, status_message: str):
12
+ """Handle changes in server connection state."""
13
+ raise NotImplementedError("Subclasses must implement this method.")
14
+
11
15
  async def check(self) -> HealthCheckResult:
12
16
  raise NotImplementedError("Subclasses must implement this method.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: indexify
3
- Version: 0.4.15
3
+ Version: 0.4.16
4
4
  Summary: Open Source Indexify components and helper tools
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -14,10 +14,10 @@ Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
16
  Requires-Dist: aiohttp (>=3.12.14,<4.0.0)
17
- Requires-Dist: boto3 (>=1.39.4,<2.0.0)
17
+ Requires-Dist: boto3 (>=1.39.6,<2.0.0)
18
18
  Requires-Dist: prometheus-client (>=0.22.1,<0.23.0)
19
19
  Requires-Dist: psutil (>=7.0.0,<8.0.0)
20
- Requires-Dist: tensorlake (==0.2.24)
20
+ Requires-Dist: tensorlake (==0.2.25)
21
21
  Project-URL: Repository, https://github.com/tensorlakeai/indexify
22
22
  Description-Content-Type: text/markdown
23
23
 
@@ -7,8 +7,8 @@ indexify/executor/blob_store/blob_store.py,sha256=XViw_KRfFSNqwcFYwMZixZF-EYCjXK
7
7
  indexify/executor/blob_store/local_fs_blob_store.py,sha256=6LexqMBGXp8f6Ka95R6xMIUyDutrZJABOMNcp-ssa98,1809
8
8
  indexify/executor/blob_store/metrics/blob_store.py,sha256=5_xiPREeHWFtxFh1NupDsF8zP4pmUPgLNNn-UE9Uzvc,1008
9
9
  indexify/executor/blob_store/s3_blob_store.py,sha256=G3B_V3gUE7XbUY42lDtBczUKuA7q8S7MD43tx1aHrJo,3445
10
- indexify/executor/channel_manager.py,sha256=SeBaOYJXJ6KNw59c4C4n54qhcCusNda2756Rbu8pFFM,8642
11
- indexify/executor/executor.py,sha256=hR49lBFGaqSRj8RtLlzd4cd8hRUwhMrwUHwPx53EHDU,6310
10
+ indexify/executor/channel_manager.py,sha256=rulAmCvgEupKZ0esuO4COfosuhm1iVZ27IIqeqFu0Vg,7750
11
+ indexify/executor/executor.py,sha256=iMmysK_6jZkczcVqjzbQdXzrALEig_Qtb51MhQUfRqc,6353
12
12
  indexify/executor/function_allowlist.py,sha256=PCelCW6qIe_2sH11BCKr7LDqarRV5kwNsrfB2EV7Zwo,1772
13
13
  indexify/executor/function_executor/function_executor.py,sha256=Hz_dT_2i1m9akUGfULWQpDlMsn0CI1AX4Mdt7-oOknI,13598
14
14
  indexify/executor/function_executor/health_checker.py,sha256=IxE0jnC99K_lvnizFLjXqS1942H8-FNAN4AlhLIjg2Y,6373
@@ -51,8 +51,8 @@ indexify/executor/metrics/state_reconciler.py,sha256=BSlRgvgtwih6QcYrsFU5P2ylaXA
51
51
  indexify/executor/metrics/state_reporter.py,sha256=_dssgz335UyZ67OoKNyI50gn4kzPZyFN4VpFGMNw4qE,542
52
52
  indexify/executor/monitoring/handler.py,sha256=Cj1cu_LcsAP0tdviqNhoEtGm4h0OJAxxzW9C2YdNXYU,240
53
53
  indexify/executor/monitoring/health_check_handler.py,sha256=e1pEtWFKaVs6H57Z4YLejNECrJtC38PweZc7xTJeqVw,695
54
- indexify/executor/monitoring/health_checker/generic_health_checker.py,sha256=NQ1NTcaOJKd7xL9IaAtA8-VW4PZWIpW6N97MtTxmWVc,655
55
- indexify/executor/monitoring/health_checker/health_checker.py,sha256=SiWJ_bKNLsxHCzwBMjS-DnFmXmN8CuPA61aTvv_iyTI,429
54
+ indexify/executor/monitoring/health_checker/generic_health_checker.py,sha256=8vvny1DOOaZMxPwcpeTuuVh7l42YCaOd6IkIxzNUURg,1344
55
+ indexify/executor/monitoring/health_checker/health_checker.py,sha256=B-Q4KM1iEUSMA2fr9PBhBLdA7sYII_NuTRmPuRILGSo,665
56
56
  indexify/executor/monitoring/metrics.py,sha256=Dx2wPcTKvbd5Y5rGOfeyscFtAQ2DZ16_s5BX6d4nhI8,6660
57
57
  indexify/executor/monitoring/prometheus_metrics_handler.py,sha256=KiGqSf7rkXTfbDwThyXFpFe2jnuZD5q-5SBP_0GDo8Y,591
58
58
  indexify/executor/monitoring/server.py,sha256=yzdYhcxnmY6uTQUMt3vatF5jilN52ZtfFseOmHyQpTo,1254
@@ -63,7 +63,7 @@ indexify/proto/executor_api.proto,sha256=vP14TJCGO2BSwZ6piqaltiLp5YNPT5jCH2yaehy
63
63
  indexify/proto/executor_api_pb2.py,sha256=zdl00UOqgOB1KeRIAceh_43RpAOVLEs9RSbzxQ0hmKY,16163
64
64
  indexify/proto/executor_api_pb2.pyi,sha256=adD5mqqJhmTgRCa_4v1cR6GcOY-VOLOBV9k8T5iaqPc,22647
65
65
  indexify/proto/executor_api_pb2_grpc.py,sha256=gPtP9GscW2D9yUMXl4uEIO3cwe1B3bKprJjFfNrWou4,7607
66
- indexify-0.4.15.dist-info/METADATA,sha256=IgeB_Sdi6bQk8Va3dMbkblVkewzV_FSH49F6bLS0MgI,1116
67
- indexify-0.4.15.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
68
- indexify-0.4.15.dist-info/entry_points.txt,sha256=rMJqbE5KPZIXTPIfAtVIM4zpUElqYVgEYd6i7N23zzg,49
69
- indexify-0.4.15.dist-info/RECORD,,
66
+ indexify-0.4.16.dist-info/METADATA,sha256=k9tCSJJYV9imbC2kJKhKYQz9_preBM7vd2gipq4Fhlw,1116
67
+ indexify-0.4.16.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
68
+ indexify-0.4.16.dist-info/entry_points.txt,sha256=rMJqbE5KPZIXTPIfAtVIM4zpUElqYVgEYd6i7N23zzg,49
69
+ indexify-0.4.16.dist-info/RECORD,,