indexify 0.4.15__py3-none-any.whl → 0.4.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/executor/channel_manager.py +36 -58
- indexify/executor/executor.py +1 -0
- indexify/executor/monitoring/health_checker/generic_health_checker.py +20 -4
- indexify/executor/monitoring/health_checker/health_checker.py +7 -3
- {indexify-0.4.15.dist-info → indexify-0.4.16.dist-info}/METADATA +3 -3
- {indexify-0.4.15.dist-info → indexify-0.4.16.dist-info}/RECORD +8 -8
- {indexify-0.4.15.dist-info → indexify-0.4.16.dist-info}/WHEEL +0 -0
- {indexify-0.4.15.dist-info → indexify-0.4.16.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,5 @@
|
|
1
1
|
import asyncio
|
2
|
-
import os
|
2
|
+
import time
|
3
3
|
from typing import Any, Dict, Optional
|
4
4
|
|
5
5
|
import grpc.aio
|
@@ -10,16 +10,23 @@ from .metrics.channel_manager import (
|
|
10
10
|
metric_grpc_server_channel_creation_retries,
|
11
11
|
metric_grpc_server_channel_creations,
|
12
12
|
)
|
13
|
+
from .monitoring.health_checker.health_checker import HealthChecker
|
13
14
|
|
14
15
|
_RETRY_INTERVAL_SEC = 5
|
15
16
|
_CONNECT_TIMEOUT_SEC = 5
|
16
17
|
|
17
18
|
|
18
19
|
class ChannelManager:
|
19
|
-
def __init__(
|
20
|
+
def __init__(
|
21
|
+
self,
|
22
|
+
server_address: str,
|
23
|
+
config_path: Optional[str],
|
24
|
+
health_checker: HealthChecker,
|
25
|
+
logger: Any,
|
26
|
+
):
|
20
27
|
self._logger: Any = logger.bind(module=__name__, server_address=server_address)
|
21
|
-
self._keep_alive_period_sec: int = _keep_alive_period_sec_from_env(logger)
|
22
28
|
self._server_address: str = server_address
|
29
|
+
self._health_checker: HealthChecker = health_checker
|
23
30
|
self._channel_credentials: Optional[grpc.ChannelCredentials] = None
|
24
31
|
# This lock protects the fields below.
|
25
32
|
self._lock = asyncio.Lock()
|
@@ -86,31 +93,33 @@ class ChannelManager:
|
|
86
93
|
# Use the lock to ensure that we only create one channel without race conditions.
|
87
94
|
async with self._lock:
|
88
95
|
if self._channel is None:
|
96
|
+
# Only called on Executor startup when we establish the channel for the first time.
|
89
97
|
self._channel = await self._create_ready_channel()
|
90
98
|
elif not await self._locked_channel_is_healthy():
|
91
99
|
self._logger.info("grpc channel to server is unhealthy")
|
100
|
+
self._health_checker.server_connection_state_changed(
|
101
|
+
is_healthy=False,
|
102
|
+
status_message="grpc channel to server is unhealthy",
|
103
|
+
)
|
92
104
|
await self._destroy_locked_channel()
|
93
105
|
self._channel = await self._create_ready_channel()
|
106
|
+
self._health_checker.server_connection_state_changed(
|
107
|
+
is_healthy=True, status_message="grpc channel to server is healthy"
|
108
|
+
)
|
94
109
|
|
95
110
|
return self._channel
|
96
111
|
|
97
112
|
def create_channel(self) -> grpc.aio.Channel:
|
98
113
|
"""Creates a new channel to the gRPC server.
|
99
114
|
|
100
|
-
The channel is not
|
115
|
+
The channel is not ready to use. Raises an exception on failure.
|
101
116
|
"""
|
102
|
-
channel_options: list[tuple[str, int]] = _channel_options(
|
103
|
-
self._keep_alive_period_sec
|
104
|
-
)
|
105
117
|
if self._channel_credentials is None:
|
106
|
-
return grpc.aio.insecure_channel(
|
107
|
-
target=self._server_address, options=channel_options
|
108
|
-
)
|
118
|
+
return grpc.aio.insecure_channel(target=self._server_address)
|
109
119
|
else:
|
110
120
|
return grpc.aio.secure_channel(
|
111
121
|
target=self._server_address,
|
112
122
|
credentials=self._channel_credentials,
|
113
|
-
options=channel_options,
|
114
123
|
)
|
115
124
|
|
116
125
|
async def _create_ready_channel(self) -> grpc.aio.Channel:
|
@@ -119,25 +128,36 @@ class ChannelManager:
|
|
119
128
|
Returns a ready to use channel. Blocks until the channel
|
120
129
|
is ready, never raises any exceptions.
|
121
130
|
"""
|
122
|
-
self._logger.info("creating new grpc server channel")
|
123
|
-
|
124
131
|
with metric_grpc_server_channel_creation_latency.time():
|
125
132
|
metric_grpc_server_channel_creations.inc()
|
126
133
|
while True:
|
127
134
|
try:
|
128
|
-
|
135
|
+
self._logger.info("creating new grpc server channel")
|
136
|
+
create_channel_start = time.monotonic()
|
137
|
+
channel: grpc.Channel = self.create_channel()
|
138
|
+
self._logger.info(
|
139
|
+
"grpc server channel created",
|
140
|
+
duration_sec=time.monotonic() - create_channel_start,
|
141
|
+
)
|
142
|
+
|
143
|
+
channel_ready_start = time.monotonic()
|
129
144
|
await asyncio.wait_for(
|
130
145
|
channel.channel_ready(),
|
131
146
|
timeout=_CONNECT_TIMEOUT_SEC,
|
132
147
|
)
|
148
|
+
self._logger.info(
|
149
|
+
"grpc server channel is established (ready)",
|
150
|
+
duration_sec=time.monotonic() - channel_ready_start,
|
151
|
+
)
|
152
|
+
|
133
153
|
return channel
|
134
|
-
except Exception:
|
154
|
+
except BaseException:
|
135
155
|
self._logger.error(
|
136
156
|
f"failed establishing grpc server channel in {_CONNECT_TIMEOUT_SEC} sec, retrying in {_RETRY_INTERVAL_SEC} sec"
|
137
157
|
)
|
138
158
|
try:
|
139
159
|
await channel.close()
|
140
|
-
except Exception as e:
|
160
|
+
except BaseException as e:
|
141
161
|
self._logger.error(
|
142
162
|
"failed closing not established channel", exc_info=e
|
143
163
|
)
|
@@ -173,45 +193,3 @@ class ChannelManager:
|
|
173
193
|
except Exception as e:
|
174
194
|
self._logger.error("failed closing channel", exc_info=e)
|
175
195
|
self._channel = None
|
176
|
-
|
177
|
-
|
178
|
-
def _channel_options(keep_alive_period_sec: int) -> list[tuple[str, int]]:
|
179
|
-
"""Returns the gRPC channel options."""
|
180
|
-
# See https://grpc.io/docs/guides/keepalive/.
|
181
|
-
#
|
182
|
-
# NB: Rust Tonic framework that we're using in Server is not using gRPC core and doesn't support
|
183
|
-
# these options. From https://github.com/hyperium/tonic/issues/258 it supports gRPC PINGs when
|
184
|
-
# there are in-flight RPCs (and streams) without any extra configuration.
|
185
|
-
return [
|
186
|
-
("grpc.keepalive_time_ms", keep_alive_period_sec * 1000),
|
187
|
-
(
|
188
|
-
"grpc.http2.max_pings_without_data",
|
189
|
-
-1,
|
190
|
-
), # Allow any number of empty PING messages
|
191
|
-
(
|
192
|
-
"grpc.keepalive_permit_without_calls",
|
193
|
-
0,
|
194
|
-
), # Don't send PINGs when there are no in-flight RPCs (and streams)
|
195
|
-
]
|
196
|
-
|
197
|
-
|
198
|
-
def _keep_alive_period_sec_from_env(logger: Any) -> int:
|
199
|
-
"""Returns the keep alive period in seconds."""
|
200
|
-
# We have to use gRPC keep alive (PING) to prevent proxies/load-balancers from closing underlying HTTP/2
|
201
|
-
# (TCP) connections due to periods of idleness in gRPC streams that we use between Executor and Server.
|
202
|
-
# If a proxy/load-balancer closes the connection, then we see it as gRPC stream errors which results in
|
203
|
-
# a lot of error logs noise.
|
204
|
-
#
|
205
|
-
# The default period of 50 sec is used for one of the standard proxy/load-balancer timeouts of 1 minute.
|
206
|
-
DEFAULT_KEEP_ALIVE_PERIOD_SEC = "50"
|
207
|
-
keep_alive_period_sec = int(
|
208
|
-
os.getenv(
|
209
|
-
"INDEXIFY_EXECUTOR_GRPC_KEEP_ALIVE_PERIOD_SEC",
|
210
|
-
DEFAULT_KEEP_ALIVE_PERIOD_SEC,
|
211
|
-
)
|
212
|
-
)
|
213
|
-
if keep_alive_period_sec != int(DEFAULT_KEEP_ALIVE_PERIOD_SEC):
|
214
|
-
logger.info(
|
215
|
-
f"gRPC keep alive (PING) period is set to {keep_alive_period_sec} sec"
|
216
|
-
)
|
217
|
-
return keep_alive_period_sec
|
indexify/executor/executor.py
CHANGED
@@ -69,6 +69,7 @@ class Executor:
|
|
69
69
|
self._channel_manager = ChannelManager(
|
70
70
|
server_address=grpc_server_addr,
|
71
71
|
config_path=config_path,
|
72
|
+
health_checker=health_checker,
|
72
73
|
logger=self._logger,
|
73
74
|
)
|
74
75
|
function_allowlist: List[FunctionURI] = parse_function_uris(function_uris)
|
@@ -1,6 +1,8 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
1
3
|
from .health_checker import HealthChecker, HealthCheckResult
|
2
4
|
|
3
|
-
|
5
|
+
_HEALTH_CHECKER_NAME = "GenericHealthChecker"
|
4
6
|
|
5
7
|
|
6
8
|
class GenericHealthChecker(HealthChecker):
|
@@ -10,11 +12,25 @@ class GenericHealthChecker(HealthChecker):
|
|
10
12
|
"""
|
11
13
|
|
12
14
|
def __init__(self):
|
13
|
-
|
15
|
+
self._server_connection_unhealthy_status_message: Optional[str] = None
|
16
|
+
|
17
|
+
def server_connection_state_changed(self, is_healthy: bool, status_message: str):
|
18
|
+
"""Handle changes in server connection state."""
|
19
|
+
if is_healthy:
|
20
|
+
self._server_connection_unhealthy_status_message = None
|
21
|
+
else:
|
22
|
+
self._server_connection_unhealthy_status_message = status_message
|
14
23
|
|
15
24
|
async def check(self) -> HealthCheckResult:
|
25
|
+
if self._server_connection_unhealthy_status_message is not None:
|
26
|
+
return HealthCheckResult(
|
27
|
+
is_success=False,
|
28
|
+
status_message=self._server_connection_unhealthy_status_message,
|
29
|
+
checker_name=_HEALTH_CHECKER_NAME,
|
30
|
+
)
|
31
|
+
|
16
32
|
return HealthCheckResult(
|
17
33
|
is_success=True,
|
18
|
-
status_message="
|
19
|
-
checker_name=
|
34
|
+
status_message="Successful",
|
35
|
+
checker_name=_HEALTH_CHECKER_NAME,
|
20
36
|
)
|
@@ -1,12 +1,16 @@
|
|
1
1
|
class HealthCheckResult:
|
2
2
|
def __init__(self, checker_name: str, is_success: bool, status_message: str):
|
3
|
-
self.checker_name = checker_name
|
4
|
-
self.is_success = is_success
|
5
|
-
self.status_message = status_message
|
3
|
+
self.checker_name: str = checker_name
|
4
|
+
self.is_success: bool = is_success
|
5
|
+
self.status_message: str = status_message
|
6
6
|
|
7
7
|
|
8
8
|
class HealthChecker:
|
9
9
|
"""Abstract base class for health checkers."""
|
10
10
|
|
11
|
+
def server_connection_state_changed(self, is_healthy: bool, status_message: str):
|
12
|
+
"""Handle changes in server connection state."""
|
13
|
+
raise NotImplementedError("Subclasses must implement this method.")
|
14
|
+
|
11
15
|
async def check(self) -> HealthCheckResult:
|
12
16
|
raise NotImplementedError("Subclasses must implement this method.")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: indexify
|
3
|
-
Version: 0.4.15
|
3
|
+
Version: 0.4.16
|
4
4
|
Summary: Open Source Indexify components and helper tools
|
5
5
|
Home-page: https://github.com/tensorlakeai/indexify
|
6
6
|
License: Apache 2.0
|
@@ -14,10 +14,10 @@ Classifier: Programming Language :: Python :: 3.11
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
16
16
|
Requires-Dist: aiohttp (>=3.12.14,<4.0.0)
|
17
|
-
Requires-Dist: boto3 (>=1.39.
|
17
|
+
Requires-Dist: boto3 (>=1.39.6,<2.0.0)
|
18
18
|
Requires-Dist: prometheus-client (>=0.22.1,<0.23.0)
|
19
19
|
Requires-Dist: psutil (>=7.0.0,<8.0.0)
|
20
|
-
Requires-Dist: tensorlake (==0.2.
|
20
|
+
Requires-Dist: tensorlake (==0.2.25)
|
21
21
|
Project-URL: Repository, https://github.com/tensorlakeai/indexify
|
22
22
|
Description-Content-Type: text/markdown
|
23
23
|
|
@@ -7,8 +7,8 @@ indexify/executor/blob_store/blob_store.py,sha256=XViw_KRfFSNqwcFYwMZixZF-EYCjXK
|
|
7
7
|
indexify/executor/blob_store/local_fs_blob_store.py,sha256=6LexqMBGXp8f6Ka95R6xMIUyDutrZJABOMNcp-ssa98,1809
|
8
8
|
indexify/executor/blob_store/metrics/blob_store.py,sha256=5_xiPREeHWFtxFh1NupDsF8zP4pmUPgLNNn-UE9Uzvc,1008
|
9
9
|
indexify/executor/blob_store/s3_blob_store.py,sha256=G3B_V3gUE7XbUY42lDtBczUKuA7q8S7MD43tx1aHrJo,3445
|
10
|
-
indexify/executor/channel_manager.py,sha256=
|
11
|
-
indexify/executor/executor.py,sha256=
|
10
|
+
indexify/executor/channel_manager.py,sha256=rulAmCvgEupKZ0esuO4COfosuhm1iVZ27IIqeqFu0Vg,7750
|
11
|
+
indexify/executor/executor.py,sha256=iMmysK_6jZkczcVqjzbQdXzrALEig_Qtb51MhQUfRqc,6353
|
12
12
|
indexify/executor/function_allowlist.py,sha256=PCelCW6qIe_2sH11BCKr7LDqarRV5kwNsrfB2EV7Zwo,1772
|
13
13
|
indexify/executor/function_executor/function_executor.py,sha256=Hz_dT_2i1m9akUGfULWQpDlMsn0CI1AX4Mdt7-oOknI,13598
|
14
14
|
indexify/executor/function_executor/health_checker.py,sha256=IxE0jnC99K_lvnizFLjXqS1942H8-FNAN4AlhLIjg2Y,6373
|
@@ -51,8 +51,8 @@ indexify/executor/metrics/state_reconciler.py,sha256=BSlRgvgtwih6QcYrsFU5P2ylaXA
|
|
51
51
|
indexify/executor/metrics/state_reporter.py,sha256=_dssgz335UyZ67OoKNyI50gn4kzPZyFN4VpFGMNw4qE,542
|
52
52
|
indexify/executor/monitoring/handler.py,sha256=Cj1cu_LcsAP0tdviqNhoEtGm4h0OJAxxzW9C2YdNXYU,240
|
53
53
|
indexify/executor/monitoring/health_check_handler.py,sha256=e1pEtWFKaVs6H57Z4YLejNECrJtC38PweZc7xTJeqVw,695
|
54
|
-
indexify/executor/monitoring/health_checker/generic_health_checker.py,sha256=
|
55
|
-
indexify/executor/monitoring/health_checker/health_checker.py,sha256=
|
54
|
+
indexify/executor/monitoring/health_checker/generic_health_checker.py,sha256=8vvny1DOOaZMxPwcpeTuuVh7l42YCaOd6IkIxzNUURg,1344
|
55
|
+
indexify/executor/monitoring/health_checker/health_checker.py,sha256=B-Q4KM1iEUSMA2fr9PBhBLdA7sYII_NuTRmPuRILGSo,665
|
56
56
|
indexify/executor/monitoring/metrics.py,sha256=Dx2wPcTKvbd5Y5rGOfeyscFtAQ2DZ16_s5BX6d4nhI8,6660
|
57
57
|
indexify/executor/monitoring/prometheus_metrics_handler.py,sha256=KiGqSf7rkXTfbDwThyXFpFe2jnuZD5q-5SBP_0GDo8Y,591
|
58
58
|
indexify/executor/monitoring/server.py,sha256=yzdYhcxnmY6uTQUMt3vatF5jilN52ZtfFseOmHyQpTo,1254
|
@@ -63,7 +63,7 @@ indexify/proto/executor_api.proto,sha256=vP14TJCGO2BSwZ6piqaltiLp5YNPT5jCH2yaehy
|
|
63
63
|
indexify/proto/executor_api_pb2.py,sha256=zdl00UOqgOB1KeRIAceh_43RpAOVLEs9RSbzxQ0hmKY,16163
|
64
64
|
indexify/proto/executor_api_pb2.pyi,sha256=adD5mqqJhmTgRCa_4v1cR6GcOY-VOLOBV9k8T5iaqPc,22647
|
65
65
|
indexify/proto/executor_api_pb2_grpc.py,sha256=gPtP9GscW2D9yUMXl4uEIO3cwe1B3bKprJjFfNrWou4,7607
|
66
|
-
indexify-0.4.
|
67
|
-
indexify-0.4.15.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
|
68
|
-
indexify-0.4.15.dist-info/entry_points.txt,sha256=rMJqbE5KPZIXTPIfAtVIM4zpUElqYVgEYd6i7N23zzg,49
|
69
|
-
indexify-0.4.15.dist-info/RECORD,,
|
66
|
+
indexify-0.4.16.dist-info/METADATA,sha256=k9tCSJJYV9imbC2kJKhKYQz9_preBM7vd2gipq4Fhlw,1116
|
67
|
+
indexify-0.4.16.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
|
68
|
+
indexify-0.4.16.dist-info/entry_points.txt,sha256=rMJqbE5KPZIXTPIfAtVIM4zpUElqYVgEYd6i7N23zzg,49
|
69
|
+
indexify-0.4.16.dist-info/RECORD,,
|
File without changes
|
File without changes
|