indexify 0.4.14__tar.gz → 0.4.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. {indexify-0.4.14 → indexify-0.4.16}/PKG-INFO +5 -5
  2. {indexify-0.4.14 → indexify-0.4.16}/pyproject.toml +7 -7
  3. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/channel_manager.py +36 -58
  4. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/executor.py +1 -0
  5. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/function_executor_controller.py +0 -1
  6. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/run_task.py +0 -2
  7. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/task_output.py +0 -2
  8. indexify-0.4.16/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +36 -0
  9. indexify-0.4.16/src/indexify/executor/monitoring/health_checker/health_checker.py +16 -0
  10. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/state_reporter.py +4 -4
  11. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/proto/executor_api.proto +0 -1
  12. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/proto/executor_api_pb2.py +18 -18
  13. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/proto/executor_api_pb2.pyi +0 -4
  14. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/proto/executor_api_pb2_grpc.py +1 -1
  15. indexify-0.4.14/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -20
  16. indexify-0.4.14/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -12
  17. {indexify-0.4.14 → indexify-0.4.16}/README.md +0 -0
  18. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/cli/__init__.py +0 -0
  19. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/cli/build_image.py +0 -0
  20. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/cli/deploy.py +0 -0
  21. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/cli/executor.py +0 -0
  22. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/README.md +0 -0
  23. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/blob_store/blob_store.py +0 -0
  24. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/blob_store/local_fs_blob_store.py +0 -0
  25. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/blob_store/metrics/blob_store.py +0 -0
  26. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/blob_store/s3_blob_store.py +0 -0
  27. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_allowlist.py +0 -0
  28. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor/function_executor.py +0 -0
  29. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor/health_checker.py +0 -0
  30. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor/invocation_state_client.py +0 -0
  31. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
  32. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
  33. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
  34. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
  35. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
  36. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -0
  37. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
  38. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +0 -0
  39. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/__init__.py +0 -0
  40. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/completed_task_metrics.py +0 -0
  41. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/create_function_executor.py +0 -0
  42. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/debug_event_loop.py +0 -0
  43. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/downloads.py +0 -0
  44. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/events.py +0 -0
  45. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/function_executor_startup_output.py +0 -0
  46. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/loggers.py +0 -0
  47. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/message_validators.py +0 -0
  48. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +0 -0
  49. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/metrics/downloads.py +0 -0
  50. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/metrics/function_executor_controller.py +0 -0
  51. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/metrics/run_task.py +0 -0
  52. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/metrics/upload_task_output.py +0 -0
  53. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/prepare_task.py +0 -0
  54. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/task_info.py +0 -0
  55. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/terminate_function_executor.py +0 -0
  56. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/function_executor_controller/upload_task_output.py +0 -0
  57. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/host_resources/host_resources.py +0 -0
  58. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/host_resources/nvidia_gpu.py +0 -0
  59. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py +0 -0
  60. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/metrics/channel_manager.py +0 -0
  61. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/metrics/executor.py +0 -0
  62. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/metrics/state_reconciler.py +0 -0
  63. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/metrics/state_reporter.py +0 -0
  64. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/monitoring/handler.py +0 -0
  65. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
  66. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/monitoring/metrics.py +0 -0
  67. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
  68. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/monitoring/server.py +0 -0
  69. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
  70. {indexify-0.4.14 → indexify-0.4.16}/src/indexify/executor/state_reconciler.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: indexify
3
- Version: 0.4.14
3
+ Version: 0.4.16
4
4
  Summary: Open Source Indexify components and helper tools
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -13,11 +13,11 @@ Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
- Requires-Dist: aiohttp (>=3.11.0,<4.0.0)
17
- Requires-Dist: boto3 (>=1.37.30,<2.0.0)
18
- Requires-Dist: prometheus-client (>=0.21.1,<0.22.0)
16
+ Requires-Dist: aiohttp (>=3.12.14,<4.0.0)
17
+ Requires-Dist: boto3 (>=1.39.6,<2.0.0)
18
+ Requires-Dist: prometheus-client (>=0.22.1,<0.23.0)
19
19
  Requires-Dist: psutil (>=7.0.0,<8.0.0)
20
- Requires-Dist: tensorlake (==0.2.15)
20
+ Requires-Dist: tensorlake (==0.2.25)
21
21
  Project-URL: Repository, https://github.com/tensorlakeai/indexify
22
22
  Description-Content-Type: text/markdown
23
23
 
@@ -1,7 +1,7 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
3
  # Incremented if any of the components provided in this packages are updated.
4
- version = "0.4.14"
4
+ version = "0.4.16"
5
5
  description = "Open Source Indexify components and helper tools"
6
6
  authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
7
7
  license = "Apache 2.0"
@@ -20,12 +20,13 @@ python = "^3.10"
20
20
  # httpx is provided by tensorlake
21
21
 
22
22
  # Executor only
23
- aiohttp = "^3.11.0"
24
- prometheus-client = "^0.21.1"
23
+ aiohttp = "^3.12.14"
24
+ prometheus-client = "^0.22.1"
25
25
  psutil = "^7.0.0"
26
+ boto3 = "^1.39.6"
26
27
  # Adds function-executor binary, utils lib, sdk used in indexify-cli commands.
27
28
  # We need to specify the tensorlake version exactly because pip install doesn't respect poetry.lock files.
28
- tensorlake = "0.2.15"
29
+ tensorlake = "0.2.25"
29
30
  # Uncomment the next line to use local tensorlake package (only for development!)
30
31
  # tensorlake = { path = "../tensorlake", develop = true }
31
32
  # pydantic is provided by tensorlake
@@ -35,11 +36,10 @@ tensorlake = "0.2.15"
35
36
  # CLI only
36
37
  # nanoid is provided by tensorlake
37
38
  # click is provided by tensorlake
38
- boto3 = "^1.37.30"
39
39
 
40
40
  [tool.poetry.group.dev.dependencies]
41
- black = "^24.10.0"
42
- pylint = "^3.3.0"
41
+ black = "25.1.0"
42
+ isort = "^6.0.1"
43
43
  parameterized = "^0.9.0"
44
44
 
45
45
  [build-system]
@@ -1,5 +1,5 @@
1
1
  import asyncio
2
- import os
2
+ import time
3
3
  from typing import Any, Dict, Optional
4
4
 
5
5
  import grpc.aio
@@ -10,16 +10,23 @@ from .metrics.channel_manager import (
10
10
  metric_grpc_server_channel_creation_retries,
11
11
  metric_grpc_server_channel_creations,
12
12
  )
13
+ from .monitoring.health_checker.health_checker import HealthChecker
13
14
 
14
15
  _RETRY_INTERVAL_SEC = 5
15
16
  _CONNECT_TIMEOUT_SEC = 5
16
17
 
17
18
 
18
19
  class ChannelManager:
19
- def __init__(self, server_address: str, config_path: Optional[str], logger: Any):
20
+ def __init__(
21
+ self,
22
+ server_address: str,
23
+ config_path: Optional[str],
24
+ health_checker: HealthChecker,
25
+ logger: Any,
26
+ ):
20
27
  self._logger: Any = logger.bind(module=__name__, server_address=server_address)
21
- self._keep_alive_period_sec: int = _keep_alive_period_sec_from_env(logger)
22
28
  self._server_address: str = server_address
29
+ self._health_checker: HealthChecker = health_checker
23
30
  self._channel_credentials: Optional[grpc.ChannelCredentials] = None
24
31
  # This lock protects the fields below.
25
32
  self._lock = asyncio.Lock()
@@ -86,31 +93,33 @@ class ChannelManager:
86
93
  # Use the lock to ensure that we only create one channel without race conditions.
87
94
  async with self._lock:
88
95
  if self._channel is None:
96
+ # Only called on Executor startup when we establish the channel for the first time.
89
97
  self._channel = await self._create_ready_channel()
90
98
  elif not await self._locked_channel_is_healthy():
91
99
  self._logger.info("grpc channel to server is unhealthy")
100
+ self._health_checker.server_connection_state_changed(
101
+ is_healthy=False,
102
+ status_message="grpc channel to server is unhealthy",
103
+ )
92
104
  await self._destroy_locked_channel()
93
105
  self._channel = await self._create_ready_channel()
106
+ self._health_checker.server_connection_state_changed(
107
+ is_healthy=True, status_message="grpc channel to server is healthy"
108
+ )
94
109
 
95
110
  return self._channel
96
111
 
97
112
  def create_channel(self) -> grpc.aio.Channel:
98
113
  """Creates a new channel to the gRPC server.
99
114
 
100
- The channel is not be ready to use. Raises an exception on failure.
115
+ The channel is not ready to use. Raises an exception on failure.
101
116
  """
102
- channel_options: list[tuple[str, int]] = _channel_options(
103
- self._keep_alive_period_sec
104
- )
105
117
  if self._channel_credentials is None:
106
- return grpc.aio.insecure_channel(
107
- target=self._server_address, options=channel_options
108
- )
118
+ return grpc.aio.insecure_channel(target=self._server_address)
109
119
  else:
110
120
  return grpc.aio.secure_channel(
111
121
  target=self._server_address,
112
122
  credentials=self._channel_credentials,
113
- options=channel_options,
114
123
  )
115
124
 
116
125
  async def _create_ready_channel(self) -> grpc.aio.Channel:
@@ -119,25 +128,36 @@ class ChannelManager:
119
128
  Returns a ready to use channel. Blocks until the channel
120
129
  is ready, never raises any exceptions.
121
130
  """
122
- self._logger.info("creating new grpc server channel")
123
-
124
131
  with metric_grpc_server_channel_creation_latency.time():
125
132
  metric_grpc_server_channel_creations.inc()
126
133
  while True:
127
134
  try:
128
- channel = self.create_channel()
135
+ self._logger.info("creating new grpc server channel")
136
+ create_channel_start = time.monotonic()
137
+ channel: grpc.Channel = self.create_channel()
138
+ self._logger.info(
139
+ "grpc server channel created",
140
+ duration_sec=time.monotonic() - create_channel_start,
141
+ )
142
+
143
+ channel_ready_start = time.monotonic()
129
144
  await asyncio.wait_for(
130
145
  channel.channel_ready(),
131
146
  timeout=_CONNECT_TIMEOUT_SEC,
132
147
  )
148
+ self._logger.info(
149
+ "grpc server channel is established (ready)",
150
+ duration_sec=time.monotonic() - channel_ready_start,
151
+ )
152
+
133
153
  return channel
134
- except Exception:
154
+ except BaseException:
135
155
  self._logger.error(
136
156
  f"failed establishing grpc server channel in {_CONNECT_TIMEOUT_SEC} sec, retrying in {_RETRY_INTERVAL_SEC} sec"
137
157
  )
138
158
  try:
139
159
  await channel.close()
140
- except Exception as e:
160
+ except BaseException as e:
141
161
  self._logger.error(
142
162
  "failed closing not established channel", exc_info=e
143
163
  )
@@ -173,45 +193,3 @@ class ChannelManager:
173
193
  except Exception as e:
174
194
  self._logger.error("failed closing channel", exc_info=e)
175
195
  self._channel = None
176
-
177
-
178
- def _channel_options(keep_alive_period_sec: int) -> list[tuple[str, int]]:
179
- """Returns the gRPC channel options."""
180
- # See https://grpc.io/docs/guides/keepalive/.
181
- #
182
- # NB: Rust Tonic framework that we're using in Server is not using gRPC core and doesn't support
183
- # these options. From https://github.com/hyperium/tonic/issues/258 it supports gRPC PINGs when
184
- # there are in-flight RPCs (and streams) without any extra configuration.
185
- return [
186
- ("grpc.keepalive_time_ms", keep_alive_period_sec * 1000),
187
- (
188
- "grpc.http2.max_pings_without_data",
189
- -1,
190
- ), # Allow any number of empty PING messages
191
- (
192
- "grpc.keepalive_permit_without_calls",
193
- 0,
194
- ), # Don't send PINGs when there are no in-flight RPCs (and streams)
195
- ]
196
-
197
-
198
- def _keep_alive_period_sec_from_env(logger: Any) -> int:
199
- """Returns the keep alive period in seconds."""
200
- # We have to use gRPC keep alive (PING) to prevent proxies/load-balancers from closing underlying HTTP/2
201
- # (TCP) connections due to periods of idleness in gRPC streams that we use between Executor and Server.
202
- # If a proxy/load-balancer closes the connection, then we see it as gRPC stream errors which results in
203
- # a lot of error logs noise.
204
- #
205
- # The default period of 50 sec is used for one of the standard proxy/load-balancer timeouts of 1 minute.
206
- DEFAULT_KEEP_ALIVE_PERIOD_SEC = "50"
207
- keep_alive_period_sec = int(
208
- os.getenv(
209
- "INDEXIFY_EXECUTOR_GRPC_KEEP_ALIVE_PERIOD_SEC",
210
- DEFAULT_KEEP_ALIVE_PERIOD_SEC,
211
- )
212
- )
213
- if keep_alive_period_sec != int(DEFAULT_KEEP_ALIVE_PERIOD_SEC):
214
- logger.info(
215
- f"gRPC keep alive (PING) period is set to {keep_alive_period_sec} sec"
216
- )
217
- return keep_alive_period_sec
@@ -69,6 +69,7 @@ class Executor:
69
69
  self._channel_manager = ChannelManager(
70
70
  server_address=grpc_server_addr,
71
71
  config_path=config_path,
72
+ health_checker=health_checker,
72
73
  logger=self._logger,
73
74
  )
74
75
  function_allowlist: List[FunctionURI] = parse_function_uris(function_uris)
@@ -853,7 +853,6 @@ def _to_task_result_proto(output: TaskOutput) -> TaskResult:
853
853
  graph_version=output.allocation.task.graph_version,
854
854
  function_name=output.allocation.task.function_name,
855
855
  graph_invocation_id=output.allocation.task.graph_invocation_id,
856
- reducer=output.reducer,
857
856
  outcome_code=output.outcome_code,
858
857
  failure_reason=output.failure_reason,
859
858
  next_functions=output.next_functions,
@@ -159,7 +159,6 @@ def _task_output_from_function_executor_response(
159
159
  response_validator = MessageValidator(response)
160
160
  response_validator.required_field("stdout")
161
161
  response_validator.required_field("stderr")
162
- response_validator.required_field("is_reducer")
163
162
  response_validator.required_field("outcome_code")
164
163
 
165
164
  metrics = TaskMetrics(counters={}, timers={})
@@ -200,7 +199,6 @@ def _task_output_from_function_executor_response(
200
199
  next_functions=response.next_functions,
201
200
  stdout=response.stdout,
202
201
  stderr=response.stderr,
203
- reducer=response.is_reducer,
204
202
  metrics=metrics,
205
203
  )
206
204
 
@@ -37,7 +37,6 @@ class TaskOutput:
37
37
  next_functions: List[str] = [],
38
38
  stdout: Optional[str] = None,
39
39
  stderr: Optional[str] = None,
40
- reducer: bool = False,
41
40
  metrics: Optional[TaskMetrics] = None,
42
41
  ):
43
42
  self.task = allocation.task
@@ -46,7 +45,6 @@ class TaskOutput:
46
45
  self.next_functions = next_functions
47
46
  self.stdout = stdout
48
47
  self.stderr = stderr
49
- self.reducer = reducer
50
48
  self.outcome_code = outcome_code
51
49
  self.failure_reason = failure_reason
52
50
  self.invocation_error_output = invocation_error_output
@@ -0,0 +1,36 @@
1
+ from typing import Optional
2
+
3
+ from .health_checker import HealthChecker, HealthCheckResult
4
+
5
+ _HEALTH_CHECKER_NAME = "GenericHealthChecker"
6
+
7
+
8
+ class GenericHealthChecker(HealthChecker):
9
+ """A generic health checker that doesn't depend on machine type and other features of the environment.
10
+
11
+ The health checker uses software signals available in all environments like Function Executor failure rates.
12
+ """
13
+
14
+ def __init__(self):
15
+ self._server_connection_unhealthy_status_message: Optional[str] = None
16
+
17
+ def server_connection_state_changed(self, is_healthy: bool, status_message: str):
18
+ """Handle changes in server connection state."""
19
+ if is_healthy:
20
+ self._server_connection_unhealthy_status_message = None
21
+ else:
22
+ self._server_connection_unhealthy_status_message = status_message
23
+
24
+ async def check(self) -> HealthCheckResult:
25
+ if self._server_connection_unhealthy_status_message is not None:
26
+ return HealthCheckResult(
27
+ is_success=False,
28
+ status_message=self._server_connection_unhealthy_status_message,
29
+ checker_name=_HEALTH_CHECKER_NAME,
30
+ )
31
+
32
+ return HealthCheckResult(
33
+ is_success=True,
34
+ status_message="Successful",
35
+ checker_name=_HEALTH_CHECKER_NAME,
36
+ )
@@ -0,0 +1,16 @@
1
+ class HealthCheckResult:
2
+ def __init__(self, checker_name: str, is_success: bool, status_message: str):
3
+ self.checker_name: str = checker_name
4
+ self.is_success: bool = is_success
5
+ self.status_message: str = status_message
6
+
7
+
8
+ class HealthChecker:
9
+ """Abstract base class for health checkers."""
10
+
11
+ def server_connection_state_changed(self, is_healthy: bool, status_message: str):
12
+ """Handle changes in server connection state."""
13
+ raise NotImplementedError("Subclasses must implement this method.")
14
+
15
+ async def check(self) -> HealthCheckResult:
16
+ raise NotImplementedError("Subclasses must implement this method.")
@@ -36,6 +36,7 @@ from .metrics.state_reporter import (
36
36
  )
37
37
 
38
38
  _REPORTING_INTERVAL_SEC = 5
39
+ _REPORTING_BACKOFF_SEC = 5
39
40
  _REPORT_RPC_TIMEOUT_SEC = 5
40
41
 
41
42
 
@@ -49,7 +50,6 @@ class ExecutorStateReporter:
49
50
  channel_manager: ChannelManager,
50
51
  host_resources_provider: HostResourcesProvider,
51
52
  logger: Any,
52
- reporting_interval_sec: int = _REPORTING_INTERVAL_SEC,
53
53
  ):
54
54
  self._executor_id: str = executor_id
55
55
  self._version: str = version
@@ -58,7 +58,6 @@ class ExecutorStateReporter:
58
58
  self._hostname: str = gethostname()
59
59
  self._channel_manager = channel_manager
60
60
  self._logger: Any = logger.bind(module=__name__)
61
- self._reporting_interval_sec: int = reporting_interval_sec
62
61
  self._allowed_functions: List[AllowedFunction] = _to_allowed_function_protos(
63
62
  function_allowlist
64
63
  )
@@ -180,7 +179,7 @@ class ExecutorStateReporter:
180
179
  async def _periodic_state_report_scheduler_loop(self) -> None:
181
180
  while True:
182
181
  self._state_report_scheduled_event.set()
183
- await asyncio.sleep(self._reporting_interval_sec)
182
+ await asyncio.sleep(_REPORTING_INTERVAL_SEC)
184
183
 
185
184
  async def _state_report_worker_loop(self) -> None:
186
185
  """Runs the state reporter.
@@ -202,9 +201,10 @@ class ExecutorStateReporter:
202
201
  self._state_reported_event.set()
203
202
  except Exception as e:
204
203
  self._logger.error(
205
- f"failed to report state to the server, retrying in {self._reporting_interval_sec} sec.",
204
+ f"failed to report state to the server, backing-off for {_REPORTING_BACKOFF_SEC} sec.",
206
205
  exc_info=e,
207
206
  )
207
+ await asyncio.sleep(_REPORTING_BACKOFF_SEC)
208
208
  break # exit the inner loop to recreate the channel if needed
209
209
 
210
210
  async def _report_state(self, stub: ExecutorAPIStub):
@@ -262,7 +262,6 @@ message TaskResult {
262
262
  optional string graph_version = 5;
263
263
  optional string function_name = 6;
264
264
  optional string graph_invocation_id = 7;
265
- optional bool reducer = 8;
266
265
  optional TaskOutcomeCode outcome_code = 9;
267
266
  optional TaskFailureReason failure_reason = 10;
268
267
  // Edges that the function wants the invocation to be routed to.
@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
19
19
 
20
20
 
21
21
  DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
22
- b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\xeb\x01\n\x0b\x44\x61taPayload\x12\x11\n\x04size\x18\x02 \x01(\x04H\x00\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x02\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x03\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x04\x88\x01\x01\x42\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"e\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_model"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xd8\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xb3\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\x0c \x01(\tH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resourcesB\x1c\n\x1a_output_payload_uri_prefix"\xcf\x02\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12S\n\x12termination_reason\x18\x03 \x01(\x0e\x32\x32.executor_api_pb.FunctionExecutorTerminationReasonH\x02\x88\x01\x01\x12)\n!allocation_ids_caused_termination\x18\x04 \x03(\tB\x0e\n\x0c_descriptionB\t\n\x07_statusB\x15\n\x13_termination_reason"\x8c\x02\n\x16\x46unctionExecutorUpdate\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12\x39\n\x0estartup_stdout\x18\x02 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x01\x88\x01\x01\x12\x39\n\x0estartup_stderr\x18\x03 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\x11\n\x0f_startup_stdoutB\x11\n\x0f_startup_stderr"\xce\x05\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06status\x18\x06 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x03\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x04\x88\x01\x01\x12N\n!total_function_executor_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x05\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x06\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\x07\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x0b\n\t_hostnameB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB$\n"_total_function_executor_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"\xb9\x01\n\x0e\x45xecutorUpdate\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x0ctask_results\x18\x02 \x03(\x0b\x32\x1b.executor_api_pb.TaskResult\x12J\n\x19\x66unction_executor_updates\x18\x03 \x03(\x0b\x32\'.executor_api_pb.FunctionExecutorUpdateB\x0e\n\x0c_executor_id"\xbf\x01\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x12=\n\x0f\x65xecutor_update\x18\x02 \x01(\x0b\x32\x1f.executor_api_pb.ExecutorUpdateH\x01\x88\x01\x01\x42\x11\n\x0f_executor_stateB\x12\n\x10_executor_update"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 \x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xc6\x04\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\t\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\n\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\xad\x01\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_taskB\x10\n\x0e_allocation_id"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\xb0\x06\n\nTaskResult\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x16\n\tnamespace\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x17\n\ngraph_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x06 \x01(\tH\x05\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x07 \x01(\tH\x06\x88\x01\x01\x12\x14\n\x07reducer\x18\x08 \x01(\x08H\x07\x88\x01\x01\x12;\n\x0coutcome_code\x18\t \x01(\x0e\x32 .executor_api_pb.TaskOutcomeCodeH\x08\x88\x01\x01\x12?\n\x0e\x66\x61ilure_reason\x18\n \x01(\x0e\x32".executor_api_pb.TaskFailureReasonH\t\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x36\n\x10\x66unction_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\r \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12\x31\n\x06stderr\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0b\x88\x01\x01\x12\x42\n\x17invocation_error_output\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x10\n\x0e_allocation_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_reducerB\x0f\n\r_outcome_codeB\x11\n\x0f_failure_reasonB\t\n\x07_stdoutB\t\n\x07_stderrB\x1a\n\x18_invocation_error_output*\xd1\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03\x12$\n DATA_PAYLOAD_ENCODING_BINARY_ZIP\x10\x04*\xd6\x01\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_40GB\x10\x01\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_80GB\x10\x02\x12\x1e\n\x1aGPU_MODEL_NVIDIA_H100_80GB\x10\x03\x12\x1d\n\x19GPU_MODEL_NVIDIA_TESLA_T4\x10\x04\x12\x1a\n\x16GPU_MODEL_NVIDIA_A6000\x10\x05\x12\x18\n\x14GPU_MODEL_NVIDIA_A10\x10\x06*\xb3\x01\n\x16\x46unctionExecutorStatus\x12$\n FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12$\n FUNCTION_EXECUTOR_STATUS_PENDING\x10\x01\x12$\n FUNCTION_EXECUTOR_STATUS_RUNNING\x10\x02\x12\'\n#FUNCTION_EXECUTOR_STATUS_TERMINATED\x10\x03*\x94\x04\n!FunctionExecutorTerminationReason\x12\x30\n,FUNCTION_EXECUTOR_TERMINATION_REASON_UNKNOWN\x10\x00\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR\x10\x01\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR\x10\x02\x12H\nDFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT\x10\x03\x12\x32\n.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY\x10\x0c\x12\x37\n3FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR\x10\r\x12\x39\n5FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT\x10\x0e\x12;\n7FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_CANCELLED\x10\x0f*\xa5\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x04*n\n\x0fTaskOutcomeCode\x12\x1d\n\x19TASK_OUTCOME_CODE_UNKNOWN\x10\x00\x12\x1d\n\x19TASK_OUTCOME_CODE_SUCCESS\x10\x01\x12\x1d\n\x19TASK_OUTCOME_CODE_FAILURE\x10\x02*\xb6\x02\n\x11TaskFailureReason\x12\x1f\n\x1bTASK_FAILURE_REASON_UNKNOWN\x10\x00\x12&\n"TASK_FAILURE_REASON_INTERNAL_ERROR\x10\x01\x12&\n"TASK_FAILURE_REASON_FUNCTION_ERROR\x10\x02\x12(\n$TASK_FAILURE_REASON_FUNCTION_TIMEOUT\x10\x03\x12(\n$TASK_FAILURE_REASON_INVOCATION_ERROR\x10\x04\x12&\n"TASK_FAILURE_REASON_TASK_CANCELLED\x10\x05\x12\x34\n0TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED\x10\x06\x32\xff\x01\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x62\x06proto3'
22
+ b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\xeb\x01\n\x0b\x44\x61taPayload\x12\x11\n\x04size\x18\x02 \x01(\x04H\x00\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x02\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x03\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x04\x88\x01\x01\x42\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"e\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_model"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xd8\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xb3\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\x0c \x01(\tH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resourcesB\x1c\n\x1a_output_payload_uri_prefix"\xcf\x02\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12S\n\x12termination_reason\x18\x03 \x01(\x0e\x32\x32.executor_api_pb.FunctionExecutorTerminationReasonH\x02\x88\x01\x01\x12)\n!allocation_ids_caused_termination\x18\x04 \x03(\tB\x0e\n\x0c_descriptionB\t\n\x07_statusB\x15\n\x13_termination_reason"\x8c\x02\n\x16\x46unctionExecutorUpdate\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12\x39\n\x0estartup_stdout\x18\x02 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x01\x88\x01\x01\x12\x39\n\x0estartup_stderr\x18\x03 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\x11\n\x0f_startup_stdoutB\x11\n\x0f_startup_stderr"\xce\x05\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06status\x18\x06 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x03\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x04\x88\x01\x01\x12N\n!total_function_executor_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x05\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x06\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\x07\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x0b\n\t_hostnameB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB$\n"_total_function_executor_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"\xb9\x01\n\x0e\x45xecutorUpdate\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x0ctask_results\x18\x02 \x03(\x0b\x32\x1b.executor_api_pb.TaskResult\x12J\n\x19\x66unction_executor_updates\x18\x03 \x03(\x0b\x32\'.executor_api_pb.FunctionExecutorUpdateB\x0e\n\x0c_executor_id"\xbf\x01\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x12=\n\x0f\x65xecutor_update\x18\x02 \x01(\x0b\x32\x1f.executor_api_pb.ExecutorUpdateH\x01\x88\x01\x01\x42\x11\n\x0f_executor_stateB\x12\n\x10_executor_update"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 \x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xc6\x04\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\t\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\n\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\xad\x01\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_taskB\x10\n\x0e_allocation_id"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\x8e\x06\n\nTaskResult\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x16\n\tnamespace\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x17\n\ngraph_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x06 \x01(\tH\x05\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x07 \x01(\tH\x06\x88\x01\x01\x12;\n\x0coutcome_code\x18\t \x01(\x0e\x32 .executor_api_pb.TaskOutcomeCodeH\x07\x88\x01\x01\x12?\n\x0e\x66\x61ilure_reason\x18\n \x01(\x0e\x32".executor_api_pb.TaskFailureReasonH\x08\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x36\n\x10\x66unction_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\r \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x31\n\x06stderr\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12\x42\n\x17invocation_error_output\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0b\x88\x01\x01\x42\n\n\x08_task_idB\x10\n\x0e_allocation_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\x0f\n\r_outcome_codeB\x11\n\x0f_failure_reasonB\t\n\x07_stdoutB\t\n\x07_stderrB\x1a\n\x18_invocation_error_output*\xd1\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03\x12$\n DATA_PAYLOAD_ENCODING_BINARY_ZIP\x10\x04*\xd6\x01\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_40GB\x10\x01\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_80GB\x10\x02\x12\x1e\n\x1aGPU_MODEL_NVIDIA_H100_80GB\x10\x03\x12\x1d\n\x19GPU_MODEL_NVIDIA_TESLA_T4\x10\x04\x12\x1a\n\x16GPU_MODEL_NVIDIA_A6000\x10\x05\x12\x18\n\x14GPU_MODEL_NVIDIA_A10\x10\x06*\xb3\x01\n\x16\x46unctionExecutorStatus\x12$\n FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12$\n FUNCTION_EXECUTOR_STATUS_PENDING\x10\x01\x12$\n FUNCTION_EXECUTOR_STATUS_RUNNING\x10\x02\x12\'\n#FUNCTION_EXECUTOR_STATUS_TERMINATED\x10\x03*\x94\x04\n!FunctionExecutorTerminationReason\x12\x30\n,FUNCTION_EXECUTOR_TERMINATION_REASON_UNKNOWN\x10\x00\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR\x10\x01\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR\x10\x02\x12H\nDFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT\x10\x03\x12\x32\n.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY\x10\x0c\x12\x37\n3FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR\x10\r\x12\x39\n5FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT\x10\x0e\x12;\n7FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_CANCELLED\x10\x0f*\xa5\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x04*n\n\x0fTaskOutcomeCode\x12\x1d\n\x19TASK_OUTCOME_CODE_UNKNOWN\x10\x00\x12\x1d\n\x19TASK_OUTCOME_CODE_SUCCESS\x10\x01\x12\x1d\n\x19TASK_OUTCOME_CODE_FAILURE\x10\x02*\xb6\x02\n\x11TaskFailureReason\x12\x1f\n\x1bTASK_FAILURE_REASON_UNKNOWN\x10\x00\x12&\n"TASK_FAILURE_REASON_INTERNAL_ERROR\x10\x01\x12&\n"TASK_FAILURE_REASON_FUNCTION_ERROR\x10\x02\x12(\n$TASK_FAILURE_REASON_FUNCTION_TIMEOUT\x10\x03\x12(\n$TASK_FAILURE_REASON_INVOCATION_ERROR\x10\x04\x12&\n"TASK_FAILURE_REASON_TASK_CANCELLED\x10\x05\x12\x34\n0TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED\x10\x06\x32\xff\x01\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x62\x06proto3'
23
23
  )
24
24
 
25
25
  _globals = globals()
@@ -31,20 +31,20 @@ if not _descriptor._USE_C_DESCRIPTORS:
31
31
  DESCRIPTOR._loaded_options = None
32
32
  _globals["_EXECUTORSTATE_LABELSENTRY"]._loaded_options = None
33
33
  _globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_options = b"8\001"
34
- _globals["_DATAPAYLOADENCODING"]._serialized_start = 5366
35
- _globals["_DATAPAYLOADENCODING"]._serialized_end = 5575
36
- _globals["_GPUMODEL"]._serialized_start = 5578
37
- _globals["_GPUMODEL"]._serialized_end = 5792
38
- _globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 5795
39
- _globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 5974
40
- _globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_start = 5977
41
- _globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_end = 6509
42
- _globals["_EXECUTORSTATUS"]._serialized_start = 6512
43
- _globals["_EXECUTORSTATUS"]._serialized_end = 6677
44
- _globals["_TASKOUTCOMECODE"]._serialized_start = 6679
45
- _globals["_TASKOUTCOMECODE"]._serialized_end = 6789
46
- _globals["_TASKFAILUREREASON"]._serialized_start = 6792
47
- _globals["_TASKFAILUREREASON"]._serialized_end = 7102
34
+ _globals["_DATAPAYLOADENCODING"]._serialized_start = 5332
35
+ _globals["_DATAPAYLOADENCODING"]._serialized_end = 5541
36
+ _globals["_GPUMODEL"]._serialized_start = 5544
37
+ _globals["_GPUMODEL"]._serialized_end = 5758
38
+ _globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 5761
39
+ _globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 5940
40
+ _globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_start = 5943
41
+ _globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_end = 6475
42
+ _globals["_EXECUTORSTATUS"]._serialized_start = 6478
43
+ _globals["_EXECUTORSTATUS"]._serialized_end = 6643
44
+ _globals["_TASKOUTCOMECODE"]._serialized_start = 6645
45
+ _globals["_TASKOUTCOMECODE"]._serialized_end = 6755
46
+ _globals["_TASKFAILUREREASON"]._serialized_start = 6758
47
+ _globals["_TASKFAILUREREASON"]._serialized_end = 7068
48
48
  _globals["_DATAPAYLOAD"]._serialized_start = 55
49
49
  _globals["_DATAPAYLOAD"]._serialized_end = 290
50
50
  _globals["_GPURESOURCES"]._serialized_start = 292
@@ -82,7 +82,7 @@ if not _descriptor._USE_C_DESCRIPTORS:
82
82
  _globals["_DESIREDEXECUTORSTATE"]._serialized_start = 4359
83
83
  _globals["_DESIREDEXECUTORSTATE"]._serialized_end = 4544
84
84
  _globals["_TASKRESULT"]._serialized_start = 4547
85
- _globals["_TASKRESULT"]._serialized_end = 5363
86
- _globals["_EXECUTORAPI"]._serialized_start = 7105
87
- _globals["_EXECUTORAPI"]._serialized_end = 7360
85
+ _globals["_TASKRESULT"]._serialized_end = 5329
86
+ _globals["_EXECUTORAPI"]._serialized_start = 7071
87
+ _globals["_EXECUTORAPI"]._serialized_end = 7326
88
88
  # @@protoc_insertion_point(module_scope)
@@ -536,7 +536,6 @@ class TaskResult(_message.Message):
536
536
  "graph_version",
537
537
  "function_name",
538
538
  "graph_invocation_id",
539
- "reducer",
540
539
  "outcome_code",
541
540
  "failure_reason",
542
541
  "next_functions",
@@ -552,7 +551,6 @@ class TaskResult(_message.Message):
552
551
  GRAPH_VERSION_FIELD_NUMBER: _ClassVar[int]
553
552
  FUNCTION_NAME_FIELD_NUMBER: _ClassVar[int]
554
553
  GRAPH_INVOCATION_ID_FIELD_NUMBER: _ClassVar[int]
555
- REDUCER_FIELD_NUMBER: _ClassVar[int]
556
554
  OUTCOME_CODE_FIELD_NUMBER: _ClassVar[int]
557
555
  FAILURE_REASON_FIELD_NUMBER: _ClassVar[int]
558
556
  NEXT_FUNCTIONS_FIELD_NUMBER: _ClassVar[int]
@@ -567,7 +565,6 @@ class TaskResult(_message.Message):
567
565
  graph_version: str
568
566
  function_name: str
569
567
  graph_invocation_id: str
570
- reducer: bool
571
568
  outcome_code: TaskOutcomeCode
572
569
  failure_reason: TaskFailureReason
573
570
  next_functions: _containers.RepeatedScalarFieldContainer[str]
@@ -584,7 +581,6 @@ class TaskResult(_message.Message):
584
581
  graph_version: _Optional[str] = ...,
585
582
  function_name: _Optional[str] = ...,
586
583
  graph_invocation_id: _Optional[str] = ...,
587
- reducer: bool = ...,
588
584
  outcome_code: _Optional[_Union[TaskOutcomeCode, str]] = ...,
589
585
  failure_reason: _Optional[_Union[TaskFailureReason, str]] = ...,
590
586
  next_functions: _Optional[_Iterable[str]] = ...,
@@ -6,7 +6,7 @@ import grpc
6
6
 
7
7
  from indexify.proto import executor_api_pb2 as indexify_dot_proto_dot_executor__api__pb2
8
8
 
9
- GRPC_GENERATED_VERSION = "1.73.0"
9
+ GRPC_GENERATED_VERSION = "1.73.1"
10
10
  GRPC_VERSION = grpc.__version__
11
11
  _version_not_supported = False
12
12
 
@@ -1,20 +0,0 @@
1
- from .health_checker import HealthChecker, HealthCheckResult
2
-
3
- HEALTH_CHECKER_NAME = "GenericHealthChecker"
4
-
5
-
6
- class GenericHealthChecker(HealthChecker):
7
- """A generic health checker that doesn't depend on machine type and other features of the environment.
8
-
9
- The health checker uses software signals available in all environments like Function Executor failure rates.
10
- """
11
-
12
- def __init__(self):
13
- pass
14
-
15
- async def check(self) -> HealthCheckResult:
16
- return HealthCheckResult(
17
- is_success=True,
18
- status_message="The health check is always successful",
19
- checker_name=HEALTH_CHECKER_NAME,
20
- )
@@ -1,12 +0,0 @@
1
- class HealthCheckResult:
2
- def __init__(self, checker_name: str, is_success: bool, status_message: str):
3
- self.checker_name = checker_name
4
- self.is_success = is_success
5
- self.status_message = status_message
6
-
7
-
8
- class HealthChecker:
9
- """Abstract base class for health checkers."""
10
-
11
- async def check(self) -> HealthCheckResult:
12
- raise NotImplementedError("Subclasses must implement this method.")
File without changes