indexify 0.4.10__tar.gz → 0.4.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. {indexify-0.4.10 → indexify-0.4.11}/PKG-INFO +2 -2
  2. {indexify-0.4.10 → indexify-0.4.11}/pyproject.toml +2 -2
  3. indexify-0.4.11/src/indexify/executor/function_executor_controller/destroy_function_executor.py +31 -0
  4. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/events.py +3 -17
  5. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/function_executor_controller.py +141 -112
  6. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/metrics/function_executor_controller.py +25 -18
  7. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/run_task.py +14 -0
  8. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/state_reconciler.py +2 -7
  9. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/proto/executor_api.proto +1 -4
  10. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/proto/executor_api_pb2.py +10 -10
  11. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/proto/executor_api_pb2.pyi +0 -12
  12. indexify-0.4.10/src/indexify/executor/function_executor_controller/destroy_function_executor.py +0 -28
  13. {indexify-0.4.10 → indexify-0.4.11}/README.md +0 -0
  14. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/cli/__init__.py +0 -0
  15. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/cli/build_image.py +0 -0
  16. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/cli/deploy.py +0 -0
  17. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/cli/executor.py +0 -0
  18. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/README.md +0 -0
  19. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/blob_store/blob_store.py +0 -0
  20. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/blob_store/local_fs_blob_store.py +0 -0
  21. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/blob_store/metrics/blob_store.py +0 -0
  22. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/blob_store/s3_blob_store.py +0 -0
  23. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/channel_manager.py +0 -0
  24. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/executor.py +0 -0
  25. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_allowlist.py +0 -0
  26. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/function_executor.py +0 -0
  27. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/health_checker.py +0 -0
  28. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/invocation_state_client.py +0 -0
  29. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
  30. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
  31. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
  32. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
  33. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
  34. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -0
  35. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
  36. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +0 -0
  37. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/__init__.py +0 -0
  38. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/completed_task_metrics.py +0 -0
  39. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/create_function_executor.py +0 -0
  40. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/debug_event_loop.py +0 -0
  41. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/downloads.py +0 -0
  42. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/function_executor_startup_output.py +0 -0
  43. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/loggers.py +0 -0
  44. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/message_validators.py +0 -0
  45. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +0 -0
  46. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/metrics/downloads.py +0 -0
  47. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/metrics/run_task.py +0 -0
  48. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/metrics/upload_task_output.py +0 -0
  49. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/prepare_task.py +0 -0
  50. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/task_info.py +0 -0
  51. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/task_output.py +0 -0
  52. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/upload_task_output.py +0 -0
  53. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/host_resources/host_resources.py +0 -0
  54. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/host_resources/nvidia_gpu.py +0 -0
  55. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py +0 -0
  56. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/metrics/channel_manager.py +0 -0
  57. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/metrics/executor.py +0 -0
  58. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/metrics/state_reconciler.py +0 -0
  59. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/metrics/state_reporter.py +0 -0
  60. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/handler.py +0 -0
  61. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
  62. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -0
  63. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
  64. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/metrics.py +0 -0
  65. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
  66. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/server.py +0 -0
  67. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
  68. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/state_reporter.py +0 -0
  69. {indexify-0.4.10 → indexify-0.4.11}/src/indexify/proto/executor_api_pb2_grpc.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: indexify
3
- Version: 0.4.10
3
+ Version: 0.4.11
4
4
  Summary: Open Source Indexify components and helper tools
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -17,7 +17,7 @@ Requires-Dist: aiohttp (>=3.11.0,<4.0.0)
17
17
  Requires-Dist: boto3 (>=1.37.30,<2.0.0)
18
18
  Requires-Dist: prometheus-client (>=0.21.1,<0.22.0)
19
19
  Requires-Dist: psutil (>=7.0.0,<8.0.0)
20
- Requires-Dist: tensorlake (==0.2.7)
20
+ Requires-Dist: tensorlake (==0.2.8)
21
21
  Project-URL: Repository, https://github.com/tensorlakeai/indexify
22
22
  Description-Content-Type: text/markdown
23
23
 
@@ -1,7 +1,7 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
3
  # Incremented if any of the components provided in this packages are updated.
4
- version = "0.4.10"
4
+ version = "0.4.11"
5
5
  description = "Open Source Indexify components and helper tools"
6
6
  authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
7
7
  license = "Apache 2.0"
@@ -25,7 +25,7 @@ prometheus-client = "^0.21.1"
25
25
  psutil = "^7.0.0"
26
26
  # Adds function-executor binary, utils lib, sdk used in indexify-cli commands.
27
27
  # We need to specify the tensorlake version exactly because pip install doesn't respect poetry.lock files.
28
- tensorlake = "0.2.7"
28
+ tensorlake = "0.2.8"
29
29
  # Uncomment the next line to use local tensorlake package (only for development!)
30
30
  # tensorlake = { path = "../tensorlake", develop = true }
31
31
  # pydantic is provided by tensorlake
@@ -0,0 +1,31 @@
1
+ import asyncio
2
+ from typing import Any, Optional
3
+
4
+ from indexify.executor.function_executor.function_executor import FunctionExecutor
5
+
6
+ from .events import FunctionExecutorDestroyed
7
+
8
+
9
+ async def destroy_function_executor(
10
+ function_executor: Optional[FunctionExecutor],
11
+ lock: asyncio.Lock,
12
+ logger: Any,
13
+ ) -> FunctionExecutorDestroyed:
14
+ """Destroys the function executor if it's not None.
15
+
16
+ The supplied lock is used to ensure that if a destroy operation is in progress,
17
+ then another caller won't return immediately assuming that the destroy is complete
18
+ due to its idempotency.
19
+
20
+ Doesn't raise any exceptions.
21
+ """
22
+ logger = logger.bind(module=__name__)
23
+
24
+ if function_executor is not None:
25
+ async with lock:
26
+ logger.info(
27
+ "destroying function executor",
28
+ )
29
+ await function_executor.destroy()
30
+
31
+ return FunctionExecutorDestroyed(is_success=True)
@@ -55,19 +55,12 @@ class FunctionExecutorDestroyed(BaseEvent):
55
55
  Event indicating that Function Executor has been destroyed.
56
56
  """
57
57
 
58
- def __init__(
59
- self, is_success: bool, termination_reason: FunctionExecutorTerminationReason
60
- ):
58
+ def __init__(self, is_success: bool):
61
59
  super().__init__(EventType.FUNCTION_EXECUTOR_DESTROYED)
62
60
  self.is_success: bool = is_success
63
- self.termination_reason: FunctionExecutorTerminationReason = termination_reason
64
61
 
65
62
  def __str__(self) -> str:
66
- return (
67
- f"Event(type={self.event_type.name}, "
68
- f"is_success={self.is_success}, "
69
- f"termination_reason={FunctionExecutorTerminationReason.Name(self.termination_reason)})"
70
- )
63
+ return f"Event(type={self.event_type.name}, " f"is_success={self.is_success})"
71
64
 
72
65
 
73
66
  class ShutdownInitiated(BaseEvent):
@@ -75,15 +68,8 @@ class ShutdownInitiated(BaseEvent):
75
68
  Event indicating that Function Executor shutdown has been initiated.
76
69
  """
77
70
 
78
- def __init__(self, termination_reason: FunctionExecutorTerminationReason):
71
+ def __init__(self):
79
72
  super().__init__(EventType.SHUTDOWN_INITIATED)
80
- self.termination_reason: FunctionExecutorTerminationReason = termination_reason
81
-
82
- def __str__(self) -> str:
83
- return (
84
- f"Event(type={self.event_type.name}, "
85
- f"termination_reason={FunctionExecutorTerminationReason.Name(self.termination_reason)})"
86
- )
87
73
 
88
74
 
89
75
  class TaskPreparationFinished(BaseEvent):
@@ -1,6 +1,7 @@
1
1
  import asyncio
2
2
  import time
3
3
  from collections.abc import Coroutine
4
+ from enum import Enum
4
5
  from pathlib import Path
5
6
  from typing import Any, Dict, List, Optional
6
7
 
@@ -43,12 +44,14 @@ from .events import (
43
44
  from .function_executor_startup_output import FunctionExecutorStartupOutput
44
45
  from .loggers import function_executor_logger, task_allocation_logger
45
46
  from .metrics.function_executor_controller import (
46
- METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_PENDING,
47
- METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_RUNNING,
48
- METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_TERMINATED,
49
- METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_UNKNOWN,
47
+ METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_NOT_STARTED,
48
+ METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_RUNNING,
49
+ METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_STARTING_UP,
50
+ METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATED,
51
+ METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATING,
52
+ METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_UNKNOWN,
50
53
  metric_control_loop_handle_event_latency,
51
- metric_function_executors_with_status,
54
+ metric_function_executors_with_state,
52
55
  metric_runnable_tasks,
53
56
  metric_runnable_tasks_per_function_name,
54
57
  metric_schedule_task_latency,
@@ -61,6 +64,16 @@ from .task_output import TaskOutput
61
64
  from .upload_task_output import upload_task_output
62
65
 
63
66
 
67
+ # Actual FE controller states, they are a bit different from statuses reported to the Server.
68
+ # All the valid state transitions are forward only (can skip multiple states in a row).
69
+ class _FE_CONTROLLER_STATE(Enum):
70
+ NOT_STARTED = 1
71
+ STARTING_UP = 2
72
+ RUNNING = 3
73
+ TERMINATING = 4
74
+ TERMINATED = 5
75
+
76
+
64
77
  class FunctionExecutorController:
65
78
  def __init__(
66
79
  self,
@@ -94,19 +107,18 @@ class FunctionExecutorController:
94
107
  self._logger: Any = function_executor_logger(
95
108
  function_executor_description, logger.bind(module=__name__)
96
109
  )
97
- # Mutable state. No lock needed as it's modified by async tasks running in
98
- # the same event loop.
110
+ self._destroy_lock: asyncio.Lock = asyncio.Lock()
111
+ # Mutable state. No lock needed as it's modified by async tasks running in the same event loop.
99
112
  self._fe: Optional[FunctionExecutor] = None
100
- self._fe_termination_reason: FunctionExecutorTerminationReason = (
101
- None # Optional
102
- )
103
- # FE Status reported to Server.
104
- self._status: FunctionExecutorStatus = (
105
- FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_UNKNOWN
106
- )
107
- metric_function_executors_with_status.labels(
108
- status=_to_fe_status_metric_label(self._status, self._logger)
113
+ self._fe_termination_reason: Optional[FunctionExecutorTerminationReason] = None
114
+ self._internal_state = _FE_CONTROLLER_STATE.NOT_STARTED
115
+ metric_function_executors_with_state.labels(
116
+ state=_to_fe_state_metric_label(self._internal_state, self._logger)
109
117
  ).inc()
118
+ self._reported_state: FunctionExecutorState = FunctionExecutorState(
119
+ description=function_executor_description,
120
+ status=FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_UNKNOWN,
121
+ )
110
122
  # Ordered list of events to be processed by the control loop.
111
123
  self._events: List[BaseEvent] = []
112
124
  # Asyncio event used to notify the control loop that there are new events to process.
@@ -124,13 +136,6 @@ class FunctionExecutorController:
124
136
  def function_executor_id(self) -> str:
125
137
  return self._fe_description.id
126
138
 
127
- def status(self) -> FunctionExecutorStatus:
128
- """Returns the current status of the Function Executor.
129
-
130
- Not blocking.
131
- """
132
- return self._status
133
-
134
139
  def add_task_allocation(self, task_allocation: TaskAllocation) -> None:
135
140
  """Adds a task to the Function Executor.
136
141
 
@@ -205,9 +210,10 @@ class FunctionExecutorController:
205
210
  """Starts up the Function Executor and prepares it to run tasks.
206
211
 
207
212
  Not blocking. Never raises exceptions."""
208
- if self._control_loop_aio_task is not None:
213
+ if self._internal_state != _FE_CONTROLLER_STATE.NOT_STARTED:
209
214
  self._logger.warning(
210
- "ignoring startup call as the Function Executor is already started"
215
+ "function executor state is not NOT_STARTED, ignoring startup call",
216
+ internal_state=self._internal_state.name,
211
217
  )
212
218
  return
213
219
 
@@ -215,7 +221,13 @@ class FunctionExecutorController:
215
221
  self._control_loop(),
216
222
  name="function executor control loop",
217
223
  )
218
- self._set_status(FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_PENDING)
224
+ self._update_internal_state(_FE_CONTROLLER_STATE.STARTING_UP)
225
+ self._update_reported_state(
226
+ FunctionExecutorState(
227
+ description=self._fe_description,
228
+ status=FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_PENDING,
229
+ )
230
+ )
219
231
  next_aio = create_function_executor(
220
232
  function_executor_description=self._fe_description,
221
233
  function_executor_server_factory=self._fe_server_factory,
@@ -237,17 +249,13 @@ class FunctionExecutorController:
237
249
  ),
238
250
  )
239
251
 
240
- async def shutdown(
241
- self, termination_reason: FunctionExecutorTerminationReason
242
- ) -> None:
252
+ async def shutdown(self) -> None:
243
253
  """Shutsdown the Function Executor and frees all of its resources.
244
254
 
245
- All the tasks are reported as failed with FE Terminated failure code.
255
+ No task outcomes and outputs are getting reported to Server after this call.
246
256
  Doesn't raise any exceptions. Blocks until the shutdown is complete.
247
257
  """
248
- self._add_event(
249
- ShutdownInitiated(termination_reason=termination_reason), source="shutdown"
250
- )
258
+ self._add_event(ShutdownInitiated(), source="shutdown")
251
259
  try:
252
260
  await self._control_loop_aio_task
253
261
  except asyncio.CancelledError:
@@ -259,51 +267,49 @@ class FunctionExecutorController:
259
267
  )
260
268
  self._logger.info("function executor controller shutdown finished")
261
269
 
262
- def _set_status(
270
+ def _update_internal_state(self, new_state: _FE_CONTROLLER_STATE) -> None:
271
+ """Updates the internal state of the Function Executor Controller.
272
+
273
+ Not blocking. Never raises exceptions."""
274
+ old_state: _FE_CONTROLLER_STATE = self._internal_state
275
+ self._internal_state = new_state
276
+
277
+ self._logger.info(
278
+ "function executor internal state changed",
279
+ old_state=old_state.name,
280
+ new_state=new_state.name,
281
+ )
282
+
283
+ metric_function_executors_with_state.labels(
284
+ state=_to_fe_state_metric_label(old_state, self._logger)
285
+ ).dec()
286
+ metric_function_executors_with_state.labels(
287
+ state=_to_fe_state_metric_label(new_state, self._logger)
288
+ ).inc()
289
+
290
+ def _update_reported_state(
263
291
  self,
264
- status: FunctionExecutorStatus,
292
+ new_state: FunctionExecutorState,
265
293
  ) -> None:
266
- """Sets Function Executor status and reports it to the Server.
294
+ """Sets new Function Executor state and reports it to the Server.
267
295
 
268
296
  Not blocking. Never raises exceptions."""
269
- old_status: FunctionExecutorStatus = self._status
270
- new_status: FunctionExecutorStatus = status
271
- self._status: FunctionExecutorStatus = new_status
297
+ old_state: FunctionExecutorState = self._reported_state
298
+ self._reported_state = new_state
272
299
 
273
300
  self._logger.info(
274
- "function executor status changed",
275
- old_status=FunctionExecutorStatus.Name(old_status),
276
- new_status=FunctionExecutorStatus.Name(new_status),
301
+ "function executor grpc status changed",
302
+ old_status=FunctionExecutorStatus.Name(old_state.status),
303
+ new_status=FunctionExecutorStatus.Name(new_state.status),
277
304
  termination_reason=_termination_reason_to_short_name(
278
- self._fe_termination_reason
305
+ new_state.termination_reason
279
306
  ),
280
307
  )
281
- metric_function_executors_with_status.labels(
282
- status=_to_fe_status_metric_label(old_status, self._logger)
283
- ).dec()
284
- metric_function_executors_with_status.labels(
285
- status=_to_fe_status_metric_label(new_status, self._logger)
286
- ).inc()
287
308
 
288
- self._state_reporter.update_function_executor_state(self._current_state())
309
+ self._state_reporter.update_function_executor_state(new_state)
289
310
  # Report the status change to the Server asap to reduce latency in the system.
290
311
  self._state_reporter.schedule_state_report()
291
312
 
292
- def _current_state(self) -> FunctionExecutorState:
293
- """Returns the current state of the Function Executor.
294
-
295
- Not blocking. Never raises exceptions.
296
- """
297
- termination_reason: Optional[FunctionExecutorTerminationReason] = None
298
- if self._fe_termination_reason is not None:
299
- termination_reason = self._fe_termination_reason
300
-
301
- return FunctionExecutorState(
302
- description=self._fe_description,
303
- status=self._status,
304
- termination_reason=termination_reason,
305
- )
306
-
307
313
  async def _control_loop(self) -> None:
308
314
  """Runs control loop that coordinates all the work done by the Function Executor.
309
315
 
@@ -454,13 +460,17 @@ class FunctionExecutorController:
454
460
  self._state_reporter.schedule_state_report()
455
461
 
456
462
  if event.function_executor is None:
457
- self._destroy_function_executor_before_termination(
458
- event.output.termination_reason
459
- )
463
+ self._start_termination(termination_reason=event.output.termination_reason)
460
464
  return
461
465
 
462
466
  self._fe = event.function_executor
463
- self._set_status(FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_RUNNING)
467
+ self._update_internal_state(_FE_CONTROLLER_STATE.RUNNING)
468
+ self._update_reported_state(
469
+ FunctionExecutorState(
470
+ description=self._fe_description,
471
+ status=FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_RUNNING,
472
+ )
473
+ )
464
474
  # Health checker starts after FE creation and gets automatically stopped on FE destroy.
465
475
  self._fe.health_checker().start(self._health_check_failed_callback)
466
476
  self._add_event(
@@ -479,9 +489,18 @@ class FunctionExecutorController:
479
489
  self._logger.error(
480
490
  "Function Executor destroy failed unexpectedly, this should never happen",
481
491
  )
482
- # Set the status only after the FE got destroyed because Server assumes that all FE resources are freed when the status changes.
483
- self._fe_termination_reason = event.termination_reason
484
- self._set_status(FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_TERMINATED)
492
+
493
+ self._fe = None
494
+ # Set reported status only after the FE got destroyed because Server assumes that all FE resources are freed when the status changes.
495
+ self._update_reported_state(
496
+ FunctionExecutorState(
497
+ description=self._fe_description,
498
+ status=FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_TERMINATED,
499
+ termination_reason=self._fe_termination_reason,
500
+ )
501
+ )
502
+ self._update_internal_state(_FE_CONTROLLER_STATE.TERMINATED)
503
+
485
504
  # Invoke the scheduler so it can fail runnable tasks with FE Terminated error.
486
505
  self._add_event(
487
506
  ScheduleTaskExecution(),
@@ -493,7 +512,7 @@ class FunctionExecutorController:
493
512
  "Function Executor health check failed, terminating Function Executor",
494
513
  reason=result.reason,
495
514
  )
496
- self._destroy_function_executor_before_termination(
515
+ self._start_termination(
497
516
  termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY
498
517
  )
499
518
 
@@ -532,14 +551,15 @@ class FunctionExecutorController:
532
551
  if len(self._runnable_tasks) == 0:
533
552
  return
534
553
 
535
- if self._status not in [
536
- FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_RUNNING,
537
- FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_TERMINATED,
554
+ if self._internal_state not in [
555
+ _FE_CONTROLLER_STATE.RUNNING,
556
+ _FE_CONTROLLER_STATE.TERMINATING,
557
+ _FE_CONTROLLER_STATE.TERMINATED,
538
558
  ]:
539
- return # Can't progress pending task with the current status.
559
+ return # Can't progress runnable tasks in the current state.
540
560
 
541
561
  if (
542
- self._status == FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_RUNNING
562
+ self._internal_state == _FE_CONTROLLER_STATE.RUNNING
543
563
  and self._running_task is not None
544
564
  ):
545
565
  return
@@ -555,12 +575,15 @@ class FunctionExecutorController:
555
575
  if task_info.is_cancelled:
556
576
  task_info.output = TaskOutput.task_cancelled(task_info.allocation)
557
577
  self._start_task_output_upload(task_info)
558
- elif self._status == FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_TERMINATED:
578
+ elif self._internal_state in [
579
+ _FE_CONTROLLER_STATE.TERMINATING,
580
+ _FE_CONTROLLER_STATE.TERMINATED,
581
+ ]:
559
582
  task_info.output = TaskOutput.function_executor_terminated(
560
583
  task_info.allocation
561
584
  )
562
585
  self._start_task_output_upload(task_info)
563
- elif self._status == FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_RUNNING:
586
+ elif self._internal_state == _FE_CONTROLLER_STATE.RUNNING:
564
587
  self._running_task = task_info
565
588
  next_aio = run_task_on_function_executor(
566
589
  task_info=task_info,
@@ -603,7 +626,7 @@ class FunctionExecutorController:
603
626
  ScheduleTaskExecution(), source="_handle_event_task_execution_finished"
604
627
  )
605
628
  else:
606
- self._destroy_function_executor_before_termination(
629
+ self._start_termination(
607
630
  termination_reason=event.function_executor_termination_reason
608
631
  )
609
632
 
@@ -660,24 +683,31 @@ class FunctionExecutorController:
660
683
  )
661
684
  self._state_reporter.schedule_state_report()
662
685
 
663
- def _destroy_function_executor_before_termination(
686
+ def _start_termination(
664
687
  self, termination_reason: FunctionExecutorTerminationReason
665
688
  ) -> None:
666
- """Destroys the Function Executor and frees all its resources to prepare for transitioning to the TERMINATED state.
689
+ """Starts termination of the Function Executor if it's not started yet.
667
690
 
668
691
  Doesn't raise any exceptions. Doesn't block.
669
692
  """
693
+ if self._internal_state in [
694
+ _FE_CONTROLLER_STATE.TERMINATING,
695
+ _FE_CONTROLLER_STATE.TERMINATED,
696
+ ]:
697
+ # _start_termination() can be called multiple times, e.g. by each failed task alloc
698
+ # when the FE is unhealthy. Dedup the calls to keep state machine consistent.
699
+ return
700
+
701
+ self._fe_termination_reason = termination_reason
702
+ self._update_internal_state(_FE_CONTROLLER_STATE.TERMINATING)
670
703
  next_aio = destroy_function_executor(
671
704
  function_executor=self._fe,
672
- termination_reason=termination_reason,
705
+ lock=self._destroy_lock,
673
706
  logger=self._logger,
674
707
  )
675
- self._fe = None
676
708
  self._spawn_aio_for_fe(
677
709
  aio=next_aio,
678
- on_exception=FunctionExecutorDestroyed(
679
- is_success=False, termination_reason=termination_reason
680
- ),
710
+ on_exception=FunctionExecutorDestroyed(is_success=False),
681
711
  )
682
712
 
683
713
  async def _shutdown_no_exceptions(self, event: ShutdownInitiated) -> None:
@@ -716,16 +746,15 @@ class FunctionExecutorController:
716
746
  # BaseException includes asyncio.CancelledError which is always raised here.
717
747
  pass
718
748
 
719
- if self._status != FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_TERMINATED:
720
- self._handle_event_function_executor_destroyed(
721
- await destroy_function_executor(
722
- function_executor=self._fe,
723
- termination_reason=event.termination_reason,
724
- logger=self._logger,
725
- )
726
- )
727
- metric_function_executors_with_status.labels(
728
- status=_to_fe_status_metric_label(self._status, self._logger)
749
+ await destroy_function_executor(
750
+ function_executor=self._fe,
751
+ lock=self._destroy_lock,
752
+ logger=self._logger,
753
+ )
754
+
755
+ # Cleanup the metric from this FE.
756
+ metric_function_executors_with_state.labels(
757
+ state=_to_fe_state_metric_label(self._internal_state, self._logger)
729
758
  ).dec()
730
759
 
731
760
  self._state_reporter.remove_function_executor_state(self.function_executor_id())
@@ -735,21 +764,23 @@ class FunctionExecutorController:
735
764
  debug_print_events(events=self._events, logger=self._logger)
736
765
 
737
766
 
738
- def _to_fe_status_metric_label(status: FunctionExecutorStatus, logger: Any) -> str:
739
- if status == FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_UNKNOWN:
740
- return METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_UNKNOWN
741
- elif status == FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_PENDING:
742
- return METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_PENDING
743
- elif status == FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_RUNNING:
744
- return METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_RUNNING
745
- elif status == FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_TERMINATED:
746
- return METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_TERMINATED
767
+ def _to_fe_state_metric_label(state: _FE_CONTROLLER_STATE, logger: Any) -> str:
768
+ if state == _FE_CONTROLLER_STATE.NOT_STARTED:
769
+ return METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_NOT_STARTED
770
+ elif state == _FE_CONTROLLER_STATE.STARTING_UP:
771
+ return METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_STARTING_UP
772
+ elif state == _FE_CONTROLLER_STATE.RUNNING:
773
+ return METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_RUNNING
774
+ elif state == _FE_CONTROLLER_STATE.TERMINATING:
775
+ return METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATING
776
+ elif state == _FE_CONTROLLER_STATE.TERMINATED:
777
+ return METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATED
747
778
  else:
748
779
  logger.error(
749
- "unexpected Function Executor status",
750
- status=FunctionExecutorStatus.Name(status),
780
+ "unexpected Function Executor internal state",
781
+ state=state.name,
751
782
  )
752
- return METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_UNKNOWN
783
+ return METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_UNKNOWN
753
784
 
754
785
 
755
786
  _termination_reason_to_short_name_map = {
@@ -757,8 +788,6 @@ _termination_reason_to_short_name_map = {
757
788
  FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR: "STARTUP_FAILED_INTERNAL_ERROR",
758
789
  FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR: "STARTUP_FAILED_FUNCTION_ERROR",
759
790
  FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT: "STARTUP_FAILED_FUNCTION_TIMEOUT",
760
- FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_EXECUTOR_SHUTDOWN: "EXECUTOR_SHUTDOWN",
761
- FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE: "REMOVED_FROM_DESIRED_STATE",
762
791
  FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY: "UNHEALTHY",
763
792
  FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR: "INTERNAL_ERROR",
764
793
  FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT: "FUNCTION_TIMEOUT",
@@ -34,27 +34,34 @@ metric_runnable_tasks_per_function_name: prometheus_client.Gauge = (
34
34
  )
35
35
  )
36
36
 
37
- metric_function_executors_with_status: prometheus_client.Gauge = (
38
- prometheus_client.Gauge(
39
- "function_executors_with_status",
40
- "Number of Function Executors with a particular status",
41
- ["status"],
42
- )
37
+ metric_function_executors_with_state: prometheus_client.Gauge = prometheus_client.Gauge(
38
+ "function_executors_with_state",
39
+ "Number of Function Executors with a particular internal state",
40
+ ["state"],
43
41
  )
44
- METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_UNKNOWN = "unknown"
45
- METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_PENDING = "pending"
46
- METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_RUNNING = "running"
47
- METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_TERMINATED = "terminated"
42
+ METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_UNKNOWN = "unknown"
43
+ METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_NOT_STARTED = "not_started"
44
+ METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_STARTING_UP = "starting_up"
45
+ METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_RUNNING = "running"
46
+ METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATING = "terminating"
47
+ METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATED = "terminated"
48
+
48
49
 
49
- metric_function_executors_with_status.labels(
50
- status=METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_UNKNOWN
50
+ metric_function_executors_with_state.labels(
51
+ state=METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_UNKNOWN
52
+ )
53
+ metric_function_executors_with_state.labels(
54
+ state=METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_NOT_STARTED
55
+ )
56
+ metric_function_executors_with_state.labels(
57
+ state=METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_STARTING_UP
51
58
  )
52
- metric_function_executors_with_status.labels(
53
- status=METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_PENDING
59
+ metric_function_executors_with_state.labels(
60
+ state=METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_RUNNING
54
61
  )
55
- metric_function_executors_with_status.labels(
56
- status=METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_RUNNING
62
+ metric_function_executors_with_state.labels(
63
+ state=METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATING
57
64
  )
58
- metric_function_executors_with_status.labels(
59
- status=METRIC_FUNCTION_EXECUTORS_WITH_STATUS_LABEL_TERMINATED
65
+ metric_function_executors_with_state.labels(
66
+ state=METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATED
60
67
  )
@@ -1,4 +1,6 @@
1
1
  import asyncio
2
+ import os
3
+ import random
2
4
  import time
3
5
  from typing import Any, Optional
4
6
 
@@ -38,6 +40,10 @@ from .metrics.run_task import (
38
40
  from .task_info import TaskInfo
39
41
  from .task_output import TaskMetrics, TaskOutput
40
42
 
43
+ _ENABLE_INJECT_TASK_CANCELLATIONS = (
44
+ os.getenv("INDEXIFY_INJECT_TASK_CANCELLATIONS", "0") == "1"
45
+ )
46
+
41
47
 
42
48
  async def run_task_on_function_executor(
43
49
  task_info: TaskInfo, function_executor: FunctionExecutor, logger: Any
@@ -177,6 +183,14 @@ def _task_output_from_function_executor_response(
177
183
  response_validator.required_field("invocation_error_output")
178
184
  invocation_error_output = response.invocation_error_output
179
185
 
186
+ if _ENABLE_INJECT_TASK_CANCELLATIONS:
187
+ logger.warning("injecting cancellation failure for the task allocation")
188
+ if (
189
+ random.random() < 0.5
190
+ ): # 50% chance to get stable reproduction in manual testing
191
+ outcome_code = TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE
192
+ failure_reason = TaskFailureReason.TASK_FAILURE_REASON_TASK_CANCELLED
193
+
180
194
  return TaskOutput(
181
195
  allocation=allocation,
182
196
  outcome_code=outcome_code,
@@ -7,7 +7,6 @@ from tensorlake.function_executor.proto.message_validator import MessageValidato
7
7
  from indexify.proto.executor_api_pb2 import (
8
8
  DesiredExecutorState,
9
9
  FunctionExecutorDescription,
10
- FunctionExecutorTerminationReason,
11
10
  GetDesiredExecutorStatesRequest,
12
11
  TaskAllocation,
13
12
  )
@@ -123,9 +122,7 @@ class ExecutorStateReconciler:
123
122
  for fe_controller in self._function_executor_controllers.values():
124
123
  fe_shutdown_tasks.append(
125
124
  asyncio.create_task(
126
- fe_controller.shutdown(
127
- termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_EXECUTOR_SHUTDOWN
128
- ),
125
+ fe_controller.shutdown(),
129
126
  name=f"Shutdown Function Executor {fe_controller.function_executor_id()}",
130
127
  )
131
128
  )
@@ -327,9 +324,7 @@ class ExecutorStateReconciler:
327
324
  self._function_executor_controllers.pop(function_executor_id)
328
325
  )
329
326
  asyncio.create_task(
330
- fe_controller.shutdown(
331
- termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE
332
- ),
327
+ fe_controller.shutdown(),
333
328
  name=f"Shutdown Function Executor {function_executor_id}",
334
329
  )
335
330
 
@@ -69,6 +69,7 @@ enum FunctionExecutorStatus {
69
69
  FUNCTION_EXECUTOR_STATUS_TERMINATED = 3;
70
70
  }
71
71
 
72
+ // The reasons why an Executor decided to terminate a Function Executor.
72
73
  enum FunctionExecutorTerminationReason {
73
74
  FUNCTION_EXECUTOR_TERMINATION_REASON_UNKNOWN = 0;
74
75
  // Internal error aka platform error on FE startup.
@@ -79,10 +80,6 @@ enum FunctionExecutorTerminationReason {
79
80
  // Timeout on FE startup while running the function constructor.
80
81
  FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT = 3;
81
82
 
82
- // FE was terminated on Executor shutdown.
83
- FUNCTION_EXECUTOR_TERMINATION_REASON_EXECUTOR_SHUTDOWN = 10;
84
- // FE was removed from the Executor desired state by Server.
85
- FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE = 11;
86
83
  // FE was terminated because it failed a health check.
87
84
  FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY = 12;
88
85
  // FE was terminated due to an unrecoverable internal error on Executor.
@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
19
19
 
20
20
 
21
21
  DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
22
- b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\xeb\x01\n\x0b\x44\x61taPayload\x12\x11\n\x04size\x18\x02 \x01(\x04H\x00\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x02\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x03\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x04\x88\x01\x01\x42\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"e\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_model"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xd8\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xb3\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\x0c \x01(\tH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resourcesB\x1c\n\x1a_output_payload_uri_prefix"\xa4\x02\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12S\n\x12termination_reason\x18\x03 \x01(\x0e\x32\x32.executor_api_pb.FunctionExecutorTerminationReasonH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_statusB\x15\n\x13_termination_reason"\x8c\x02\n\x16\x46unctionExecutorUpdate\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12\x39\n\x0estartup_stdout\x18\x02 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x01\x88\x01\x01\x12\x39\n\x0estartup_stderr\x18\x03 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\x11\n\x0f_startup_stdoutB\x11\n\x0f_startup_stderr"\xce\x05\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06status\x18\x06 
\x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x03\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x04\x88\x01\x01\x12N\n!total_function_executor_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x05\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x06\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\x07\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x0b\n\t_hostnameB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB$\n"_total_function_executor_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"\xb9\x01\n\x0e\x45xecutorUpdate\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x0ctask_results\x18\x02 \x03(\x0b\x32\x1b.executor_api_pb.TaskResult\x12J\n\x19\x66unction_executor_updates\x18\x03 \x03(\x0b\x32\'.executor_api_pb.FunctionExecutorUpdateB\x0e\n\x0c_executor_id"\xbf\x01\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x12=\n\x0f\x65xecutor_update\x18\x02 \x01(\x0b\x32\x1f.executor_api_pb.ExecutorUpdateH\x01\x88\x01\x01\x42\x11\n\x0f_executor_stateB\x12\n\x10_executor_update"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 
\x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xc6\x04\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\t\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\n\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\xad\x01\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_taskB\x10\n\x0e_allocation_id"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\xb0\x06\n\nTaskResult\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x16\n\tnamespace\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x17\n\ngraph_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x06 \x01(\tH\x05\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x07 \x01(\tH\x06\x88\x01\x01\x12\x14\n\x07reducer\x18\x08 \x01(\x08H\x07\x88\x01\x01\x12;\n\x0coutcome_code\x18\t \x01(\x0e\x32 .executor_api_pb.TaskOutcomeCodeH\x08\x88\x01\x01\x12?\n\x0e\x66\x61ilure_reason\x18\n \x01(\x0e\x32".executor_api_pb.TaskFailureReasonH\t\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x36\n\x10\x66unction_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\r \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12\x31\n\x06stderr\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0b\x88\x01\x01\x12\x42\n\x17invocation_error_output\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x10\n\x0e_allocation_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_reducerB\x0f\n\r_outcome_codeB\x11\n\x0f_failure_reasonB\t\n\x07_stdoutB\t\n\x07_stderrB\x1a\n\x18_invocation_error_output*\xd1\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03\x12$\n DATA_PAYLOAD_ENCODING_BINARY_ZIP\x10\x04*\xd6\x01\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_40GB\x10\x01\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_80GB\x10\x02\x12\x1e\n\x1aGPU_MODEL_NVIDIA_H100_80GB\x10\x03\x12\x1d\n\x19GPU_MODEL_NVIDIA_TESLA_T4\x10\x04\x12\x1a\n\x16GPU_MODEL_NVIDIA_A6000\x10\x05\x12\x18\n\x14GPU_MODEL_NVIDIA_A10\x10\x06*\xb3\x01\n\x16\x46unctionExecutorStatus\x12$\n 
FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12$\n FUNCTION_EXECUTOR_STATUS_PENDING\x10\x01\x12$\n FUNCTION_EXECUTOR_STATUS_RUNNING\x10\x02\x12\'\n#FUNCTION_EXECUTOR_STATUS_TERMINATED\x10\x03*\x95\x05\n!FunctionExecutorTerminationReason\x12\x30\n,FUNCTION_EXECUTOR_TERMINATION_REASON_UNKNOWN\x10\x00\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR\x10\x01\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR\x10\x02\x12H\nDFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT\x10\x03\x12:\n6FUNCTION_EXECUTOR_TERMINATION_REASON_EXECUTOR_SHUTDOWN\x10\n\x12\x43\n?FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE\x10\x0b\x12\x32\n.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY\x10\x0c\x12\x37\n3FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR\x10\r\x12\x39\n5FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT\x10\x0e\x12;\n7FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_CANCELLED\x10\x0f*\xa5\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x04*n\n\x0fTaskOutcomeCode\x12\x1d\n\x19TASK_OUTCOME_CODE_UNKNOWN\x10\x00\x12\x1d\n\x19TASK_OUTCOME_CODE_SUCCESS\x10\x01\x12\x1d\n\x19TASK_OUTCOME_CODE_FAILURE\x10\x02*\xb6\x02\n\x11TaskFailureReason\x12\x1f\n\x1bTASK_FAILURE_REASON_UNKNOWN\x10\x00\x12&\n"TASK_FAILURE_REASON_INTERNAL_ERROR\x10\x01\x12&\n"TASK_FAILURE_REASON_FUNCTION_ERROR\x10\x02\x12(\n$TASK_FAILURE_REASON_FUNCTION_TIMEOUT\x10\x03\x12(\n$TASK_FAILURE_REASON_INVOCATION_ERROR\x10\x04\x12&\n"TASK_FAILURE_REASON_TASK_CANCELLED\x10\x05\x12\x34\n0TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED\x10\x06\x32\xff\x01\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_d
esired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x62\x06proto3'
22
+ b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\xeb\x01\n\x0b\x44\x61taPayload\x12\x11\n\x04size\x18\x02 \x01(\x04H\x00\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x02\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x03\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x04\x88\x01\x01\x42\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"e\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_model"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xd8\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xb3\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\x0c \x01(\tH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resourcesB\x1c\n\x1a_output_payload_uri_prefix"\xa4\x02\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12S\n\x12termination_reason\x18\x03 \x01(\x0e\x32\x32.executor_api_pb.FunctionExecutorTerminationReasonH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_statusB\x15\n\x13_termination_reason"\x8c\x02\n\x16\x46unctionExecutorUpdate\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12\x39\n\x0estartup_stdout\x18\x02 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x01\x88\x01\x01\x12\x39\n\x0estartup_stderr\x18\x03 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\x11\n\x0f_startup_stdoutB\x11\n\x0f_startup_stderr"\xce\x05\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06status\x18\x06 
\x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x03\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x04\x88\x01\x01\x12N\n!total_function_executor_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x05\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x06\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\x07\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x0b\n\t_hostnameB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB$\n"_total_function_executor_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"\xb9\x01\n\x0e\x45xecutorUpdate\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x0ctask_results\x18\x02 \x03(\x0b\x32\x1b.executor_api_pb.TaskResult\x12J\n\x19\x66unction_executor_updates\x18\x03 \x03(\x0b\x32\'.executor_api_pb.FunctionExecutorUpdateB\x0e\n\x0c_executor_id"\xbf\x01\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x12=\n\x0f\x65xecutor_update\x18\x02 \x01(\x0b\x32\x1f.executor_api_pb.ExecutorUpdateH\x01\x88\x01\x01\x42\x11\n\x0f_executor_stateB\x12\n\x10_executor_update"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 
\x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xc6\x04\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\t\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\n\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\xad\x01\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_taskB\x10\n\x0e_allocation_id"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\xb0\x06\n\nTaskResult\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x16\n\tnamespace\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x17\n\ngraph_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x06 \x01(\tH\x05\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x07 \x01(\tH\x06\x88\x01\x01\x12\x14\n\x07reducer\x18\x08 \x01(\x08H\x07\x88\x01\x01\x12;\n\x0coutcome_code\x18\t \x01(\x0e\x32 .executor_api_pb.TaskOutcomeCodeH\x08\x88\x01\x01\x12?\n\x0e\x66\x61ilure_reason\x18\n \x01(\x0e\x32".executor_api_pb.TaskFailureReasonH\t\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x36\n\x10\x66unction_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\r \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12\x31\n\x06stderr\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0b\x88\x01\x01\x12\x42\n\x17invocation_error_output\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x10\n\x0e_allocation_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_reducerB\x0f\n\r_outcome_codeB\x11\n\x0f_failure_reasonB\t\n\x07_stdoutB\t\n\x07_stderrB\x1a\n\x18_invocation_error_output*\xd1\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03\x12$\n DATA_PAYLOAD_ENCODING_BINARY_ZIP\x10\x04*\xd6\x01\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_40GB\x10\x01\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_80GB\x10\x02\x12\x1e\n\x1aGPU_MODEL_NVIDIA_H100_80GB\x10\x03\x12\x1d\n\x19GPU_MODEL_NVIDIA_TESLA_T4\x10\x04\x12\x1a\n\x16GPU_MODEL_NVIDIA_A6000\x10\x05\x12\x18\n\x14GPU_MODEL_NVIDIA_A10\x10\x06*\xb3\x01\n\x16\x46unctionExecutorStatus\x12$\n 
FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12$\n FUNCTION_EXECUTOR_STATUS_PENDING\x10\x01\x12$\n FUNCTION_EXECUTOR_STATUS_RUNNING\x10\x02\x12\'\n#FUNCTION_EXECUTOR_STATUS_TERMINATED\x10\x03*\x94\x04\n!FunctionExecutorTerminationReason\x12\x30\n,FUNCTION_EXECUTOR_TERMINATION_REASON_UNKNOWN\x10\x00\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR\x10\x01\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR\x10\x02\x12H\nDFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT\x10\x03\x12\x32\n.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY\x10\x0c\x12\x37\n3FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR\x10\r\x12\x39\n5FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT\x10\x0e\x12;\n7FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_CANCELLED\x10\x0f*\xa5\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x04*n\n\x0fTaskOutcomeCode\x12\x1d\n\x19TASK_OUTCOME_CODE_UNKNOWN\x10\x00\x12\x1d\n\x19TASK_OUTCOME_CODE_SUCCESS\x10\x01\x12\x1d\n\x19TASK_OUTCOME_CODE_FAILURE\x10\x02*\xb6\x02\n\x11TaskFailureReason\x12\x1f\n\x1bTASK_FAILURE_REASON_UNKNOWN\x10\x00\x12&\n"TASK_FAILURE_REASON_INTERNAL_ERROR\x10\x01\x12&\n"TASK_FAILURE_REASON_FUNCTION_ERROR\x10\x02\x12(\n$TASK_FAILURE_REASON_FUNCTION_TIMEOUT\x10\x03\x12(\n$TASK_FAILURE_REASON_INVOCATION_ERROR\x10\x04\x12&\n"TASK_FAILURE_REASON_TASK_CANCELLED\x10\x05\x12\x34\n0TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED\x10\x06\x32\xff\x01\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x62\x06proto3'
23
23
  )
24
24
 
25
25
  _globals = globals()
@@ -38,13 +38,13 @@ if not _descriptor._USE_C_DESCRIPTORS:
38
38
  _globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 5752
39
39
  _globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 5931
40
40
  _globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_start = 5934
41
- _globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_end = 6595
42
- _globals["_EXECUTORSTATUS"]._serialized_start = 6598
43
- _globals["_EXECUTORSTATUS"]._serialized_end = 6763
44
- _globals["_TASKOUTCOMECODE"]._serialized_start = 6765
45
- _globals["_TASKOUTCOMECODE"]._serialized_end = 6875
46
- _globals["_TASKFAILUREREASON"]._serialized_start = 6878
47
- _globals["_TASKFAILUREREASON"]._serialized_end = 7188
41
+ _globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_end = 6466
42
+ _globals["_EXECUTORSTATUS"]._serialized_start = 6469
43
+ _globals["_EXECUTORSTATUS"]._serialized_end = 6634
44
+ _globals["_TASKOUTCOMECODE"]._serialized_start = 6636
45
+ _globals["_TASKOUTCOMECODE"]._serialized_end = 6746
46
+ _globals["_TASKFAILUREREASON"]._serialized_start = 6749
47
+ _globals["_TASKFAILUREREASON"]._serialized_end = 7059
48
48
  _globals["_DATAPAYLOAD"]._serialized_start = 55
49
49
  _globals["_DATAPAYLOAD"]._serialized_end = 290
50
50
  _globals["_GPURESOURCES"]._serialized_start = 292
@@ -83,6 +83,6 @@ if not _descriptor._USE_C_DESCRIPTORS:
83
83
  _globals["_DESIREDEXECUTORSTATE"]._serialized_end = 4501
84
84
  _globals["_TASKRESULT"]._serialized_start = 4504
85
85
  _globals["_TASKRESULT"]._serialized_end = 5320
86
- _globals["_EXECUTORAPI"]._serialized_start = 7191
87
- _globals["_EXECUTORAPI"]._serialized_end = 7446
86
+ _globals["_EXECUTORAPI"]._serialized_start = 7062
87
+ _globals["_EXECUTORAPI"]._serialized_end = 7317
88
88
  # @@protoc_insertion_point(module_scope)
@@ -52,12 +52,6 @@ class FunctionExecutorTerminationReason(
52
52
  FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT: _ClassVar[
53
53
  FunctionExecutorTerminationReason
54
54
  ]
55
- FUNCTION_EXECUTOR_TERMINATION_REASON_EXECUTOR_SHUTDOWN: _ClassVar[
56
- FunctionExecutorTerminationReason
57
- ]
58
- FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE: _ClassVar[
59
- FunctionExecutorTerminationReason
60
- ]
61
55
  FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY: _ClassVar[
62
56
  FunctionExecutorTerminationReason
63
57
  ]
@@ -121,12 +115,6 @@ FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR: (
121
115
  FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT: (
122
116
  FunctionExecutorTerminationReason
123
117
  )
124
- FUNCTION_EXECUTOR_TERMINATION_REASON_EXECUTOR_SHUTDOWN: (
125
- FunctionExecutorTerminationReason
126
- )
127
- FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE: (
128
- FunctionExecutorTerminationReason
129
- )
130
118
  FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY: FunctionExecutorTerminationReason
131
119
  FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR: FunctionExecutorTerminationReason
132
120
  FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT: FunctionExecutorTerminationReason
@@ -1,28 +0,0 @@
1
- from typing import Any, Optional
2
-
3
- from indexify.executor.function_executor.function_executor import FunctionExecutor
4
- from indexify.proto.executor_api_pb2 import FunctionExecutorTerminationReason
5
-
6
- from .events import FunctionExecutorDestroyed
7
-
8
-
9
- async def destroy_function_executor(
10
- function_executor: Optional[FunctionExecutor],
11
- termination_reason: FunctionExecutorTerminationReason,
12
- logger: Any,
13
- ) -> FunctionExecutorDestroyed:
14
- """Destroys a function executor if it's not None.
15
-
16
- Doesn't raise any exceptions.
17
- """
18
- logger = logger.bind(module=__name__)
19
-
20
- if function_executor is not None:
21
- logger.info(
22
- "destroying function executor",
23
- )
24
- await function_executor.destroy()
25
-
26
- return FunctionExecutorDestroyed(
27
- is_success=True, termination_reason=termination_reason
28
- )
File without changes