indexify 0.4.10__tar.gz → 0.4.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.4.10 → indexify-0.4.11}/PKG-INFO +2 -2
- {indexify-0.4.10 → indexify-0.4.11}/pyproject.toml +2 -2
- indexify-0.4.11/src/indexify/executor/function_executor_controller/destroy_function_executor.py +31 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/events.py +3 -17
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/function_executor_controller.py +141 -112
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/metrics/function_executor_controller.py +25 -18
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/run_task.py +14 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/state_reconciler.py +2 -7
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/proto/executor_api.proto +1 -4
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/proto/executor_api_pb2.py +10 -10
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/proto/executor_api_pb2.pyi +0 -12
- indexify-0.4.10/src/indexify/executor/function_executor_controller/destroy_function_executor.py +0 -28
- {indexify-0.4.10 → indexify-0.4.11}/README.md +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/cli/__init__.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/cli/build_image.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/cli/deploy.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/cli/executor.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/README.md +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/blob_store/blob_store.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/blob_store/local_fs_blob_store.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/blob_store/metrics/blob_store.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/blob_store/s3_blob_store.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/channel_manager.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/executor.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_allowlist.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/function_executor.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/health_checker.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/invocation_state_client.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/__init__.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/completed_task_metrics.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/create_function_executor.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/debug_event_loop.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/downloads.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/function_executor_startup_output.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/loggers.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/message_validators.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/metrics/downloads.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/metrics/run_task.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/metrics/upload_task_output.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/prepare_task.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/task_info.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/task_output.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/upload_task_output.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/host_resources/host_resources.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/host_resources/nvidia_gpu.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/metrics/channel_manager.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/metrics/executor.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/metrics/state_reconciler.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/metrics/state_reporter.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/handler.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/metrics.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/server.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/state_reporter.py +0 -0
- {indexify-0.4.10 → indexify-0.4.11}/src/indexify/proto/executor_api_pb2_grpc.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: indexify
|
3
|
-
Version: 0.4.10
|
3
|
+
Version: 0.4.11
|
4
4
|
Summary: Open Source Indexify components and helper tools
|
5
5
|
Home-page: https://github.com/tensorlakeai/indexify
|
6
6
|
License: Apache 2.0
|
@@ -17,7 +17,7 @@ Requires-Dist: aiohttp (>=3.11.0,<4.0.0)
|
|
17
17
|
Requires-Dist: boto3 (>=1.37.30,<2.0.0)
|
18
18
|
Requires-Dist: prometheus-client (>=0.21.1,<0.22.0)
|
19
19
|
Requires-Dist: psutil (>=7.0.0,<8.0.0)
|
20
|
-
Requires-Dist: tensorlake (==0.2.
|
20
|
+
Requires-Dist: tensorlake (==0.2.8)
|
21
21
|
Project-URL: Repository, https://github.com/tensorlakeai/indexify
|
22
22
|
Description-Content-Type: text/markdown
|
23
23
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "indexify"
|
3
3
|
# Incremented if any of the components provided in this packages are updated.
|
4
|
-
version = "0.4.10"
|
4
|
+
version = "0.4.11"
|
5
5
|
description = "Open Source Indexify components and helper tools"
|
6
6
|
authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
|
7
7
|
license = "Apache 2.0"
|
@@ -25,7 +25,7 @@ prometheus-client = "^0.21.1"
|
|
25
25
|
psutil = "^7.0.0"
|
26
26
|
# Adds function-executor binary, utils lib, sdk used in indexify-cli commands.
|
27
27
|
# We need to specify the tensorlake version exactly because pip install doesn't respect poetry.lock files.
|
28
|
-
tensorlake = "0.2.
|
28
|
+
tensorlake = "0.2.8"
|
29
29
|
# Uncomment the next line to use local tensorlake package (only for development!)
|
30
30
|
# tensorlake = { path = "../tensorlake", develop = true }
|
31
31
|
# pydantic is provided by tensorlake
|
indexify-0.4.11/src/indexify/executor/function_executor_controller/destroy_function_executor.py
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
import asyncio
|
2
|
+
from typing import Any, Optional
|
3
|
+
|
4
|
+
from indexify.executor.function_executor.function_executor import FunctionExecutor
|
5
|
+
|
6
|
+
from .events import FunctionExecutorDestroyed
|
7
|
+
|
8
|
+
|
9
|
+
async def destroy_function_executor(
|
10
|
+
function_executor: Optional[FunctionExecutor],
|
11
|
+
lock: asyncio.Lock,
|
12
|
+
logger: Any,
|
13
|
+
) -> FunctionExecutorDestroyed:
|
14
|
+
"""Destroys the function executor if it's not None.
|
15
|
+
|
16
|
+
The supplied lock is used to ensure that if a destroy operation is in progress,
|
17
|
+
then another caller won't return immediately assuming that the destroy is complete
|
18
|
+
due to its idempotency.
|
19
|
+
|
20
|
+
Doesn't raise any exceptions.
|
21
|
+
"""
|
22
|
+
logger = logger.bind(module=__name__)
|
23
|
+
|
24
|
+
if function_executor is not None:
|
25
|
+
async with lock:
|
26
|
+
logger.info(
|
27
|
+
"destroying function executor",
|
28
|
+
)
|
29
|
+
await function_executor.destroy()
|
30
|
+
|
31
|
+
return FunctionExecutorDestroyed(is_success=True)
|
{indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/events.py
RENAMED
@@ -55,19 +55,12 @@ class FunctionExecutorDestroyed(BaseEvent):
|
|
55
55
|
Event indicating that Function Executor has been destroyed.
|
56
56
|
"""
|
57
57
|
|
58
|
-
def __init__(
|
59
|
-
self, is_success: bool, termination_reason: FunctionExecutorTerminationReason
|
60
|
-
):
|
58
|
+
def __init__(self, is_success: bool):
|
61
59
|
super().__init__(EventType.FUNCTION_EXECUTOR_DESTROYED)
|
62
60
|
self.is_success: bool = is_success
|
63
|
-
self.termination_reason: FunctionExecutorTerminationReason = termination_reason
|
64
61
|
|
65
62
|
def __str__(self) -> str:
|
66
|
-
return (
|
67
|
-
f"Event(type={self.event_type.name}, "
|
68
|
-
f"is_success={self.is_success}, "
|
69
|
-
f"termination_reason={FunctionExecutorTerminationReason.Name(self.termination_reason)})"
|
70
|
-
)
|
63
|
+
return f"Event(type={self.event_type.name}, " f"is_success={self.is_success})"
|
71
64
|
|
72
65
|
|
73
66
|
class ShutdownInitiated(BaseEvent):
|
@@ -75,15 +68,8 @@ class ShutdownInitiated(BaseEvent):
|
|
75
68
|
Event indicating that Function Executor shutdown has been initiated.
|
76
69
|
"""
|
77
70
|
|
78
|
-
def __init__(self, termination_reason: FunctionExecutorTerminationReason):
|
71
|
+
def __init__(self):
|
79
72
|
super().__init__(EventType.SHUTDOWN_INITIATED)
|
80
|
-
self.termination_reason: FunctionExecutorTerminationReason = termination_reason
|
81
|
-
|
82
|
-
def __str__(self) -> str:
|
83
|
-
return (
|
84
|
-
f"Event(type={self.event_type.name}, "
|
85
|
-
f"termination_reason={FunctionExecutorTerminationReason.Name(self.termination_reason)})"
|
86
|
-
)
|
87
73
|
|
88
74
|
|
89
75
|
class TaskPreparationFinished(BaseEvent):
|
@@ -1,6 +1,7 @@
|
|
1
1
|
import asyncio
|
2
2
|
import time
|
3
3
|
from collections.abc import Coroutine
|
4
|
+
from enum import Enum
|
4
5
|
from pathlib import Path
|
5
6
|
from typing import Any, Dict, List, Optional
|
6
7
|
|
@@ -43,12 +44,14 @@ from .events import (
|
|
43
44
|
from .function_executor_startup_output import FunctionExecutorStartupOutput
|
44
45
|
from .loggers import function_executor_logger, task_allocation_logger
|
45
46
|
from .metrics.function_executor_controller import (
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
47
|
+
METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_NOT_STARTED,
|
48
|
+
METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_RUNNING,
|
49
|
+
METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_STARTING_UP,
|
50
|
+
METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATED,
|
51
|
+
METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATING,
|
52
|
+
METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_UNKNOWN,
|
50
53
|
metric_control_loop_handle_event_latency,
|
51
|
-
|
54
|
+
metric_function_executors_with_state,
|
52
55
|
metric_runnable_tasks,
|
53
56
|
metric_runnable_tasks_per_function_name,
|
54
57
|
metric_schedule_task_latency,
|
@@ -61,6 +64,16 @@ from .task_output import TaskOutput
|
|
61
64
|
from .upload_task_output import upload_task_output
|
62
65
|
|
63
66
|
|
67
|
+
# Actual FE controller states, they are a bit different from statuses reported to the Server.
|
68
|
+
# All the valid state transitions are forward only (can skip multiple states in a row).
|
69
|
+
class _FE_CONTROLLER_STATE(Enum):
|
70
|
+
NOT_STARTED = 1
|
71
|
+
STARTING_UP = 2
|
72
|
+
RUNNING = 3
|
73
|
+
TERMINATING = 4
|
74
|
+
TERMINATED = 5
|
75
|
+
|
76
|
+
|
64
77
|
class FunctionExecutorController:
|
65
78
|
def __init__(
|
66
79
|
self,
|
@@ -94,19 +107,18 @@ class FunctionExecutorController:
|
|
94
107
|
self._logger: Any = function_executor_logger(
|
95
108
|
function_executor_description, logger.bind(module=__name__)
|
96
109
|
)
|
97
|
-
|
98
|
-
# the same event loop.
|
110
|
+
self._destroy_lock: asyncio.Lock = asyncio.Lock()
|
111
|
+
# Mutable state. No lock needed as it's modified by async tasks running in the same event loop.
|
99
112
|
self._fe: Optional[FunctionExecutor] = None
|
100
|
-
self._fe_termination_reason: FunctionExecutorTerminationReason =
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
self._status: FunctionExecutorStatus = (
|
105
|
-
FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_UNKNOWN
|
106
|
-
)
|
107
|
-
metric_function_executors_with_status.labels(
|
108
|
-
status=_to_fe_status_metric_label(self._status, self._logger)
|
113
|
+
self._fe_termination_reason: Optional[FunctionExecutorTerminationReason] = None
|
114
|
+
self._internal_state = _FE_CONTROLLER_STATE.NOT_STARTED
|
115
|
+
metric_function_executors_with_state.labels(
|
116
|
+
state=_to_fe_state_metric_label(self._internal_state, self._logger)
|
109
117
|
).inc()
|
118
|
+
self._reported_state: FunctionExecutorState = FunctionExecutorState(
|
119
|
+
description=function_executor_description,
|
120
|
+
status=FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_UNKNOWN,
|
121
|
+
)
|
110
122
|
# Ordered list of events to be processed by the control loop.
|
111
123
|
self._events: List[BaseEvent] = []
|
112
124
|
# Asyncio event used to notify the control loop that there are new events to process.
|
@@ -124,13 +136,6 @@ class FunctionExecutorController:
|
|
124
136
|
def function_executor_id(self) -> str:
|
125
137
|
return self._fe_description.id
|
126
138
|
|
127
|
-
def status(self) -> FunctionExecutorStatus:
|
128
|
-
"""Returns the current status of the Function Executor.
|
129
|
-
|
130
|
-
Not blocking.
|
131
|
-
"""
|
132
|
-
return self._status
|
133
|
-
|
134
139
|
def add_task_allocation(self, task_allocation: TaskAllocation) -> None:
|
135
140
|
"""Adds a task to the Function Executor.
|
136
141
|
|
@@ -205,9 +210,10 @@ class FunctionExecutorController:
|
|
205
210
|
"""Starts up the Function Executor and prepares it to run tasks.
|
206
211
|
|
207
212
|
Not blocking. Never raises exceptions."""
|
208
|
-
if self.
|
213
|
+
if self._internal_state != _FE_CONTROLLER_STATE.NOT_STARTED:
|
209
214
|
self._logger.warning(
|
210
|
-
"
|
215
|
+
"function executor state is not NOT_STARTED, ignoring startup call",
|
216
|
+
internal_state=self._internal_state.name,
|
211
217
|
)
|
212
218
|
return
|
213
219
|
|
@@ -215,7 +221,13 @@ class FunctionExecutorController:
|
|
215
221
|
self._control_loop(),
|
216
222
|
name="function executor control loop",
|
217
223
|
)
|
218
|
-
self.
|
224
|
+
self._update_internal_state(_FE_CONTROLLER_STATE.STARTING_UP)
|
225
|
+
self._update_reported_state(
|
226
|
+
FunctionExecutorState(
|
227
|
+
description=self._fe_description,
|
228
|
+
status=FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_PENDING,
|
229
|
+
)
|
230
|
+
)
|
219
231
|
next_aio = create_function_executor(
|
220
232
|
function_executor_description=self._fe_description,
|
221
233
|
function_executor_server_factory=self._fe_server_factory,
|
@@ -237,17 +249,13 @@ class FunctionExecutorController:
|
|
237
249
|
),
|
238
250
|
)
|
239
251
|
|
240
|
-
async def shutdown(
|
241
|
-
self, termination_reason: FunctionExecutorTerminationReason
|
242
|
-
) -> None:
|
252
|
+
async def shutdown(self) -> None:
|
243
253
|
"""Shutsdown the Function Executor and frees all of its resources.
|
244
254
|
|
245
|
-
|
255
|
+
No task outcomes and outputs are getting reported to Server after this call.
|
246
256
|
Doesn't raise any exceptions. Blocks until the shutdown is complete.
|
247
257
|
"""
|
248
|
-
self._add_event(
|
249
|
-
ShutdownInitiated(termination_reason=termination_reason), source="shutdown"
|
250
|
-
)
|
258
|
+
self._add_event(ShutdownInitiated(), source="shutdown")
|
251
259
|
try:
|
252
260
|
await self._control_loop_aio_task
|
253
261
|
except asyncio.CancelledError:
|
@@ -259,51 +267,49 @@ class FunctionExecutorController:
|
|
259
267
|
)
|
260
268
|
self._logger.info("function executor controller shutdown finished")
|
261
269
|
|
262
|
-
def
|
270
|
+
def _update_internal_state(self, new_state: _FE_CONTROLLER_STATE) -> None:
|
271
|
+
"""Updates the internal state of the Function Executor Controller.
|
272
|
+
|
273
|
+
Not blocking. Never raises exceptions."""
|
274
|
+
old_state: _FE_CONTROLLER_STATE = self._internal_state
|
275
|
+
self._internal_state = new_state
|
276
|
+
|
277
|
+
self._logger.info(
|
278
|
+
"function executor internal state changed",
|
279
|
+
old_state=old_state.name,
|
280
|
+
new_state=new_state.name,
|
281
|
+
)
|
282
|
+
|
283
|
+
metric_function_executors_with_state.labels(
|
284
|
+
state=_to_fe_state_metric_label(old_state, self._logger)
|
285
|
+
).dec()
|
286
|
+
metric_function_executors_with_state.labels(
|
287
|
+
state=_to_fe_state_metric_label(new_state, self._logger)
|
288
|
+
).inc()
|
289
|
+
|
290
|
+
def _update_reported_state(
|
263
291
|
self,
|
264
|
-
|
292
|
+
new_state: FunctionExecutorState,
|
265
293
|
) -> None:
|
266
|
-
"""Sets Function Executor
|
294
|
+
"""Sets new Function Executor state and reports it to the Server.
|
267
295
|
|
268
296
|
Not blocking. Never raises exceptions."""
|
269
|
-
|
270
|
-
|
271
|
-
self._status: FunctionExecutorStatus = new_status
|
297
|
+
old_state: FunctionExecutorState = self._reported_state
|
298
|
+
self._reported_state = new_state
|
272
299
|
|
273
300
|
self._logger.info(
|
274
|
-
"function executor status changed",
|
275
|
-
old_status=FunctionExecutorStatus.Name(
|
276
|
-
new_status=FunctionExecutorStatus.Name(
|
301
|
+
"function executor grpc status changed",
|
302
|
+
old_status=FunctionExecutorStatus.Name(old_state.status),
|
303
|
+
new_status=FunctionExecutorStatus.Name(new_state.status),
|
277
304
|
termination_reason=_termination_reason_to_short_name(
|
278
|
-
|
305
|
+
new_state.termination_reason
|
279
306
|
),
|
280
307
|
)
|
281
|
-
metric_function_executors_with_status.labels(
|
282
|
-
status=_to_fe_status_metric_label(old_status, self._logger)
|
283
|
-
).dec()
|
284
|
-
metric_function_executors_with_status.labels(
|
285
|
-
status=_to_fe_status_metric_label(new_status, self._logger)
|
286
|
-
).inc()
|
287
308
|
|
288
|
-
self._state_reporter.update_function_executor_state(
|
309
|
+
self._state_reporter.update_function_executor_state(new_state)
|
289
310
|
# Report the status change to the Server asap to reduce latency in the system.
|
290
311
|
self._state_reporter.schedule_state_report()
|
291
312
|
|
292
|
-
def _current_state(self) -> FunctionExecutorState:
|
293
|
-
"""Returns the current state of the Function Executor.
|
294
|
-
|
295
|
-
Not blocking. Never raises exceptions.
|
296
|
-
"""
|
297
|
-
termination_reason: Optional[FunctionExecutorTerminationReason] = None
|
298
|
-
if self._fe_termination_reason is not None:
|
299
|
-
termination_reason = self._fe_termination_reason
|
300
|
-
|
301
|
-
return FunctionExecutorState(
|
302
|
-
description=self._fe_description,
|
303
|
-
status=self._status,
|
304
|
-
termination_reason=termination_reason,
|
305
|
-
)
|
306
|
-
|
307
313
|
async def _control_loop(self) -> None:
|
308
314
|
"""Runs control loop that coordinates all the work done by the Function Executor.
|
309
315
|
|
@@ -454,13 +460,17 @@ class FunctionExecutorController:
|
|
454
460
|
self._state_reporter.schedule_state_report()
|
455
461
|
|
456
462
|
if event.function_executor is None:
|
457
|
-
self.
|
458
|
-
event.output.termination_reason
|
459
|
-
)
|
463
|
+
self._start_termination(termination_reason=event.output.termination_reason)
|
460
464
|
return
|
461
465
|
|
462
466
|
self._fe = event.function_executor
|
463
|
-
self.
|
467
|
+
self._update_internal_state(_FE_CONTROLLER_STATE.RUNNING)
|
468
|
+
self._update_reported_state(
|
469
|
+
FunctionExecutorState(
|
470
|
+
description=self._fe_description,
|
471
|
+
status=FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_RUNNING,
|
472
|
+
)
|
473
|
+
)
|
464
474
|
# Health checker starts after FE creation and gets automatically stopped on FE destroy.
|
465
475
|
self._fe.health_checker().start(self._health_check_failed_callback)
|
466
476
|
self._add_event(
|
@@ -479,9 +489,18 @@ class FunctionExecutorController:
|
|
479
489
|
self._logger.error(
|
480
490
|
"Function Executor destroy failed unexpectedly, this should never happen",
|
481
491
|
)
|
482
|
-
|
483
|
-
self.
|
484
|
-
|
492
|
+
|
493
|
+
self._fe = None
|
494
|
+
# Set reported status only after the FE got destroyed because Server assumes that all FE resources are freed when the status changes.
|
495
|
+
self._update_reported_state(
|
496
|
+
FunctionExecutorState(
|
497
|
+
description=self._fe_description,
|
498
|
+
status=FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_TERMINATED,
|
499
|
+
termination_reason=self._fe_termination_reason,
|
500
|
+
)
|
501
|
+
)
|
502
|
+
self._update_internal_state(_FE_CONTROLLER_STATE.TERMINATED)
|
503
|
+
|
485
504
|
# Invoke the scheduler so it can fail runnable tasks with FE Terminated error.
|
486
505
|
self._add_event(
|
487
506
|
ScheduleTaskExecution(),
|
@@ -493,7 +512,7 @@ class FunctionExecutorController:
|
|
493
512
|
"Function Executor health check failed, terminating Function Executor",
|
494
513
|
reason=result.reason,
|
495
514
|
)
|
496
|
-
self.
|
515
|
+
self._start_termination(
|
497
516
|
termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY
|
498
517
|
)
|
499
518
|
|
@@ -532,14 +551,15 @@ class FunctionExecutorController:
|
|
532
551
|
if len(self._runnable_tasks) == 0:
|
533
552
|
return
|
534
553
|
|
535
|
-
if self.
|
536
|
-
|
537
|
-
|
554
|
+
if self._internal_state not in [
|
555
|
+
_FE_CONTROLLER_STATE.RUNNING,
|
556
|
+
_FE_CONTROLLER_STATE.TERMINATING,
|
557
|
+
_FE_CONTROLLER_STATE.TERMINATED,
|
538
558
|
]:
|
539
|
-
return # Can't progress
|
559
|
+
return # Can't progress runnable tasks in the current state.
|
540
560
|
|
541
561
|
if (
|
542
|
-
self.
|
562
|
+
self._internal_state == _FE_CONTROLLER_STATE.RUNNING
|
543
563
|
and self._running_task is not None
|
544
564
|
):
|
545
565
|
return
|
@@ -555,12 +575,15 @@ class FunctionExecutorController:
|
|
555
575
|
if task_info.is_cancelled:
|
556
576
|
task_info.output = TaskOutput.task_cancelled(task_info.allocation)
|
557
577
|
self._start_task_output_upload(task_info)
|
558
|
-
elif self.
|
578
|
+
elif self._internal_state in [
|
579
|
+
_FE_CONTROLLER_STATE.TERMINATING,
|
580
|
+
_FE_CONTROLLER_STATE.TERMINATED,
|
581
|
+
]:
|
559
582
|
task_info.output = TaskOutput.function_executor_terminated(
|
560
583
|
task_info.allocation
|
561
584
|
)
|
562
585
|
self._start_task_output_upload(task_info)
|
563
|
-
elif self.
|
586
|
+
elif self._internal_state == _FE_CONTROLLER_STATE.RUNNING:
|
564
587
|
self._running_task = task_info
|
565
588
|
next_aio = run_task_on_function_executor(
|
566
589
|
task_info=task_info,
|
@@ -603,7 +626,7 @@ class FunctionExecutorController:
|
|
603
626
|
ScheduleTaskExecution(), source="_handle_event_task_execution_finished"
|
604
627
|
)
|
605
628
|
else:
|
606
|
-
self.
|
629
|
+
self._start_termination(
|
607
630
|
termination_reason=event.function_executor_termination_reason
|
608
631
|
)
|
609
632
|
|
@@ -660,24 +683,31 @@ class FunctionExecutorController:
|
|
660
683
|
)
|
661
684
|
self._state_reporter.schedule_state_report()
|
662
685
|
|
663
|
-
def
|
686
|
+
def _start_termination(
|
664
687
|
self, termination_reason: FunctionExecutorTerminationReason
|
665
688
|
) -> None:
|
666
|
-
"""
|
689
|
+
"""Starts termination of the Function Executor if it's not started yet.
|
667
690
|
|
668
691
|
Doesn't raise any exceptions. Doesn't block.
|
669
692
|
"""
|
693
|
+
if self._internal_state in [
|
694
|
+
_FE_CONTROLLER_STATE.TERMINATING,
|
695
|
+
_FE_CONTROLLER_STATE.TERMINATED,
|
696
|
+
]:
|
697
|
+
# _start_termination() can be called multiple times, e.g. by each failed task alloc
|
698
|
+
# when the FE is unhealthy. Dedup the calls to keep state machine consistent.
|
699
|
+
return
|
700
|
+
|
701
|
+
self._fe_termination_reason = termination_reason
|
702
|
+
self._update_internal_state(_FE_CONTROLLER_STATE.TERMINATING)
|
670
703
|
next_aio = destroy_function_executor(
|
671
704
|
function_executor=self._fe,
|
672
|
-
|
705
|
+
lock=self._destroy_lock,
|
673
706
|
logger=self._logger,
|
674
707
|
)
|
675
|
-
self._fe = None
|
676
708
|
self._spawn_aio_for_fe(
|
677
709
|
aio=next_aio,
|
678
|
-
on_exception=FunctionExecutorDestroyed(
|
679
|
-
is_success=False, termination_reason=termination_reason
|
680
|
-
),
|
710
|
+
on_exception=FunctionExecutorDestroyed(is_success=False),
|
681
711
|
)
|
682
712
|
|
683
713
|
async def _shutdown_no_exceptions(self, event: ShutdownInitiated) -> None:
|
@@ -716,16 +746,15 @@ class FunctionExecutorController:
|
|
716
746
|
# BaseException includes asyncio.CancelledError which is always raised here.
|
717
747
|
pass
|
718
748
|
|
719
|
-
|
720
|
-
self.
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
status=_to_fe_status_metric_label(self._status, self._logger)
|
749
|
+
await destroy_function_executor(
|
750
|
+
function_executor=self._fe,
|
751
|
+
lock=self._destroy_lock,
|
752
|
+
logger=self._logger,
|
753
|
+
)
|
754
|
+
|
755
|
+
# Cleanup the metric from this FE.
|
756
|
+
metric_function_executors_with_state.labels(
|
757
|
+
state=_to_fe_state_metric_label(self._internal_state, self._logger)
|
729
758
|
).dec()
|
730
759
|
|
731
760
|
self._state_reporter.remove_function_executor_state(self.function_executor_id())
|
@@ -735,21 +764,23 @@ class FunctionExecutorController:
|
|
735
764
|
debug_print_events(events=self._events, logger=self._logger)
|
736
765
|
|
737
766
|
|
738
|
-
def
|
739
|
-
if
|
740
|
-
return
|
741
|
-
elif
|
742
|
-
return
|
743
|
-
elif
|
744
|
-
return
|
745
|
-
elif
|
746
|
-
return
|
767
|
+
def _to_fe_state_metric_label(state: _FE_CONTROLLER_STATE, logger: Any) -> str:
|
768
|
+
if state == _FE_CONTROLLER_STATE.NOT_STARTED:
|
769
|
+
return METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_NOT_STARTED
|
770
|
+
elif state == _FE_CONTROLLER_STATE.STARTING_UP:
|
771
|
+
return METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_STARTING_UP
|
772
|
+
elif state == _FE_CONTROLLER_STATE.RUNNING:
|
773
|
+
return METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_RUNNING
|
774
|
+
elif state == _FE_CONTROLLER_STATE.TERMINATING:
|
775
|
+
return METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATING
|
776
|
+
elif state == _FE_CONTROLLER_STATE.TERMINATED:
|
777
|
+
return METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATED
|
747
778
|
else:
|
748
779
|
logger.error(
|
749
|
-
"unexpected Function Executor
|
750
|
-
|
780
|
+
"unexpected Function Executor internal state",
|
781
|
+
state=state.name,
|
751
782
|
)
|
752
|
-
return
|
783
|
+
return METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_UNKNOWN
|
753
784
|
|
754
785
|
|
755
786
|
_termination_reason_to_short_name_map = {
|
@@ -757,8 +788,6 @@ _termination_reason_to_short_name_map = {
|
|
757
788
|
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR: "STARTUP_FAILED_INTERNAL_ERROR",
|
758
789
|
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR: "STARTUP_FAILED_FUNCTION_ERROR",
|
759
790
|
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT: "STARTUP_FAILED_FUNCTION_TIMEOUT",
|
760
|
-
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_EXECUTOR_SHUTDOWN: "EXECUTOR_SHUTDOWN",
|
761
|
-
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE: "REMOVED_FROM_DESIRED_STATE",
|
762
791
|
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY: "UNHEALTHY",
|
763
792
|
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR: "INTERNAL_ERROR",
|
764
793
|
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT: "FUNCTION_TIMEOUT",
|
@@ -34,27 +34,34 @@ metric_runnable_tasks_per_function_name: prometheus_client.Gauge = (
|
|
34
34
|
)
|
35
35
|
)
|
36
36
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
["status"],
|
42
|
-
)
|
37
|
+
metric_function_executors_with_state: prometheus_client.Gauge = prometheus_client.Gauge(
|
38
|
+
"function_executors_with_state",
|
39
|
+
"Number of Function Executors with a particular internal state",
|
40
|
+
["state"],
|
43
41
|
)
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
42
|
+
METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_UNKNOWN = "unknown"
|
43
|
+
METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_NOT_STARTED = "not_started"
|
44
|
+
METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_STARTING_UP = "starting_up"
|
45
|
+
METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_RUNNING = "running"
|
46
|
+
METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATING = "terminating"
|
47
|
+
METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATED = "terminated"
|
48
|
+
|
48
49
|
|
49
|
-
|
50
|
-
|
50
|
+
metric_function_executors_with_state.labels(
|
51
|
+
state=METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_UNKNOWN
|
52
|
+
)
|
53
|
+
metric_function_executors_with_state.labels(
|
54
|
+
state=METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_NOT_STARTED
|
55
|
+
)
|
56
|
+
metric_function_executors_with_state.labels(
|
57
|
+
state=METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_STARTING_UP
|
51
58
|
)
|
52
|
-
|
53
|
-
|
59
|
+
metric_function_executors_with_state.labels(
|
60
|
+
state=METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_RUNNING
|
54
61
|
)
|
55
|
-
|
56
|
-
|
62
|
+
metric_function_executors_with_state.labels(
|
63
|
+
state=METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATING
|
57
64
|
)
|
58
|
-
|
59
|
-
|
65
|
+
metric_function_executors_with_state.labels(
|
66
|
+
state=METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_TERMINATED
|
60
67
|
)
|
{indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/run_task.py
RENAMED
@@ -1,4 +1,6 @@
|
|
1
1
|
import asyncio
|
2
|
+
import os
|
3
|
+
import random
|
2
4
|
import time
|
3
5
|
from typing import Any, Optional
|
4
6
|
|
@@ -38,6 +40,10 @@ from .metrics.run_task import (
|
|
38
40
|
from .task_info import TaskInfo
|
39
41
|
from .task_output import TaskMetrics, TaskOutput
|
40
42
|
|
43
|
+
_ENABLE_INJECT_TASK_CANCELLATIONS = (
|
44
|
+
os.getenv("INDEXIFY_INJECT_TASK_CANCELLATIONS", "0") == "1"
|
45
|
+
)
|
46
|
+
|
41
47
|
|
42
48
|
async def run_task_on_function_executor(
|
43
49
|
task_info: TaskInfo, function_executor: FunctionExecutor, logger: Any
|
@@ -177,6 +183,14 @@ def _task_output_from_function_executor_response(
|
|
177
183
|
response_validator.required_field("invocation_error_output")
|
178
184
|
invocation_error_output = response.invocation_error_output
|
179
185
|
|
186
|
+
if _ENABLE_INJECT_TASK_CANCELLATIONS:
|
187
|
+
logger.warning("injecting cancellation failure for the task allocation")
|
188
|
+
if (
|
189
|
+
random.random() < 0.5
|
190
|
+
): # 50% chance to get stable reproduction in manual testing
|
191
|
+
outcome_code = TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE
|
192
|
+
failure_reason = TaskFailureReason.TASK_FAILURE_REASON_TASK_CANCELLED
|
193
|
+
|
180
194
|
return TaskOutput(
|
181
195
|
allocation=allocation,
|
182
196
|
outcome_code=outcome_code,
|
@@ -7,7 +7,6 @@ from tensorlake.function_executor.proto.message_validator import MessageValidato
|
|
7
7
|
from indexify.proto.executor_api_pb2 import (
|
8
8
|
DesiredExecutorState,
|
9
9
|
FunctionExecutorDescription,
|
10
|
-
FunctionExecutorTerminationReason,
|
11
10
|
GetDesiredExecutorStatesRequest,
|
12
11
|
TaskAllocation,
|
13
12
|
)
|
@@ -123,9 +122,7 @@ class ExecutorStateReconciler:
|
|
123
122
|
for fe_controller in self._function_executor_controllers.values():
|
124
123
|
fe_shutdown_tasks.append(
|
125
124
|
asyncio.create_task(
|
126
|
-
fe_controller.shutdown(
|
127
|
-
termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_EXECUTOR_SHUTDOWN
|
128
|
-
),
|
125
|
+
fe_controller.shutdown(),
|
129
126
|
name=f"Shutdown Function Executor {fe_controller.function_executor_id()}",
|
130
127
|
)
|
131
128
|
)
|
@@ -327,9 +324,7 @@ class ExecutorStateReconciler:
|
|
327
324
|
self._function_executor_controllers.pop(function_executor_id)
|
328
325
|
)
|
329
326
|
asyncio.create_task(
|
330
|
-
fe_controller.shutdown(
|
331
|
-
termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE
|
332
|
-
),
|
327
|
+
fe_controller.shutdown(),
|
333
328
|
name=f"Shutdown Function Executor {function_executor_id}",
|
334
329
|
)
|
335
330
|
|
@@ -69,6 +69,7 @@ enum FunctionExecutorStatus {
|
|
69
69
|
FUNCTION_EXECUTOR_STATUS_TERMINATED = 3;
|
70
70
|
}
|
71
71
|
|
72
|
+
// The reasons why an Executor decided to terminate a Function Executor.
|
72
73
|
enum FunctionExecutorTerminationReason {
|
73
74
|
FUNCTION_EXECUTOR_TERMINATION_REASON_UNKNOWN = 0;
|
74
75
|
// Internal error aka platform error on FE startup.
|
@@ -79,10 +80,6 @@ enum FunctionExecutorTerminationReason {
|
|
79
80
|
// Timeout on FE startup while running the function constructor.
|
80
81
|
FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT = 3;
|
81
82
|
|
82
|
-
// FE was terminated on Executor shutdown.
|
83
|
-
FUNCTION_EXECUTOR_TERMINATION_REASON_EXECUTOR_SHUTDOWN = 10;
|
84
|
-
// FE was removed from the Executor desired state by Server.
|
85
|
-
FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE = 11;
|
86
83
|
// FE was terminated because it failed a health check.
|
87
84
|
FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY = 12;
|
88
85
|
// FE was terminated due to an unrecoverable internal error on Executor.
|
@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
|
|
19
19
|
|
20
20
|
|
21
21
|
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
|
22
|
-
b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\xeb\x01\n\x0b\x44\x61taPayload\x12\x11\n\x04size\x18\x02 \x01(\x04H\x00\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x02\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x03\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x04\x88\x01\x01\x42\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"e\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_model"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xd8\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xb3\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\x0c \x01(\tH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resourcesB\x1c\n\x1a_output_payload_uri_prefix"\xa4\x02\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12S\n\x12termination_reason\x18\x03 \x01(\x0e\x32\x32.executor_api_pb.FunctionExecutorTerminationReasonH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_statusB\x15\n\x13_termination_reason"\x8c\x02\n\x16\x46unctionExecutorUpdate\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12\x39\n\x0estartup_stdout\x18\x02 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x01\x88\x01\x01\x12\x39\n\x0estartup_stderr\x18\x03 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\x11\n\x0f_startup_stdoutB\x11\n\x0f_startup_stderr"\xce\x05\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06status\x18\x06 
\x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x03\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x04\x88\x01\x01\x12N\n!total_function_executor_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x05\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x06\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\x07\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x0b\n\t_hostnameB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB$\n"_total_function_executor_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"\xb9\x01\n\x0e\x45xecutorUpdate\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x0ctask_results\x18\x02 \x03(\x0b\x32\x1b.executor_api_pb.TaskResult\x12J\n\x19\x66unction_executor_updates\x18\x03 \x03(\x0b\x32\'.executor_api_pb.FunctionExecutorUpdateB\x0e\n\x0c_executor_id"\xbf\x01\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x12=\n\x0f\x65xecutor_update\x18\x02 \x01(\x0b\x32\x1f.executor_api_pb.ExecutorUpdateH\x01\x88\x01\x01\x42\x11\n\x0f_executor_stateB\x12\n\x10_executor_update"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 
\x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xc6\x04\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\t\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\n\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\xad\x01\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_taskB\x10\n\x0e_allocation_id"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\xb0\x06\n\nTaskResult\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x16\n\tnamespace\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x17\n\ngraph_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x06 \x01(\tH\x05\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x07 \x01(\tH\x06\x88\x01\x01\x12\x14\n\x07reducer\x18\x08 \x01(\x08H\x07\x88\x01\x01\x12;\n\x0coutcome_code\x18\t \x01(\x0e\x32 .executor_api_pb.TaskOutcomeCodeH\x08\x88\x01\x01\x12?\n\x0e\x66\x61ilure_reason\x18\n \x01(\x0e\x32".executor_api_pb.TaskFailureReasonH\t\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x36\n\x10\x66unction_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\r \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12\x31\n\x06stderr\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0b\x88\x01\x01\x12\x42\n\x17invocation_error_output\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x10\n\x0e_allocation_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_reducerB\x0f\n\r_outcome_codeB\x11\n\x0f_failure_reasonB\t\n\x07_stdoutB\t\n\x07_stderrB\x1a\n\x18_invocation_error_output*\xd1\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03\x12$\n DATA_PAYLOAD_ENCODING_BINARY_ZIP\x10\x04*\xd6\x01\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_40GB\x10\x01\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_80GB\x10\x02\x12\x1e\n\x1aGPU_MODEL_NVIDIA_H100_80GB\x10\x03\x12\x1d\n\x19GPU_MODEL_NVIDIA_TESLA_T4\x10\x04\x12\x1a\n\x16GPU_MODEL_NVIDIA_A6000\x10\x05\x12\x18\n\x14GPU_MODEL_NVIDIA_A10\x10\x06*\xb3\x01\n\x16\x46unctionExecutorStatus\x12$\n 
FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12$\n FUNCTION_EXECUTOR_STATUS_PENDING\x10\x01\x12$\n FUNCTION_EXECUTOR_STATUS_RUNNING\x10\x02\x12\'\n#FUNCTION_EXECUTOR_STATUS_TERMINATED\x10\x03*\x95\x05\n!FunctionExecutorTerminationReason\x12\x30\n,FUNCTION_EXECUTOR_TERMINATION_REASON_UNKNOWN\x10\x00\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR\x10\x01\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR\x10\x02\x12H\nDFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT\x10\x03\x12:\n6FUNCTION_EXECUTOR_TERMINATION_REASON_EXECUTOR_SHUTDOWN\x10\n\x12\x43\n?FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE\x10\x0b\x12\x32\n.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY\x10\x0c\x12\x37\n3FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR\x10\r\x12\x39\n5FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT\x10\x0e\x12;\n7FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_CANCELLED\x10\x0f*\xa5\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x04*n\n\x0fTaskOutcomeCode\x12\x1d\n\x19TASK_OUTCOME_CODE_UNKNOWN\x10\x00\x12\x1d\n\x19TASK_OUTCOME_CODE_SUCCESS\x10\x01\x12\x1d\n\x19TASK_OUTCOME_CODE_FAILURE\x10\x02*\xb6\x02\n\x11TaskFailureReason\x12\x1f\n\x1bTASK_FAILURE_REASON_UNKNOWN\x10\x00\x12&\n"TASK_FAILURE_REASON_INTERNAL_ERROR\x10\x01\x12&\n"TASK_FAILURE_REASON_FUNCTION_ERROR\x10\x02\x12(\n$TASK_FAILURE_REASON_FUNCTION_TIMEOUT\x10\x03\x12(\n$TASK_FAILURE_REASON_INVOCATION_ERROR\x10\x04\x12&\n"TASK_FAILURE_REASON_TASK_CANCELLED\x10\x05\x12\x34\n0TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED\x10\x06\x32\xff\x01\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_d
esired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x62\x06proto3'
|
22
|
+
b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\xeb\x01\n\x0b\x44\x61taPayload\x12\x11\n\x04size\x18\x02 \x01(\x04H\x00\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x02\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x03\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x04\x88\x01\x01\x42\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"e\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_model"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xd8\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xb3\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\x0c \x01(\tH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resourcesB\x1c\n\x1a_output_payload_uri_prefix"\xa4\x02\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12S\n\x12termination_reason\x18\x03 \x01(\x0e\x32\x32.executor_api_pb.FunctionExecutorTerminationReasonH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_statusB\x15\n\x13_termination_reason"\x8c\x02\n\x16\x46unctionExecutorUpdate\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12\x39\n\x0estartup_stdout\x18\x02 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x01\x88\x01\x01\x12\x39\n\x0estartup_stderr\x18\x03 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\x11\n\x0f_startup_stdoutB\x11\n\x0f_startup_stderr"\xce\x05\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06status\x18\x06 
\x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x03\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x04\x88\x01\x01\x12N\n!total_function_executor_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x05\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x06\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\x07\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x0b\n\t_hostnameB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB$\n"_total_function_executor_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"\xb9\x01\n\x0e\x45xecutorUpdate\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x0ctask_results\x18\x02 \x03(\x0b\x32\x1b.executor_api_pb.TaskResult\x12J\n\x19\x66unction_executor_updates\x18\x03 \x03(\x0b\x32\'.executor_api_pb.FunctionExecutorUpdateB\x0e\n\x0c_executor_id"\xbf\x01\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x12=\n\x0f\x65xecutor_update\x18\x02 \x01(\x0b\x32\x1f.executor_api_pb.ExecutorUpdateH\x01\x88\x01\x01\x42\x11\n\x0f_executor_stateB\x12\n\x10_executor_update"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 
\x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xc6\x04\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\t\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\n\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\xad\x01\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_taskB\x10\n\x0e_allocation_id"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\xb0\x06\n\nTaskResult\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x16\n\tnamespace\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x17\n\ngraph_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x06 \x01(\tH\x05\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x07 \x01(\tH\x06\x88\x01\x01\x12\x14\n\x07reducer\x18\x08 \x01(\x08H\x07\x88\x01\x01\x12;\n\x0coutcome_code\x18\t \x01(\x0e\x32 .executor_api_pb.TaskOutcomeCodeH\x08\x88\x01\x01\x12?\n\x0e\x66\x61ilure_reason\x18\n \x01(\x0e\x32".executor_api_pb.TaskFailureReasonH\t\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x36\n\x10\x66unction_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\r \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12\x31\n\x06stderr\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0b\x88\x01\x01\x12\x42\n\x17invocation_error_output\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x10\n\x0e_allocation_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_reducerB\x0f\n\r_outcome_codeB\x11\n\x0f_failure_reasonB\t\n\x07_stdoutB\t\n\x07_stderrB\x1a\n\x18_invocation_error_output*\xd1\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03\x12$\n DATA_PAYLOAD_ENCODING_BINARY_ZIP\x10\x04*\xd6\x01\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_40GB\x10\x01\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_80GB\x10\x02\x12\x1e\n\x1aGPU_MODEL_NVIDIA_H100_80GB\x10\x03\x12\x1d\n\x19GPU_MODEL_NVIDIA_TESLA_T4\x10\x04\x12\x1a\n\x16GPU_MODEL_NVIDIA_A6000\x10\x05\x12\x18\n\x14GPU_MODEL_NVIDIA_A10\x10\x06*\xb3\x01\n\x16\x46unctionExecutorStatus\x12$\n 
FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12$\n FUNCTION_EXECUTOR_STATUS_PENDING\x10\x01\x12$\n FUNCTION_EXECUTOR_STATUS_RUNNING\x10\x02\x12\'\n#FUNCTION_EXECUTOR_STATUS_TERMINATED\x10\x03*\x94\x04\n!FunctionExecutorTerminationReason\x12\x30\n,FUNCTION_EXECUTOR_TERMINATION_REASON_UNKNOWN\x10\x00\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR\x10\x01\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR\x10\x02\x12H\nDFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT\x10\x03\x12\x32\n.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY\x10\x0c\x12\x37\n3FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR\x10\r\x12\x39\n5FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT\x10\x0e\x12;\n7FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_CANCELLED\x10\x0f*\xa5\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x04*n\n\x0fTaskOutcomeCode\x12\x1d\n\x19TASK_OUTCOME_CODE_UNKNOWN\x10\x00\x12\x1d\n\x19TASK_OUTCOME_CODE_SUCCESS\x10\x01\x12\x1d\n\x19TASK_OUTCOME_CODE_FAILURE\x10\x02*\xb6\x02\n\x11TaskFailureReason\x12\x1f\n\x1bTASK_FAILURE_REASON_UNKNOWN\x10\x00\x12&\n"TASK_FAILURE_REASON_INTERNAL_ERROR\x10\x01\x12&\n"TASK_FAILURE_REASON_FUNCTION_ERROR\x10\x02\x12(\n$TASK_FAILURE_REASON_FUNCTION_TIMEOUT\x10\x03\x12(\n$TASK_FAILURE_REASON_INVOCATION_ERROR\x10\x04\x12&\n"TASK_FAILURE_REASON_TASK_CANCELLED\x10\x05\x12\x34\n0TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED\x10\x06\x32\xff\x01\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x62\x06proto3'
|
23
23
|
)
|
24
24
|
|
25
25
|
_globals = globals()
|
@@ -38,13 +38,13 @@ if not _descriptor._USE_C_DESCRIPTORS:
|
|
38
38
|
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 5752
|
39
39
|
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 5931
|
40
40
|
_globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_start = 5934
|
41
|
-
_globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_end =
|
42
|
-
_globals["_EXECUTORSTATUS"]._serialized_start =
|
43
|
-
_globals["_EXECUTORSTATUS"]._serialized_end =
|
44
|
-
_globals["_TASKOUTCOMECODE"]._serialized_start =
|
45
|
-
_globals["_TASKOUTCOMECODE"]._serialized_end =
|
46
|
-
_globals["_TASKFAILUREREASON"]._serialized_start =
|
47
|
-
_globals["_TASKFAILUREREASON"]._serialized_end =
|
41
|
+
_globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_end = 6466
|
42
|
+
_globals["_EXECUTORSTATUS"]._serialized_start = 6469
|
43
|
+
_globals["_EXECUTORSTATUS"]._serialized_end = 6634
|
44
|
+
_globals["_TASKOUTCOMECODE"]._serialized_start = 6636
|
45
|
+
_globals["_TASKOUTCOMECODE"]._serialized_end = 6746
|
46
|
+
_globals["_TASKFAILUREREASON"]._serialized_start = 6749
|
47
|
+
_globals["_TASKFAILUREREASON"]._serialized_end = 7059
|
48
48
|
_globals["_DATAPAYLOAD"]._serialized_start = 55
|
49
49
|
_globals["_DATAPAYLOAD"]._serialized_end = 290
|
50
50
|
_globals["_GPURESOURCES"]._serialized_start = 292
|
@@ -83,6 +83,6 @@ if not _descriptor._USE_C_DESCRIPTORS:
|
|
83
83
|
_globals["_DESIREDEXECUTORSTATE"]._serialized_end = 4501
|
84
84
|
_globals["_TASKRESULT"]._serialized_start = 4504
|
85
85
|
_globals["_TASKRESULT"]._serialized_end = 5320
|
86
|
-
_globals["_EXECUTORAPI"]._serialized_start =
|
87
|
-
_globals["_EXECUTORAPI"]._serialized_end =
|
86
|
+
_globals["_EXECUTORAPI"]._serialized_start = 7062
|
87
|
+
_globals["_EXECUTORAPI"]._serialized_end = 7317
|
88
88
|
# @@protoc_insertion_point(module_scope)
|
@@ -52,12 +52,6 @@ class FunctionExecutorTerminationReason(
|
|
52
52
|
FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT: _ClassVar[
|
53
53
|
FunctionExecutorTerminationReason
|
54
54
|
]
|
55
|
-
FUNCTION_EXECUTOR_TERMINATION_REASON_EXECUTOR_SHUTDOWN: _ClassVar[
|
56
|
-
FunctionExecutorTerminationReason
|
57
|
-
]
|
58
|
-
FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE: _ClassVar[
|
59
|
-
FunctionExecutorTerminationReason
|
60
|
-
]
|
61
55
|
FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY: _ClassVar[
|
62
56
|
FunctionExecutorTerminationReason
|
63
57
|
]
|
@@ -121,12 +115,6 @@ FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR: (
|
|
121
115
|
FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT: (
|
122
116
|
FunctionExecutorTerminationReason
|
123
117
|
)
|
124
|
-
FUNCTION_EXECUTOR_TERMINATION_REASON_EXECUTOR_SHUTDOWN: (
|
125
|
-
FunctionExecutorTerminationReason
|
126
|
-
)
|
127
|
-
FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE: (
|
128
|
-
FunctionExecutorTerminationReason
|
129
|
-
)
|
130
118
|
FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY: FunctionExecutorTerminationReason
|
131
119
|
FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR: FunctionExecutorTerminationReason
|
132
120
|
FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT: FunctionExecutorTerminationReason
|
indexify-0.4.10/src/indexify/executor/function_executor_controller/destroy_function_executor.py
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
from typing import Any, Optional
|
2
|
-
|
3
|
-
from indexify.executor.function_executor.function_executor import FunctionExecutor
|
4
|
-
from indexify.proto.executor_api_pb2 import FunctionExecutorTerminationReason
|
5
|
-
|
6
|
-
from .events import FunctionExecutorDestroyed
|
7
|
-
|
8
|
-
|
9
|
-
async def destroy_function_executor(
|
10
|
-
function_executor: Optional[FunctionExecutor],
|
11
|
-
termination_reason: FunctionExecutorTerminationReason,
|
12
|
-
logger: Any,
|
13
|
-
) -> FunctionExecutorDestroyed:
|
14
|
-
"""Destroys a function executor if it's not None.
|
15
|
-
|
16
|
-
Doesn't raise any exceptions.
|
17
|
-
"""
|
18
|
-
logger = logger.bind(module=__name__)
|
19
|
-
|
20
|
-
if function_executor is not None:
|
21
|
-
logger.info(
|
22
|
-
"destroying function executor",
|
23
|
-
)
|
24
|
-
await function_executor.destroy()
|
25
|
-
|
26
|
-
return FunctionExecutorDestroyed(
|
27
|
-
is_success=True, termination_reason=termination_reason
|
28
|
-
)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/function_executor.py
RENAMED
File without changes
|
{indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor/health_checker.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/downloads.py
RENAMED
File without changes
|
File without changes
|
{indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/loggers.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/function_executor_controller/task_info.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/health_check_handler.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/prometheus_metrics_handler.py
RENAMED
File without changes
|
File without changes
|
{indexify-0.4.10 → indexify-0.4.11}/src/indexify/executor/monitoring/startup_probe_handler.py
RENAMED
File without changes
|
File without changes
|
File without changes
|