indexify 0.4.11__tar.gz → 0.4.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.4.11 → indexify-0.4.13}/PKG-INFO +2 -2
- {indexify-0.4.11 → indexify-0.4.13}/pyproject.toml +2 -2
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/create_function_executor.py +10 -15
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/events.py +23 -7
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/function_executor_controller.py +67 -25
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/task_output.py +53 -1
- indexify-0.4.11/src/indexify/executor/function_executor_controller/destroy_function_executor.py → indexify-0.4.13/src/indexify/executor/function_executor_controller/terminate_function_executor.py +12 -5
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/proto/executor_api.proto +1 -0
- indexify-0.4.13/src/indexify/proto/executor_api_pb2.py +88 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/proto/executor_api_pb2.pyi +9 -1
- indexify-0.4.11/src/indexify/proto/executor_api_pb2.py +0 -88
- {indexify-0.4.11 → indexify-0.4.13}/README.md +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/cli/__init__.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/cli/build_image.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/cli/deploy.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/cli/executor.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/README.md +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/blob_store/blob_store.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/blob_store/local_fs_blob_store.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/blob_store/metrics/blob_store.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/blob_store/s3_blob_store.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/channel_manager.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/executor.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_allowlist.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor/function_executor.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor/health_checker.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor/invocation_state_client.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/__init__.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/completed_task_metrics.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/debug_event_loop.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/downloads.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/function_executor_startup_output.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/loggers.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/message_validators.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/downloads.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/function_executor_controller.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/run_task.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/upload_task_output.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/prepare_task.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/run_task.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/task_info.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/upload_task_output.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/host_resources/host_resources.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/host_resources/nvidia_gpu.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/metrics/channel_manager.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/metrics/executor.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/metrics/state_reconciler.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/metrics/state_reporter.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/monitoring/handler.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/monitoring/metrics.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/monitoring/server.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/state_reconciler.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/state_reporter.py +0 -0
- {indexify-0.4.11 → indexify-0.4.13}/src/indexify/proto/executor_api_pb2_grpc.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: indexify
|
3
|
-
Version: 0.4.11
|
3
|
+
Version: 0.4.13
|
4
4
|
Summary: Open Source Indexify components and helper tools
|
5
5
|
Home-page: https://github.com/tensorlakeai/indexify
|
6
6
|
License: Apache 2.0
|
@@ -17,7 +17,7 @@ Requires-Dist: aiohttp (>=3.11.0,<4.0.0)
|
|
17
17
|
Requires-Dist: boto3 (>=1.37.30,<2.0.0)
|
18
18
|
Requires-Dist: prometheus-client (>=0.21.1,<0.22.0)
|
19
19
|
Requires-Dist: psutil (>=7.0.0,<8.0.0)
|
20
|
-
Requires-Dist: tensorlake (==0.2.
|
20
|
+
Requires-Dist: tensorlake (==0.2.12)
|
21
21
|
Project-URL: Repository, https://github.com/tensorlakeai/indexify
|
22
22
|
Description-Content-Type: text/markdown
|
23
23
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "indexify"
|
3
3
|
# Incremented if any of the components provided in this packages are updated.
|
4
|
-
version = "0.4.11"
|
4
|
+
version = "0.4.13"
|
5
5
|
description = "Open Source Indexify components and helper tools"
|
6
6
|
authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
|
7
7
|
license = "Apache 2.0"
|
@@ -25,7 +25,7 @@ prometheus-client = "^0.21.1"
|
|
25
25
|
psutil = "^7.0.0"
|
26
26
|
# Adds function-executor binary, utils lib, sdk used in indexify-cli commands.
|
27
27
|
# We need to specify the tensorlake version exactly because pip install doesn't respect poetry.lock files.
|
28
|
-
tensorlake = "0.2.
|
28
|
+
tensorlake = "0.2.12"
|
29
29
|
# Uncomment the next line to use local tensorlake package (only for development!)
|
30
30
|
# tensorlake = { path = "../tensorlake", develop = true }
|
31
31
|
# pydantic is provided by tensorlake
|
@@ -5,7 +5,6 @@ from typing import Any, Optional, Tuple
|
|
5
5
|
from tensorlake.function_executor.proto.function_executor_pb2 import (
|
6
6
|
InitializeRequest,
|
7
7
|
SerializedObject,
|
8
|
-
SerializedObjectEncoding,
|
9
8
|
)
|
10
9
|
|
11
10
|
from indexify.executor.blob_store.blob_store import BLOBStore
|
@@ -73,25 +72,21 @@ async def create_function_executor(
|
|
73
72
|
except BaseException as e:
|
74
73
|
if isinstance(e, asyncio.CancelledError):
|
75
74
|
logger.info("function executor startup was cancelled")
|
76
|
-
return FunctionExecutorCreated(
|
77
|
-
function_executor=None,
|
78
|
-
output=FunctionExecutorStartupOutput(
|
79
|
-
function_executor_description=function_executor_description,
|
80
|
-
termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE,
|
81
|
-
),
|
82
|
-
)
|
83
75
|
else:
|
84
76
|
logger.error(
|
85
77
|
"failed to create function executor due to platform error",
|
86
78
|
exc_info=e,
|
87
79
|
)
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
80
|
+
|
81
|
+
# Cancelled FE startup means that Server removed it from desired state so it doesn't matter what termination_reason we return
|
82
|
+
# in this case because this FE will be removed from Executor reported state.
|
83
|
+
return FunctionExecutorCreated(
|
84
|
+
function_executor=None,
|
85
|
+
output=FunctionExecutorStartupOutput(
|
86
|
+
function_executor_description=function_executor_description,
|
87
|
+
termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR,
|
88
|
+
),
|
89
|
+
)
|
95
90
|
|
96
91
|
|
97
92
|
async def _initialization_result_to_fe_creation_output(
|
{indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/events.py
RENAMED
@@ -1,5 +1,5 @@
|
|
1
1
|
from enum import Enum
|
2
|
-
from typing import Optional
|
2
|
+
from typing import List, Optional
|
3
3
|
|
4
4
|
from indexify.executor.function_executor.function_executor import (
|
5
5
|
FunctionExecutor,
|
@@ -12,7 +12,7 @@ from .task_info import TaskInfo
|
|
12
12
|
|
13
13
|
class EventType(Enum):
|
14
14
|
FUNCTION_EXECUTOR_CREATED = 1
|
15
|
-
|
15
|
+
FUNCTION_EXECUTOR_TERMINATED = 2
|
16
16
|
SHUTDOWN_INITIATED = 3
|
17
17
|
TASK_PREPARATION_FINISHED = 4
|
18
18
|
SCHEDULE_TASK_EXECUTION = 5
|
@@ -50,17 +50,33 @@ class FunctionExecutorCreated(BaseEvent):
|
|
50
50
|
self.output: FunctionExecutorStartupOutput = output
|
51
51
|
|
52
52
|
|
53
|
-
class
|
53
|
+
class FunctionExecutorTerminated(BaseEvent):
|
54
54
|
"""
|
55
|
-
Event indicating that Function Executor has been destroyed.
|
55
|
+
Event indicating that Function Executor has been terminated (destroyed).
|
56
56
|
"""
|
57
57
|
|
58
|
-
def __init__(
|
59
|
-
|
58
|
+
def __init__(
|
59
|
+
self,
|
60
|
+
is_success: bool,
|
61
|
+
fe_termination_reason: FunctionExecutorTerminationReason,
|
62
|
+
allocation_ids_caused_termination: List[str],
|
63
|
+
):
|
64
|
+
super().__init__(EventType.FUNCTION_EXECUTOR_TERMINATED)
|
60
65
|
self.is_success: bool = is_success
|
66
|
+
self.fe_termination_reason: FunctionExecutorTerminationReason = (
|
67
|
+
fe_termination_reason
|
68
|
+
)
|
69
|
+
self.allocation_ids_caused_termination: List[str] = (
|
70
|
+
allocation_ids_caused_termination
|
71
|
+
)
|
61
72
|
|
62
73
|
def __str__(self) -> str:
|
63
|
-
return
|
74
|
+
return (
|
75
|
+
f"Event(type={self.event_type.name}, "
|
76
|
+
f"is_success={self.is_success}, "
|
77
|
+
f"fe_termination_reason={FunctionExecutorTerminationReason.Name(self.fe_termination_reason)}, "
|
78
|
+
f"allocation_ids_caused_termination={self.allocation_ids_caused_termination})"
|
79
|
+
)
|
64
80
|
|
65
81
|
|
66
82
|
class ShutdownInitiated(BaseEvent):
|
@@ -29,12 +29,11 @@ from .debug_event_loop import (
|
|
29
29
|
debug_print_events,
|
30
30
|
debug_print_processing_event,
|
31
31
|
)
|
32
|
-
from .destroy_function_executor import destroy_function_executor
|
33
32
|
from .events import (
|
34
33
|
BaseEvent,
|
35
34
|
EventType,
|
36
35
|
FunctionExecutorCreated,
|
37
|
-
|
36
|
+
FunctionExecutorTerminated,
|
38
37
|
ScheduleTaskExecution,
|
39
38
|
ShutdownInitiated,
|
40
39
|
TaskExecutionFinished,
|
@@ -61,6 +60,7 @@ from .prepare_task import prepare_task
|
|
61
60
|
from .run_task import run_task_on_function_executor
|
62
61
|
from .task_info import TaskInfo
|
63
62
|
from .task_output import TaskOutput
|
63
|
+
from .terminate_function_executor import terminate_function_executor
|
64
64
|
from .upload_task_output import upload_task_output
|
65
65
|
|
66
66
|
|
@@ -110,7 +110,6 @@ class FunctionExecutorController:
|
|
110
110
|
self._destroy_lock: asyncio.Lock = asyncio.Lock()
|
111
111
|
# Mutable state. No lock needed as it's modified by async tasks running in the same event loop.
|
112
112
|
self._fe: Optional[FunctionExecutor] = None
|
113
|
-
self._fe_termination_reason: Optional[FunctionExecutorTerminationReason] = None
|
114
113
|
self._internal_state = _FE_CONTROLLER_STATE.NOT_STARTED
|
115
114
|
metric_function_executors_with_state.labels(
|
116
115
|
state=_to_fe_state_metric_label(self._internal_state, self._logger)
|
@@ -347,8 +346,8 @@ class FunctionExecutorController:
|
|
347
346
|
"""
|
348
347
|
if event.event_type == EventType.FUNCTION_EXECUTOR_CREATED:
|
349
348
|
return self._handle_event_function_executor_created(event)
|
350
|
-
elif event.event_type == EventType.
|
351
|
-
return self.
|
349
|
+
elif event.event_type == EventType.FUNCTION_EXECUTOR_TERMINATED:
|
350
|
+
return self._handle_event_function_executor_terminated(event)
|
352
351
|
elif event.event_type == EventType.TASK_PREPARATION_FINISHED:
|
353
352
|
return self._handle_event_task_preparation_finished(event)
|
354
353
|
elif event.event_type == EventType.SCHEDULE_TASK_EXECUTION:
|
@@ -460,7 +459,28 @@ class FunctionExecutorController:
|
|
460
459
|
self._state_reporter.schedule_state_report()
|
461
460
|
|
462
461
|
if event.function_executor is None:
|
463
|
-
|
462
|
+
# Server needs to increment attempts counter for all the tasks that were pending while FE was starting up.
|
463
|
+
# This prevents infinite retries if FEs consistently fail to start up.
|
464
|
+
# The allocations we marked here also need to not use FE terminated failure reason in their outputs
|
465
|
+
# because FE terminated means that the allocation wasn't the cause of the FE termination.
|
466
|
+
allocation_ids_caused_termination: List[str] = []
|
467
|
+
for task_info in self._tasks.values():
|
468
|
+
task_logger = task_allocation_logger(task_info.allocation, self._logger)
|
469
|
+
task_logger.info(
|
470
|
+
"marking allocation failed on function executor startup failure"
|
471
|
+
)
|
472
|
+
allocation_ids_caused_termination.append(
|
473
|
+
task_info.allocation.allocation_id
|
474
|
+
)
|
475
|
+
task_info.output = TaskOutput.function_executor_startup_failed(
|
476
|
+
allocation=task_info.allocation,
|
477
|
+
fe_startup_output=event.output,
|
478
|
+
logger=task_logger,
|
479
|
+
)
|
480
|
+
self._start_termination(
|
481
|
+
fe_termination_reason=event.output.termination_reason,
|
482
|
+
allocation_ids_caused_termination=allocation_ids_caused_termination,
|
483
|
+
)
|
464
484
|
return
|
465
485
|
|
466
486
|
self._fe = event.function_executor
|
@@ -478,16 +498,16 @@ class FunctionExecutorController:
|
|
478
498
|
source="_handle_event_function_executor_created",
|
479
499
|
)
|
480
500
|
|
481
|
-
def
|
482
|
-
self, event:
|
501
|
+
def _handle_event_function_executor_terminated(
|
502
|
+
self, event: FunctionExecutorTerminated
|
483
503
|
) -> None:
|
484
|
-
"""Handles the Function Executor
|
504
|
+
"""Handles the Function Executor terminated event.
|
485
505
|
|
486
506
|
Doesn't raise any exceptions. Doesn't block.
|
487
507
|
"""
|
488
508
|
if not event.is_success:
|
489
509
|
self._logger.error(
|
490
|
-
"Function Executor
|
510
|
+
"Function Executor termination failed unexpectedly, this should never happen",
|
491
511
|
)
|
492
512
|
|
493
513
|
self._fe = None
|
@@ -496,7 +516,8 @@ class FunctionExecutorController:
|
|
496
516
|
FunctionExecutorState(
|
497
517
|
description=self._fe_description,
|
498
518
|
status=FunctionExecutorStatus.FUNCTION_EXECUTOR_STATUS_TERMINATED,
|
499
|
-
termination_reason=
|
519
|
+
termination_reason=event.fe_termination_reason,
|
520
|
+
allocation_ids_caused_termination=event.allocation_ids_caused_termination,
|
500
521
|
)
|
501
522
|
)
|
502
523
|
self._update_internal_state(_FE_CONTROLLER_STATE.TERMINATED)
|
@@ -512,8 +533,14 @@ class FunctionExecutorController:
|
|
512
533
|
"Function Executor health check failed, terminating Function Executor",
|
513
534
|
reason=result.reason,
|
514
535
|
)
|
536
|
+
|
515
537
|
self._start_termination(
|
516
|
-
|
538
|
+
fe_termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY,
|
539
|
+
allocation_ids_caused_termination=(
|
540
|
+
[]
|
541
|
+
if self._running_task is None
|
542
|
+
else [self._running_task.allocation.allocation_id]
|
543
|
+
),
|
517
544
|
)
|
518
545
|
|
519
546
|
def _handle_event_task_preparation_finished(
|
@@ -579,9 +606,11 @@ class FunctionExecutorController:
|
|
579
606
|
_FE_CONTROLLER_STATE.TERMINATING,
|
580
607
|
_FE_CONTROLLER_STATE.TERMINATED,
|
581
608
|
]:
|
582
|
-
task_info.output
|
583
|
-
|
584
|
-
|
609
|
+
if task_info.output is None:
|
610
|
+
# The output can be set already by FE startup failure handler.
|
611
|
+
task_info.output = TaskOutput.function_executor_terminated(
|
612
|
+
task_info.allocation
|
613
|
+
)
|
585
614
|
self._start_task_output_upload(task_info)
|
586
615
|
elif self._internal_state == _FE_CONTROLLER_STATE.RUNNING:
|
587
616
|
self._running_task = task_info
|
@@ -627,7 +656,10 @@ class FunctionExecutorController:
|
|
627
656
|
)
|
628
657
|
else:
|
629
658
|
self._start_termination(
|
630
|
-
|
659
|
+
fe_termination_reason=event.function_executor_termination_reason,
|
660
|
+
allocation_ids_caused_termination=[
|
661
|
+
event.task_info.allocation.allocation_id
|
662
|
+
],
|
631
663
|
)
|
632
664
|
|
633
665
|
# Ignore is_cancelled because cancelling a task still involves uploading its output.
|
@@ -684,7 +716,9 @@ class FunctionExecutorController:
|
|
684
716
|
self._state_reporter.schedule_state_report()
|
685
717
|
|
686
718
|
def _start_termination(
|
687
|
-
self,
|
719
|
+
self,
|
720
|
+
fe_termination_reason: FunctionExecutorTerminationReason,
|
721
|
+
allocation_ids_caused_termination: List[str],
|
688
722
|
) -> None:
|
689
723
|
"""Starts termination of the Function Executor if it's not started yet.
|
690
724
|
|
@@ -698,16 +732,21 @@ class FunctionExecutorController:
|
|
698
732
|
# when the FE is unhealthy. Dedup the calls to keep state machine consistent.
|
699
733
|
return
|
700
734
|
|
701
|
-
self._fe_termination_reason = termination_reason
|
702
735
|
self._update_internal_state(_FE_CONTROLLER_STATE.TERMINATING)
|
703
|
-
next_aio =
|
736
|
+
next_aio = terminate_function_executor(
|
704
737
|
function_executor=self._fe,
|
705
738
|
lock=self._destroy_lock,
|
739
|
+
fe_termination_reason=fe_termination_reason,
|
740
|
+
allocation_ids_caused_termination=allocation_ids_caused_termination,
|
706
741
|
logger=self._logger,
|
707
742
|
)
|
708
743
|
self._spawn_aio_for_fe(
|
709
744
|
aio=next_aio,
|
710
|
-
on_exception=
|
745
|
+
on_exception=FunctionExecutorTerminated(
|
746
|
+
is_success=False,
|
747
|
+
fe_termination_reason=fe_termination_reason,
|
748
|
+
allocation_ids_caused_termination=allocation_ids_caused_termination,
|
749
|
+
),
|
711
750
|
)
|
712
751
|
|
713
752
|
async def _shutdown_no_exceptions(self, event: ShutdownInitiated) -> None:
|
@@ -746,11 +785,14 @@ class FunctionExecutorController:
|
|
746
785
|
# BaseException includes asyncio.CancelledError which is always raised here.
|
747
786
|
pass
|
748
787
|
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
788
|
+
# Makes sure we don't run fe destroy concurrently with an event loop task.
|
789
|
+
# FE destroy uses asyncio.to_thread() calls so it doesn't get cancelled with all the tasks above.
|
790
|
+
async with self._destroy_lock:
|
791
|
+
if self._fe is not None:
|
792
|
+
self._logger.info(
|
793
|
+
"destroying function executor",
|
794
|
+
)
|
795
|
+
await self._fe.destroy()
|
754
796
|
|
755
797
|
# Cleanup the metric from this FE.
|
756
798
|
metric_function_executors_with_state.labels(
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Dict, List, Optional
|
1
|
+
from typing import Any, Dict, List, Optional
|
2
2
|
|
3
3
|
from tensorlake.function_executor.proto.function_executor_pb2 import (
|
4
4
|
SerializedObject,
|
@@ -6,11 +6,14 @@ from tensorlake.function_executor.proto.function_executor_pb2 import (
|
|
6
6
|
|
7
7
|
from indexify.proto.executor_api_pb2 import (
|
8
8
|
DataPayload,
|
9
|
+
FunctionExecutorTerminationReason,
|
9
10
|
TaskAllocation,
|
10
11
|
TaskFailureReason,
|
11
12
|
TaskOutcomeCode,
|
12
13
|
)
|
13
14
|
|
15
|
+
from .function_executor_startup_output import FunctionExecutorStartupOutput
|
16
|
+
|
14
17
|
|
15
18
|
class TaskMetrics:
|
16
19
|
"""Metrics for a task."""
|
@@ -105,3 +108,52 @@ class TaskOutput:
|
|
105
108
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
106
109
|
failure_reason=TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED,
|
107
110
|
)
|
111
|
+
|
112
|
+
@classmethod
|
113
|
+
def function_executor_startup_failed(
|
114
|
+
cls,
|
115
|
+
allocation: TaskAllocation,
|
116
|
+
fe_startup_output: FunctionExecutorStartupOutput,
|
117
|
+
logger: Any,
|
118
|
+
) -> "TaskOutput":
|
119
|
+
"""Creates a TaskOutput for the case when we fail a task because its FE startup failed."""
|
120
|
+
output = TaskOutput(
|
121
|
+
allocation=allocation,
|
122
|
+
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
123
|
+
failure_reason=_fe_startup_failure_reason_to_task_failure_reason(
|
124
|
+
fe_startup_output.termination_reason, logger
|
125
|
+
),
|
126
|
+
)
|
127
|
+
# Use FE startup stdout, stderr for allocations that we failed because FE startup failed.
|
128
|
+
output.uploaded_stdout = fe_startup_output.stdout
|
129
|
+
output.uploaded_stderr = fe_startup_output.stderr
|
130
|
+
return output
|
131
|
+
|
132
|
+
|
133
|
+
def _fe_startup_failure_reason_to_task_failure_reason(
|
134
|
+
fe_termination_reason: FunctionExecutorTerminationReason, logger: Any
|
135
|
+
) -> TaskFailureReason:
|
136
|
+
# Only need to check FE termination reasons happening on FE startup.
|
137
|
+
if (
|
138
|
+
fe_termination_reason
|
139
|
+
== FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR
|
140
|
+
):
|
141
|
+
return TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_ERROR
|
142
|
+
elif (
|
143
|
+
fe_termination_reason
|
144
|
+
== FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT
|
145
|
+
):
|
146
|
+
return TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_TIMEOUT
|
147
|
+
elif (
|
148
|
+
fe_termination_reason
|
149
|
+
== FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR
|
150
|
+
):
|
151
|
+
return TaskFailureReason.TASK_FAILURE_REASON_INTERNAL_ERROR
|
152
|
+
else:
|
153
|
+
logger.error(
|
154
|
+
"unexpected function executor startup failure reason",
|
155
|
+
fe_termination_reason=FunctionExecutorTerminationReason.Name(
|
156
|
+
fe_termination_reason
|
157
|
+
),
|
158
|
+
)
|
159
|
+
return TaskFailureReason.TASK_FAILURE_REASON_UNKNOWN
|
@@ -1,16 +1,19 @@
|
|
1
1
|
import asyncio
|
2
|
-
from typing import Any, Optional
|
2
|
+
from typing import Any, List, Optional
|
3
3
|
|
4
4
|
from indexify.executor.function_executor.function_executor import FunctionExecutor
|
5
|
+
from indexify.proto.executor_api_pb2 import FunctionExecutorTerminationReason
|
5
6
|
|
6
|
-
from .events import
|
7
|
+
from .events import FunctionExecutorTerminated
|
7
8
|
|
8
9
|
|
9
|
-
async def
|
10
|
+
async def terminate_function_executor(
|
10
11
|
function_executor: Optional[FunctionExecutor],
|
11
12
|
lock: asyncio.Lock,
|
13
|
+
fe_termination_reason: FunctionExecutorTerminationReason,
|
14
|
+
allocation_ids_caused_termination: List[str],
|
12
15
|
logger: Any,
|
13
|
-
) ->
|
16
|
+
) -> FunctionExecutorTerminated:
|
14
17
|
"""Destroys the function executor if it's not None.
|
15
18
|
|
16
19
|
The supplied lock is used to ensure that if a destroy operation is in progress,
|
@@ -28,4 +31,8 @@ async def destroy_function_executor(
|
|
28
31
|
)
|
29
32
|
await function_executor.destroy()
|
30
33
|
|
31
|
-
return
|
34
|
+
return FunctionExecutorTerminated(
|
35
|
+
is_success=True,
|
36
|
+
fe_termination_reason=fe_termination_reason,
|
37
|
+
allocation_ids_caused_termination=allocation_ids_caused_termination,
|
38
|
+
)
|
@@ -125,6 +125,7 @@ message FunctionExecutorState {
|
|
125
125
|
optional FunctionExecutorDescription description = 1;
|
126
126
|
optional FunctionExecutorStatus status = 2;
|
127
127
|
optional FunctionExecutorTerminationReason termination_reason = 3;
|
128
|
+
repeated string allocation_ids_caused_termination = 4;
|
128
129
|
}
|
129
130
|
|
130
131
|
message FunctionExecutorUpdate {
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
3
|
+
# NO CHECKED-IN PROTOBUF GENCODE
|
4
|
+
# source: indexify/proto/executor_api.proto
|
5
|
+
# Protobuf Python Version: 6.31.0
|
6
|
+
"""Generated protocol buffer code."""
|
7
|
+
from google.protobuf import descriptor as _descriptor
|
8
|
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
9
|
+
from google.protobuf import runtime_version as _runtime_version
|
10
|
+
from google.protobuf import symbol_database as _symbol_database
|
11
|
+
from google.protobuf.internal import builder as _builder
|
12
|
+
|
13
|
+
_runtime_version.ValidateProtobufRuntimeVersion(
|
14
|
+
_runtime_version.Domain.PUBLIC, 6, 31, 0, "", "indexify/proto/executor_api.proto"
|
15
|
+
)
|
16
|
+
# @@protoc_insertion_point(imports)
|
17
|
+
|
18
|
+
_sym_db = _symbol_database.Default()
|
19
|
+
|
20
|
+
|
21
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
|
22
|
+
b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\xeb\x01\n\x0b\x44\x61taPayload\x12\x11\n\x04size\x18\x02 \x01(\x04H\x00\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x02\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x03\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x04\x88\x01\x01\x42\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"e\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_model"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xd8\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xb3\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\x0c \x01(\tH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resourcesB\x1c\n\x1a_output_payload_uri_prefix"\xcf\x02\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12S\n\x12termination_reason\x18\x03 \x01(\x0e\x32\x32.executor_api_pb.FunctionExecutorTerminationReasonH\x02\x88\x01\x01\x12)\n!allocation_ids_caused_termination\x18\x04 \x03(\tB\x0e\n\x0c_descriptionB\t\n\x07_statusB\x15\n\x13_termination_reason"\x8c\x02\n\x16\x46unctionExecutorUpdate\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12\x39\n\x0estartup_stdout\x18\x02 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x01\x88\x01\x01\x12\x39\n\x0estartup_stderr\x18\x03 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\x11\n\x0f_startup_stdoutB\x11\n\x0f_startup_stderr"\xce\x05\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x14\n\x07version\x18\x05 
\x01(\tH\x02\x88\x01\x01\x12\x34\n\x06status\x18\x06 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x03\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x04\x88\x01\x01\x12N\n!total_function_executor_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x05\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x06\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\x07\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x0b\n\t_hostnameB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB$\n"_total_function_executor_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"\xb9\x01\n\x0e\x45xecutorUpdate\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x0ctask_results\x18\x02 \x03(\x0b\x32\x1b.executor_api_pb.TaskResult\x12J\n\x19\x66unction_executor_updates\x18\x03 \x03(\x0b\x32\'.executor_api_pb.FunctionExecutorUpdateB\x0e\n\x0c_executor_id"\xbf\x01\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x12=\n\x0f\x65xecutor_update\x18\x02 \x01(\x0b\x32\x1f.executor_api_pb.ExecutorUpdateH\x01\x88\x01\x01\x42\x11\n\x0f_executor_stateB\x12\n\x10_executor_update"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 
\x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xc6\x04\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\t\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\n\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\xad\x01\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_taskB\x10\n\x0e_allocation_id"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\xb0\x06\n\nTaskResult\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x16\n\tnamespace\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x17\n\ngraph_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x06 \x01(\tH\x05\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x07 \x01(\tH\x06\x88\x01\x01\x12\x14\n\x07reducer\x18\x08 \x01(\x08H\x07\x88\x01\x01\x12;\n\x0coutcome_code\x18\t \x01(\x0e\x32 .executor_api_pb.TaskOutcomeCodeH\x08\x88\x01\x01\x12?\n\x0e\x66\x61ilure_reason\x18\n \x01(\x0e\x32".executor_api_pb.TaskFailureReasonH\t\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x36\n\x10\x66unction_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\r \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12\x31\n\x06stderr\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0b\x88\x01\x01\x12\x42\n\x17invocation_error_output\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x10\n\x0e_allocation_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_reducerB\x0f\n\r_outcome_codeB\x11\n\x0f_failure_reasonB\t\n\x07_stdoutB\t\n\x07_stderrB\x1a\n\x18_invocation_error_output*\xd1\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03\x12$\n DATA_PAYLOAD_ENCODING_BINARY_ZIP\x10\x04*\xd6\x01\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_40GB\x10\x01\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_80GB\x10\x02\x12\x1e\n\x1aGPU_MODEL_NVIDIA_H100_80GB\x10\x03\x12\x1d\n\x19GPU_MODEL_NVIDIA_TESLA_T4\x10\x04\x12\x1a\n\x16GPU_MODEL_NVIDIA_A6000\x10\x05\x12\x18\n\x14GPU_MODEL_NVIDIA_A10\x10\x06*\xb3\x01\n\x16\x46unctionExecutorStatus\x12$\n 
FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12$\n FUNCTION_EXECUTOR_STATUS_PENDING\x10\x01\x12$\n FUNCTION_EXECUTOR_STATUS_RUNNING\x10\x02\x12\'\n#FUNCTION_EXECUTOR_STATUS_TERMINATED\x10\x03*\x94\x04\n!FunctionExecutorTerminationReason\x12\x30\n,FUNCTION_EXECUTOR_TERMINATION_REASON_UNKNOWN\x10\x00\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR\x10\x01\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR\x10\x02\x12H\nDFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT\x10\x03\x12\x32\n.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY\x10\x0c\x12\x37\n3FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR\x10\r\x12\x39\n5FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT\x10\x0e\x12;\n7FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_CANCELLED\x10\x0f*\xa5\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x04*n\n\x0fTaskOutcomeCode\x12\x1d\n\x19TASK_OUTCOME_CODE_UNKNOWN\x10\x00\x12\x1d\n\x19TASK_OUTCOME_CODE_SUCCESS\x10\x01\x12\x1d\n\x19TASK_OUTCOME_CODE_FAILURE\x10\x02*\xb6\x02\n\x11TaskFailureReason\x12\x1f\n\x1bTASK_FAILURE_REASON_UNKNOWN\x10\x00\x12&\n"TASK_FAILURE_REASON_INTERNAL_ERROR\x10\x01\x12&\n"TASK_FAILURE_REASON_FUNCTION_ERROR\x10\x02\x12(\n$TASK_FAILURE_REASON_FUNCTION_TIMEOUT\x10\x03\x12(\n$TASK_FAILURE_REASON_INVOCATION_ERROR\x10\x04\x12&\n"TASK_FAILURE_REASON_TASK_CANCELLED\x10\x05\x12\x34\n0TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED\x10\x06\x32\xff\x01\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x62\x06proto3'
|
23
|
+
)
|
24
|
+
|
25
|
+
_globals = globals()
|
26
|
+
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
27
|
+
_builder.BuildTopDescriptorsAndMessages(
|
28
|
+
DESCRIPTOR, "indexify.proto.executor_api_pb2", _globals
|
29
|
+
)
|
30
|
+
if not _descriptor._USE_C_DESCRIPTORS:
|
31
|
+
DESCRIPTOR._loaded_options = None
|
32
|
+
_globals["_EXECUTORSTATE_LABELSENTRY"]._loaded_options = None
|
33
|
+
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_options = b"8\001"
|
34
|
+
_globals["_DATAPAYLOADENCODING"]._serialized_start = 5366
|
35
|
+
_globals["_DATAPAYLOADENCODING"]._serialized_end = 5575
|
36
|
+
_globals["_GPUMODEL"]._serialized_start = 5578
|
37
|
+
_globals["_GPUMODEL"]._serialized_end = 5792
|
38
|
+
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 5795
|
39
|
+
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 5974
|
40
|
+
_globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_start = 5977
|
41
|
+
_globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_end = 6509
|
42
|
+
_globals["_EXECUTORSTATUS"]._serialized_start = 6512
|
43
|
+
_globals["_EXECUTORSTATUS"]._serialized_end = 6677
|
44
|
+
_globals["_TASKOUTCOMECODE"]._serialized_start = 6679
|
45
|
+
_globals["_TASKOUTCOMECODE"]._serialized_end = 6789
|
46
|
+
_globals["_TASKFAILUREREASON"]._serialized_start = 6792
|
47
|
+
_globals["_TASKFAILUREREASON"]._serialized_end = 7102
|
48
|
+
_globals["_DATAPAYLOAD"]._serialized_start = 55
|
49
|
+
_globals["_DATAPAYLOAD"]._serialized_end = 290
|
50
|
+
_globals["_GPURESOURCES"]._serialized_start = 292
|
51
|
+
_globals["_GPURESOURCES"]._serialized_end = 393
|
52
|
+
_globals["_HOSTRESOURCES"]._serialized_start = 396
|
53
|
+
_globals["_HOSTRESOURCES"]._serialized_end = 590
|
54
|
+
_globals["_ALLOWEDFUNCTION"]._serialized_start = 593
|
55
|
+
_globals["_ALLOWEDFUNCTION"]._serialized_end = 780
|
56
|
+
_globals["_FUNCTIONEXECUTORRESOURCES"]._serialized_start = 783
|
57
|
+
_globals["_FUNCTIONEXECUTORRESOURCES"]._serialized_end = 999
|
58
|
+
_globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_start = 1002
|
59
|
+
_globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_end = 1565
|
60
|
+
_globals["_FUNCTIONEXECUTORSTATE"]._serialized_start = 1568
|
61
|
+
_globals["_FUNCTIONEXECUTORSTATE"]._serialized_end = 1903
|
62
|
+
_globals["_FUNCTIONEXECUTORUPDATE"]._serialized_start = 1906
|
63
|
+
_globals["_FUNCTIONEXECUTORUPDATE"]._serialized_end = 2174
|
64
|
+
_globals["_EXECUTORSTATE"]._serialized_start = 2177
|
65
|
+
_globals["_EXECUTORSTATE"]._serialized_end = 2895
|
66
|
+
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_start = 2708
|
67
|
+
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_end = 2753
|
68
|
+
_globals["_EXECUTORUPDATE"]._serialized_start = 2898
|
69
|
+
_globals["_EXECUTORUPDATE"]._serialized_end = 3083
|
70
|
+
_globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_start = 3086
|
71
|
+
_globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_end = 3277
|
72
|
+
_globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_start = 3279
|
73
|
+
_globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_end = 3308
|
74
|
+
_globals["_TASKRETRYPOLICY"]._serialized_start = 3311
|
75
|
+
_globals["_TASKRETRYPOLICY"]._serialized_end = 3518
|
76
|
+
_globals["_TASK"]._serialized_start = 3521
|
77
|
+
_globals["_TASK"]._serialized_end = 4103
|
78
|
+
_globals["_TASKALLOCATION"]._serialized_start = 4106
|
79
|
+
_globals["_TASKALLOCATION"]._serialized_end = 4279
|
80
|
+
_globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_start = 4281
|
81
|
+
_globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_end = 4356
|
82
|
+
_globals["_DESIREDEXECUTORSTATE"]._serialized_start = 4359
|
83
|
+
_globals["_DESIREDEXECUTORSTATE"]._serialized_end = 4544
|
84
|
+
_globals["_TASKRESULT"]._serialized_start = 4547
|
85
|
+
_globals["_TASKRESULT"]._serialized_end = 5363
|
86
|
+
_globals["_EXECUTORAPI"]._serialized_start = 7105
|
87
|
+
_globals["_EXECUTORAPI"]._serialized_end = 7360
|
88
|
+
# @@protoc_insertion_point(module_scope)
|
@@ -274,13 +274,20 @@ class FunctionExecutorDescription(_message.Message):
|
|
274
274
|
) -> None: ...
|
275
275
|
|
276
276
|
class FunctionExecutorState(_message.Message):
|
277
|
-
__slots__ = (
|
277
|
+
__slots__ = (
|
278
|
+
"description",
|
279
|
+
"status",
|
280
|
+
"termination_reason",
|
281
|
+
"allocation_ids_caused_termination",
|
282
|
+
)
|
278
283
|
DESCRIPTION_FIELD_NUMBER: _ClassVar[int]
|
279
284
|
STATUS_FIELD_NUMBER: _ClassVar[int]
|
280
285
|
TERMINATION_REASON_FIELD_NUMBER: _ClassVar[int]
|
286
|
+
ALLOCATION_IDS_CAUSED_TERMINATION_FIELD_NUMBER: _ClassVar[int]
|
281
287
|
description: FunctionExecutorDescription
|
282
288
|
status: FunctionExecutorStatus
|
283
289
|
termination_reason: FunctionExecutorTerminationReason
|
290
|
+
allocation_ids_caused_termination: _containers.RepeatedScalarFieldContainer[str]
|
284
291
|
def __init__(
|
285
292
|
self,
|
286
293
|
description: _Optional[_Union[FunctionExecutorDescription, _Mapping]] = ...,
|
@@ -288,6 +295,7 @@ class FunctionExecutorState(_message.Message):
|
|
288
295
|
termination_reason: _Optional[
|
289
296
|
_Union[FunctionExecutorTerminationReason, str]
|
290
297
|
] = ...,
|
298
|
+
allocation_ids_caused_termination: _Optional[_Iterable[str]] = ...,
|
291
299
|
) -> None: ...
|
292
300
|
|
293
301
|
class FunctionExecutorUpdate(_message.Message):
|
@@ -1,88 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
3
|
-
# NO CHECKED-IN PROTOBUF GENCODE
|
4
|
-
# source: indexify/proto/executor_api.proto
|
5
|
-
# Protobuf Python Version: 6.31.0
|
6
|
-
"""Generated protocol buffer code."""
|
7
|
-
from google.protobuf import descriptor as _descriptor
|
8
|
-
from google.protobuf import descriptor_pool as _descriptor_pool
|
9
|
-
from google.protobuf import runtime_version as _runtime_version
|
10
|
-
from google.protobuf import symbol_database as _symbol_database
|
11
|
-
from google.protobuf.internal import builder as _builder
|
12
|
-
|
13
|
-
_runtime_version.ValidateProtobufRuntimeVersion(
|
14
|
-
_runtime_version.Domain.PUBLIC, 6, 31, 0, "", "indexify/proto/executor_api.proto"
|
15
|
-
)
|
16
|
-
# @@protoc_insertion_point(imports)
|
17
|
-
|
18
|
-
_sym_db = _symbol_database.Default()
|
19
|
-
|
20
|
-
|
21
|
-
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
|
22
|
-
b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\xeb\x01\n\x0b\x44\x61taPayload\x12\x11\n\x04size\x18\x02 \x01(\x04H\x00\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x02\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x03\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x04\x88\x01\x01\x42\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"e\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_model"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xd8\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xb3\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\x0c \x01(\tH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resourcesB\x1c\n\x1a_output_payload_uri_prefix"\xa4\x02\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12S\n\x12termination_reason\x18\x03 \x01(\x0e\x32\x32.executor_api_pb.FunctionExecutorTerminationReasonH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_statusB\x15\n\x13_termination_reason"\x8c\x02\n\x16\x46unctionExecutorUpdate\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12\x39\n\x0estartup_stdout\x18\x02 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x01\x88\x01\x01\x12\x39\n\x0estartup_stderr\x18\x03 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\x11\n\x0f_startup_stdoutB\x11\n\x0f_startup_stderr"\xce\x05\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06status\x18\x06 
\x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x03\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x04\x88\x01\x01\x12N\n!total_function_executor_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x05\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x06\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\x07\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x0b\n\t_hostnameB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB$\n"_total_function_executor_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"\xb9\x01\n\x0e\x45xecutorUpdate\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x0ctask_results\x18\x02 \x03(\x0b\x32\x1b.executor_api_pb.TaskResult\x12J\n\x19\x66unction_executor_updates\x18\x03 \x03(\x0b\x32\'.executor_api_pb.FunctionExecutorUpdateB\x0e\n\x0c_executor_id"\xbf\x01\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x12=\n\x0f\x65xecutor_update\x18\x02 \x01(\x0b\x32\x1f.executor_api_pb.ExecutorUpdateH\x01\x88\x01\x01\x42\x11\n\x0f_executor_stateB\x12\n\x10_executor_update"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 
\x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xc6\x04\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\t\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\n\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\xad\x01\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_taskB\x10\n\x0e_allocation_id"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\xb0\x06\n\nTaskResult\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x16\n\tnamespace\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x17\n\ngraph_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x06 \x01(\tH\x05\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x07 \x01(\tH\x06\x88\x01\x01\x12\x14\n\x07reducer\x18\x08 \x01(\x08H\x07\x88\x01\x01\x12;\n\x0coutcome_code\x18\t \x01(\x0e\x32 .executor_api_pb.TaskOutcomeCodeH\x08\x88\x01\x01\x12?\n\x0e\x66\x61ilure_reason\x18\n \x01(\x0e\x32".executor_api_pb.TaskFailureReasonH\t\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x36\n\x10\x66unction_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\r \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12\x31\n\x06stderr\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0b\x88\x01\x01\x12\x42\n\x17invocation_error_output\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x10\n\x0e_allocation_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_reducerB\x0f\n\r_outcome_codeB\x11\n\x0f_failure_reasonB\t\n\x07_stdoutB\t\n\x07_stderrB\x1a\n\x18_invocation_error_output*\xd1\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03\x12$\n DATA_PAYLOAD_ENCODING_BINARY_ZIP\x10\x04*\xd6\x01\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_40GB\x10\x01\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_80GB\x10\x02\x12\x1e\n\x1aGPU_MODEL_NVIDIA_H100_80GB\x10\x03\x12\x1d\n\x19GPU_MODEL_NVIDIA_TESLA_T4\x10\x04\x12\x1a\n\x16GPU_MODEL_NVIDIA_A6000\x10\x05\x12\x18\n\x14GPU_MODEL_NVIDIA_A10\x10\x06*\xb3\x01\n\x16\x46unctionExecutorStatus\x12$\n 
FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12$\n FUNCTION_EXECUTOR_STATUS_PENDING\x10\x01\x12$\n FUNCTION_EXECUTOR_STATUS_RUNNING\x10\x02\x12\'\n#FUNCTION_EXECUTOR_STATUS_TERMINATED\x10\x03*\x94\x04\n!FunctionExecutorTerminationReason\x12\x30\n,FUNCTION_EXECUTOR_TERMINATION_REASON_UNKNOWN\x10\x00\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR\x10\x01\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR\x10\x02\x12H\nDFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT\x10\x03\x12\x32\n.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY\x10\x0c\x12\x37\n3FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR\x10\r\x12\x39\n5FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT\x10\x0e\x12;\n7FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_CANCELLED\x10\x0f*\xa5\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x04*n\n\x0fTaskOutcomeCode\x12\x1d\n\x19TASK_OUTCOME_CODE_UNKNOWN\x10\x00\x12\x1d\n\x19TASK_OUTCOME_CODE_SUCCESS\x10\x01\x12\x1d\n\x19TASK_OUTCOME_CODE_FAILURE\x10\x02*\xb6\x02\n\x11TaskFailureReason\x12\x1f\n\x1bTASK_FAILURE_REASON_UNKNOWN\x10\x00\x12&\n"TASK_FAILURE_REASON_INTERNAL_ERROR\x10\x01\x12&\n"TASK_FAILURE_REASON_FUNCTION_ERROR\x10\x02\x12(\n$TASK_FAILURE_REASON_FUNCTION_TIMEOUT\x10\x03\x12(\n$TASK_FAILURE_REASON_INVOCATION_ERROR\x10\x04\x12&\n"TASK_FAILURE_REASON_TASK_CANCELLED\x10\x05\x12\x34\n0TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED\x10\x06\x32\xff\x01\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x62\x06proto3'
|
23
|
-
)
|
24
|
-
|
25
|
-
_globals = globals()
|
26
|
-
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
27
|
-
_builder.BuildTopDescriptorsAndMessages(
|
28
|
-
DESCRIPTOR, "indexify.proto.executor_api_pb2", _globals
|
29
|
-
)
|
30
|
-
if not _descriptor._USE_C_DESCRIPTORS:
|
31
|
-
DESCRIPTOR._loaded_options = None
|
32
|
-
_globals["_EXECUTORSTATE_LABELSENTRY"]._loaded_options = None
|
33
|
-
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_options = b"8\001"
|
34
|
-
_globals["_DATAPAYLOADENCODING"]._serialized_start = 5323
|
35
|
-
_globals["_DATAPAYLOADENCODING"]._serialized_end = 5532
|
36
|
-
_globals["_GPUMODEL"]._serialized_start = 5535
|
37
|
-
_globals["_GPUMODEL"]._serialized_end = 5749
|
38
|
-
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 5752
|
39
|
-
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 5931
|
40
|
-
_globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_start = 5934
|
41
|
-
_globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_end = 6466
|
42
|
-
_globals["_EXECUTORSTATUS"]._serialized_start = 6469
|
43
|
-
_globals["_EXECUTORSTATUS"]._serialized_end = 6634
|
44
|
-
_globals["_TASKOUTCOMECODE"]._serialized_start = 6636
|
45
|
-
_globals["_TASKOUTCOMECODE"]._serialized_end = 6746
|
46
|
-
_globals["_TASKFAILUREREASON"]._serialized_start = 6749
|
47
|
-
_globals["_TASKFAILUREREASON"]._serialized_end = 7059
|
48
|
-
_globals["_DATAPAYLOAD"]._serialized_start = 55
|
49
|
-
_globals["_DATAPAYLOAD"]._serialized_end = 290
|
50
|
-
_globals["_GPURESOURCES"]._serialized_start = 292
|
51
|
-
_globals["_GPURESOURCES"]._serialized_end = 393
|
52
|
-
_globals["_HOSTRESOURCES"]._serialized_start = 396
|
53
|
-
_globals["_HOSTRESOURCES"]._serialized_end = 590
|
54
|
-
_globals["_ALLOWEDFUNCTION"]._serialized_start = 593
|
55
|
-
_globals["_ALLOWEDFUNCTION"]._serialized_end = 780
|
56
|
-
_globals["_FUNCTIONEXECUTORRESOURCES"]._serialized_start = 783
|
57
|
-
_globals["_FUNCTIONEXECUTORRESOURCES"]._serialized_end = 999
|
58
|
-
_globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_start = 1002
|
59
|
-
_globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_end = 1565
|
60
|
-
_globals["_FUNCTIONEXECUTORSTATE"]._serialized_start = 1568
|
61
|
-
_globals["_FUNCTIONEXECUTORSTATE"]._serialized_end = 1860
|
62
|
-
_globals["_FUNCTIONEXECUTORUPDATE"]._serialized_start = 1863
|
63
|
-
_globals["_FUNCTIONEXECUTORUPDATE"]._serialized_end = 2131
|
64
|
-
_globals["_EXECUTORSTATE"]._serialized_start = 2134
|
65
|
-
_globals["_EXECUTORSTATE"]._serialized_end = 2852
|
66
|
-
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_start = 2665
|
67
|
-
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_end = 2710
|
68
|
-
_globals["_EXECUTORUPDATE"]._serialized_start = 2855
|
69
|
-
_globals["_EXECUTORUPDATE"]._serialized_end = 3040
|
70
|
-
_globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_start = 3043
|
71
|
-
_globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_end = 3234
|
72
|
-
_globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_start = 3236
|
73
|
-
_globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_end = 3265
|
74
|
-
_globals["_TASKRETRYPOLICY"]._serialized_start = 3268
|
75
|
-
_globals["_TASKRETRYPOLICY"]._serialized_end = 3475
|
76
|
-
_globals["_TASK"]._serialized_start = 3478
|
77
|
-
_globals["_TASK"]._serialized_end = 4060
|
78
|
-
_globals["_TASKALLOCATION"]._serialized_start = 4063
|
79
|
-
_globals["_TASKALLOCATION"]._serialized_end = 4236
|
80
|
-
_globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_start = 4238
|
81
|
-
_globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_end = 4313
|
82
|
-
_globals["_DESIREDEXECUTORSTATE"]._serialized_start = 4316
|
83
|
-
_globals["_DESIREDEXECUTORSTATE"]._serialized_end = 4501
|
84
|
-
_globals["_TASKRESULT"]._serialized_start = 4504
|
85
|
-
_globals["_TASKRESULT"]._serialized_end = 5320
|
86
|
-
_globals["_EXECUTORAPI"]._serialized_start = 7062
|
87
|
-
_globals["_EXECUTORAPI"]._serialized_end = 7317
|
88
|
-
# @@protoc_insertion_point(module_scope)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor/function_executor.py
RENAMED
File without changes
|
{indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor/health_checker.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/downloads.py
RENAMED
File without changes
|
File without changes
|
{indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/loggers.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/run_task.py
RENAMED
File without changes
|
{indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/task_info.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/monitoring/health_check_handler.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/monitoring/prometheus_metrics_handler.py
RENAMED
File without changes
|
File without changes
|
{indexify-0.4.11 → indexify-0.4.13}/src/indexify/executor/monitoring/startup_probe_handler.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|