indexify 0.3.23__tar.gz → 0.3.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.3.23 → indexify-0.3.25}/PKG-INFO +1 -1
- {indexify-0.3.23 → indexify-0.3.25}/pyproject.toml +1 -1
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/cli/cli.py +3 -1
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/executor.py +6 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/grpc/state_reporter.py +31 -15
- indexify-0.3.25/src/indexify/executor/host_resources/host_resources.py +50 -0
- indexify-0.3.25/src/indexify/executor/host_resources/nvidia_gpu.py +77 -0
- indexify-0.3.25/src/indexify/executor/host_resources/nvidia_gpu_allocator.py +52 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/proto/executor_api.proto +9 -15
- indexify-0.3.25/src/indexify/proto/executor_api_pb2.py +86 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/proto/executor_api_pb2.pyi +12 -23
- indexify-0.3.23/src/indexify/proto/executor_api_pb2.py +0 -86
- {indexify-0.3.23 → indexify-0.3.25}/README.md +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/README.md +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/api_objects.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/blob_store/blob_store.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/blob_store/local_fs_blob_store.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/blob_store/metrics/blob_store.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/blob_store/s3_blob_store.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/downloader.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/executor_flavor.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/function_executor.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/function_executor_state.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/function_executor_states_container.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/function_executor_status.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/health_checker.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/invocation_state_client.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/metrics/function_executor_state.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/metrics/function_executor_state_container.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/metrics/single_task_runner.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/single_task_runner.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/task_input.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/task_output.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/grpc/channel_manager.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/grpc/function_executor_controller.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/grpc/metrics/channel_manager.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/grpc/metrics/state_reconciler.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/grpc/metrics/state_reporter.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/grpc/metrics/task_controller.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/grpc/state_reconciler.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/grpc/task_controller.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/metrics/downloader.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/metrics/executor.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/metrics/task_fetcher.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/metrics/task_reporter.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/metrics/task_runner.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/monitoring/function_allowlist.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/monitoring/handler.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/monitoring/metrics.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/monitoring/server.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/runtime_probes.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/task_fetcher.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/task_reporter.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/task_runner.py +0 -0
- {indexify-0.3.23 → indexify-0.3.25}/src/indexify/proto/executor_api_pb2_grpc.py +0 -0
@@ -1,7 +1,7 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "indexify"
|
3
3
|
# Incremented if any of the components provided in this packages are updated.
|
4
|
-
version = "0.3.23"
|
4
|
+
version = "0.3.25"
|
5
5
|
description = "Open Source Indexify components and helper tools"
|
6
6
|
authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
|
7
7
|
license = "Apache 2.0"
|
@@ -7,7 +7,6 @@ from tensorlake.utils.logging import (
|
|
7
7
|
configure_logging_early()
|
8
8
|
|
9
9
|
import os
|
10
|
-
import re
|
11
10
|
import shutil
|
12
11
|
import sys
|
13
12
|
from importlib.metadata import version
|
@@ -33,6 +32,8 @@ from indexify.executor.executor_flavor import ExecutorFlavor
|
|
33
32
|
from indexify.executor.function_executor.server.subprocess_function_executor_server_factory import (
|
34
33
|
SubprocessFunctionExecutorServerFactory,
|
35
34
|
)
|
35
|
+
from indexify.executor.host_resources.host_resources import HostResourcesProvider
|
36
|
+
from indexify.executor.host_resources.nvidia_gpu_allocator import NvidiaGPUAllocator
|
36
37
|
from indexify.executor.monitoring.health_checker.generic_health_checker import (
|
37
38
|
GenericHealthChecker,
|
38
39
|
)
|
@@ -225,6 +226,7 @@ def executor(
|
|
225
226
|
monitoring_server_port=monitoring_server_port,
|
226
227
|
enable_grpc_state_reconciler=enable_grpc_state_reconciler,
|
227
228
|
blob_store=blob_store,
|
229
|
+
host_resources_provider=HostResourcesProvider(NvidiaGPUAllocator(logger)),
|
228
230
|
).run()
|
229
231
|
|
230
232
|
|
@@ -24,6 +24,7 @@ from .function_executor.server.function_executor_server_factory import (
|
|
24
24
|
from .grpc.channel_manager import ChannelManager
|
25
25
|
from .grpc.state_reconciler import ExecutorStateReconciler
|
26
26
|
from .grpc.state_reporter import ExecutorStateReporter
|
27
|
+
from .host_resources.host_resources import HostResourcesProvider
|
27
28
|
from .metrics.executor import (
|
28
29
|
METRIC_TASKS_COMPLETED_OUTCOME_ALL,
|
29
30
|
METRIC_TASKS_COMPLETED_OUTCOME_ERROR_CUSTOMER_CODE,
|
@@ -71,6 +72,7 @@ class Executor:
|
|
71
72
|
monitoring_server_port: int,
|
72
73
|
enable_grpc_state_reconciler: bool,
|
73
74
|
blob_store: BLOBStore,
|
75
|
+
host_resources_provider: HostResourcesProvider,
|
74
76
|
):
|
75
77
|
self._logger = structlog.get_logger(module=__name__)
|
76
78
|
self._is_shutdown: bool = False
|
@@ -118,6 +120,7 @@ class Executor:
|
|
118
120
|
function_allowlist=self._function_allowlist,
|
119
121
|
function_executor_states=self._function_executor_states,
|
120
122
|
channel_manager=self._channel_manager,
|
123
|
+
host_resources_provider=host_resources_provider,
|
121
124
|
logger=self._logger,
|
122
125
|
)
|
123
126
|
self._state_reporter.update_executor_status(
|
@@ -171,6 +174,7 @@ class Executor:
|
|
171
174
|
executor_info: Dict[str, str] = {
|
172
175
|
"id": id,
|
173
176
|
"dev_mode": str(development_mode),
|
177
|
+
"flavor": flavor.name,
|
174
178
|
"version": version,
|
175
179
|
"code_path": str(code_path),
|
176
180
|
"server_addr": server_addr,
|
@@ -179,6 +183,8 @@ class Executor:
|
|
179
183
|
"enable_grpc_state_reconciler": str(enable_grpc_state_reconciler),
|
180
184
|
"hostname": gethostname(),
|
181
185
|
}
|
186
|
+
for key, value in labels.items():
|
187
|
+
executor_info["label_" + key] = value
|
182
188
|
executor_info.update(function_allowlist_to_info_dict(function_allowlist))
|
183
189
|
metric_executor_info.info(executor_info)
|
184
190
|
|
@@ -18,10 +18,12 @@ from indexify.proto.executor_api_pb2 import (
|
|
18
18
|
from indexify.proto.executor_api_pb2 import (
|
19
19
|
FunctionExecutorStatus as FunctionExecutorStatusProto,
|
20
20
|
)
|
21
|
+
from indexify.proto.executor_api_pb2 import GPUModel as GPUModelProto
|
21
22
|
from indexify.proto.executor_api_pb2 import (
|
22
|
-
GPUModel,
|
23
23
|
GPUResources,
|
24
|
-
HostResources,
|
24
|
+
)
|
25
|
+
from indexify.proto.executor_api_pb2 import HostResources as HostResourcesProto
|
26
|
+
from indexify.proto.executor_api_pb2 import (
|
25
27
|
ReportExecutorStateRequest,
|
26
28
|
)
|
27
29
|
from indexify.proto.executor_api_pb2_grpc import ExecutorAPIStub
|
@@ -33,6 +35,7 @@ from ..function_executor.function_executor_states_container import (
|
|
33
35
|
FunctionExecutorStatesContainer,
|
34
36
|
)
|
35
37
|
from ..function_executor.function_executor_status import FunctionExecutorStatus
|
38
|
+
from ..host_resources.host_resources import HostResources, HostResourcesProvider
|
36
39
|
from ..runtime_probes import RuntimeProbes
|
37
40
|
from .channel_manager import ChannelManager
|
38
41
|
from .metrics.state_reporter import (
|
@@ -57,6 +60,7 @@ class ExecutorStateReporter:
|
|
57
60
|
function_allowlist: Optional[List[FunctionURI]],
|
58
61
|
function_executor_states: FunctionExecutorStatesContainer,
|
59
62
|
channel_manager: ChannelManager,
|
63
|
+
host_resources_provider: HostResourcesProvider,
|
60
64
|
logger: Any,
|
61
65
|
reporting_interval_sec: int = _REPORTING_INTERVAL_SEC,
|
62
66
|
):
|
@@ -72,6 +76,9 @@ class ExecutorStateReporter:
|
|
72
76
|
self._channel_manager = channel_manager
|
73
77
|
self._logger: Any = logger.bind(module=__name__)
|
74
78
|
self._reporting_interval_sec: int = reporting_interval_sec
|
79
|
+
self._total_host_resources: HostResourcesProto = _host_resources_to_proto(
|
80
|
+
host_resources_provider.total_resources(logger)
|
81
|
+
)
|
75
82
|
|
76
83
|
self._is_shutdown: bool = False
|
77
84
|
self._executor_status: ExecutorStatus = ExecutorStatus.EXECUTOR_STATUS_UNKNOWN
|
@@ -130,7 +137,9 @@ class ExecutorStateReporter:
|
|
130
137
|
flavor=_to_grpc_executor_flavor(self._flavor, self._logger),
|
131
138
|
version=self._version,
|
132
139
|
status=self._executor_status,
|
133
|
-
free_resources=await self._fetch_free_host_resources(),
|
140
|
+
# Server requires free_resources to be set but ignores its value for now.
|
141
|
+
free_resources=self._total_host_resources,
|
142
|
+
total_resources=self._total_host_resources,
|
134
143
|
allowed_functions=self._allowed_functions,
|
135
144
|
function_executor_states=await self._fetch_function_executor_states(),
|
136
145
|
labels=self._labels,
|
@@ -151,18 +160,6 @@ class ExecutorStateReporter:
|
|
151
160
|
"""
|
152
161
|
self._is_shutdown = True
|
153
162
|
|
154
|
-
async def _fetch_free_host_resources(self) -> HostResources:
|
155
|
-
# TODO: Implement host resource metrics reporting.
|
156
|
-
return HostResources(
|
157
|
-
cpu_count=0,
|
158
|
-
memory_bytes=0,
|
159
|
-
disk_bytes=0,
|
160
|
-
gpu=GPUResources(
|
161
|
-
count=0,
|
162
|
-
model=GPUModel.GPU_MODEL_UNKNOWN,
|
163
|
-
),
|
164
|
-
)
|
165
|
-
|
166
163
|
async def _fetch_function_executor_states(self) -> List[FunctionExecutorStateProto]:
|
167
164
|
states = []
|
168
165
|
|
@@ -264,3 +261,22 @@ def _state_hash(state: ExecutorState) -> str:
|
|
264
261
|
hasher = hashlib.sha256(usedforsecurity=False)
|
265
262
|
hasher.update(serialized_state)
|
266
263
|
return hasher.hexdigest()
|
264
|
+
|
265
|
+
|
266
|
+
def _host_resources_to_proto(host_resources: HostResources) -> HostResourcesProto:
|
267
|
+
proto = HostResourcesProto(
|
268
|
+
cpu_count=host_resources.cpu_count,
|
269
|
+
memory_bytes=host_resources.memory_mb * 1024 * 1024,
|
270
|
+
disk_bytes=host_resources.disk_mb * 1024 * 1024,
|
271
|
+
)
|
272
|
+
if len(host_resources.gpus) > 0:
|
273
|
+
proto.gpu.CopyFrom(
|
274
|
+
GPUResources(
|
275
|
+
count=len(host_resources.gpus),
|
276
|
+
deprecated_model=GPUModelProto.GPU_MODEL_UNKNOWN, # TODO: Remove this field
|
277
|
+
model=host_resources.gpus[
|
278
|
+
0
|
279
|
+
].model.value, # All GPUs should have the same model
|
280
|
+
)
|
281
|
+
)
|
282
|
+
return proto
|
@@ -0,0 +1,50 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
from pydantic import BaseModel
|
4
|
+
|
5
|
+
from .nvidia_gpu import NvidiaGPUInfo
|
6
|
+
from .nvidia_gpu_allocator import NvidiaGPUAllocator
|
7
|
+
|
8
|
+
|
9
|
+
class HostResources(BaseModel):
|
10
|
+
cpu_count: int
|
11
|
+
memory_mb: int
|
12
|
+
disk_mb: int
|
13
|
+
gpus: List[NvidiaGPUInfo]
|
14
|
+
|
15
|
+
|
16
|
+
class HostResourcesProvider:
|
17
|
+
"""
|
18
|
+
HostResourcesProvider is a class that provides information about the host resources.
|
19
|
+
"""
|
20
|
+
|
21
|
+
def __init__(self, gpu_allocator: NvidiaGPUAllocator):
|
22
|
+
self._gpu_allocator: NvidiaGPUAllocator = gpu_allocator
|
23
|
+
|
24
|
+
def total_resources(self, logger) -> HostResources:
|
25
|
+
"""Returns all hardware resources that exist at the host.
|
26
|
+
|
27
|
+
Raises Exception on error.
|
28
|
+
"""
|
29
|
+
logger = logger.bind(module=__name__)
|
30
|
+
|
31
|
+
return HostResources(
|
32
|
+
cpu_count=0, # TODO: Implement for Linux and MacOS hosts
|
33
|
+
memory_mb=0, # TODO: Implement for Linux and MacOS hosts
|
34
|
+
disk_mb=0, # TODO: Implement for Linux and MacOS hosts
|
35
|
+
gpus=self._gpu_allocator.list_all(),
|
36
|
+
)
|
37
|
+
|
38
|
+
def free_resources(self, logger) -> HostResources:
|
39
|
+
"""Returns all hardware resources that are free at the host.
|
40
|
+
|
41
|
+
Raises Exception on error.
|
42
|
+
"""
|
43
|
+
logger = logger.bind(module=__name__)
|
44
|
+
|
45
|
+
return HostResources(
|
46
|
+
cpu_count=0, # TODO: Implement for Linux and MacOS hosts
|
47
|
+
memory_mb=0, # TODO: Implement for Linux and MacOS hosts
|
48
|
+
disk_mb=0, # TODO: Implement for Linux and MacOS hosts
|
49
|
+
gpus=self._gpu_allocator.list_free(),
|
50
|
+
)
|
@@ -0,0 +1,77 @@
|
|
1
|
+
import subprocess
|
2
|
+
from enum import Enum
|
3
|
+
from typing import Any, List
|
4
|
+
|
5
|
+
from pydantic import BaseModel
|
6
|
+
from tensorlake.functions_sdk.resources import GPU_MODEL
|
7
|
+
|
8
|
+
|
9
|
+
# Only NVIDIA GPUs currently supported in Tensorlake SDK are listed here.
|
10
|
+
class NVIDIA_GPU_MODEL(str, Enum):
|
11
|
+
UNKNOWN = "UNKNOWN"
|
12
|
+
A100_40GB = GPU_MODEL.A100_40GB
|
13
|
+
A100_80GB = GPU_MODEL.A100_80GB
|
14
|
+
H100_80GB = GPU_MODEL.H100
|
15
|
+
|
16
|
+
|
17
|
+
class NvidiaGPUInfo(BaseModel):
|
18
|
+
index: str
|
19
|
+
uuid: str
|
20
|
+
product_name: str # The official product name.
|
21
|
+
model: NVIDIA_GPU_MODEL
|
22
|
+
|
23
|
+
|
24
|
+
def nvidia_gpus_are_available() -> bool:
|
25
|
+
try:
|
26
|
+
result: subprocess.CompletedProcess = subprocess.run(
|
27
|
+
["nvidia-smi"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
|
28
|
+
)
|
29
|
+
return result.returncode == 0
|
30
|
+
except Exception:
|
31
|
+
return False
|
32
|
+
|
33
|
+
|
34
|
+
def fetch_nvidia_gpu_infos(logger: Any) -> List[NvidiaGPUInfo]:
|
35
|
+
logger = logger.bind(module=__name__)
|
36
|
+
logger.info("Fetching GPU information")
|
37
|
+
|
38
|
+
try:
|
39
|
+
result: subprocess.CompletedProcess = subprocess.run(
|
40
|
+
["nvidia-smi", "--query-gpu=index,name,uuid", "--format=csv,noheader"],
|
41
|
+
capture_output=True,
|
42
|
+
check=True,
|
43
|
+
text=True,
|
44
|
+
)
|
45
|
+
except subprocess.CalledProcessError as e:
|
46
|
+
logger.error("Failed to fetch GPU information", exc_info=e)
|
47
|
+
raise
|
48
|
+
|
49
|
+
infos: List[NvidiaGPUInfo] = []
|
50
|
+
for line in result.stdout.splitlines():
|
51
|
+
# Example:
|
52
|
+
# nvidia-smi --query-gpu=index,name,uuid --format=csv,noheader
|
53
|
+
# 0, NVIDIA A100-SXM4-80GB, GPU-89fdc1e1-18b2-f499-c12b-82bcb9bfb3fa
|
54
|
+
# 1, NVIDIA A100-PCIE-40GB, GPU-e9c9aa65-bff3-405a-ab7c-dc879cc88169
|
55
|
+
# 2, NVIDIA H100 80GB HBM3, GPU-8c35f4c9-4dff-c9a2-866f-afb5d82e1dd7
|
56
|
+
parts = line.split(",")
|
57
|
+
index = parts[0].strip()
|
58
|
+
product_name = parts[1].strip()
|
59
|
+
uuid = parts[2].strip()
|
60
|
+
|
61
|
+
model = NVIDIA_GPU_MODEL.UNKNOWN
|
62
|
+
if product_name.startswith("NVIDIA A100") and product_name.endswith("80GB"):
|
63
|
+
model = NVIDIA_GPU_MODEL.A100_80GB
|
64
|
+
if product_name.startswith("NVIDIA A100") and product_name.endswith("40GB"):
|
65
|
+
model = NVIDIA_GPU_MODEL.A100_40GB
|
66
|
+
elif product_name.startswith("NVIDIA H100"):
|
67
|
+
model = NVIDIA_GPU_MODEL.H100_80GB
|
68
|
+
|
69
|
+
if model == NVIDIA_GPU_MODEL.UNKNOWN:
|
70
|
+
logger.warning("Unknown GPU model detected", nvidia_smi_output=line)
|
71
|
+
|
72
|
+
infos.append(
|
73
|
+
NvidiaGPUInfo(
|
74
|
+
index=index, uuid=uuid, product_name=product_name, model=model
|
75
|
+
)
|
76
|
+
)
|
77
|
+
return infos
|
@@ -0,0 +1,52 @@
|
|
1
|
+
from typing import Any, List
|
2
|
+
|
3
|
+
from .nvidia_gpu import NvidiaGPUInfo, fetch_nvidia_gpu_infos, nvidia_gpus_are_available
|
4
|
+
|
5
|
+
|
6
|
+
class NvidiaGPUAllocator:
|
7
|
+
"""NvidiaGPUAllocator is a class that manages the allocation and deallocation of GPUs."""
|
8
|
+
|
9
|
+
def __init__(self, logger: Any):
|
10
|
+
gpu_infos: List[NvidiaGPUInfo] = []
|
11
|
+
|
12
|
+
if nvidia_gpus_are_available():
|
13
|
+
gpu_infos = fetch_nvidia_gpu_infos(logger)
|
14
|
+
logger.bind(module=__name__).info(
|
15
|
+
"Fetched information about NVIDIA GPUs:", info=gpu_infos
|
16
|
+
)
|
17
|
+
|
18
|
+
self._all_gpus: List[NvidiaGPUInfo] = gpu_infos
|
19
|
+
self._free_gpus: List[NvidiaGPUInfo] = list(gpu_infos)
|
20
|
+
|
21
|
+
def allocate(self, count: int, logger: Any) -> List[NvidiaGPUInfo]:
|
22
|
+
"""
|
23
|
+
Allocates a specified number of GPUs.
|
24
|
+
|
25
|
+
Args:
|
26
|
+
count (int): The number of GPUs to allocate.
|
27
|
+
|
28
|
+
Returns:
|
29
|
+
List[NvidiaGPUInfo]: A list of allocated GPUs. The list is empty if count is 0.
|
30
|
+
|
31
|
+
Raises:
|
32
|
+
ValueError: If the requested number of GPUs exceeds free GPUs.
|
33
|
+
Exception: If an error occurs during allocation.
|
34
|
+
"""
|
35
|
+
if count > len(self._free_gpus):
|
36
|
+
raise ValueError(
|
37
|
+
f"Not enough free GPUs available, requested={count}, available={len(self._free_gpus)}"
|
38
|
+
)
|
39
|
+
|
40
|
+
allocated_gpus: List[NvidiaGPUInfo] = []
|
41
|
+
for _ in range(count):
|
42
|
+
allocated_gpus.append(self._free_gpus.pop())
|
43
|
+
return allocated_gpus
|
44
|
+
|
45
|
+
def deallocate(self, gpus: List[NvidiaGPUInfo]) -> None:
|
46
|
+
self._free_gpus.extend(gpus)
|
47
|
+
|
48
|
+
def list_all(self) -> List[NvidiaGPUInfo]:
|
49
|
+
return list(self._all_gpus) # Return a copy to avoid external modification
|
50
|
+
|
51
|
+
def list_free(self) -> List[NvidiaGPUInfo]:
|
52
|
+
return list(self._free_gpus) # Return a copy to avoid external modification
|
@@ -28,26 +28,17 @@ message DataPayload {
|
|
28
28
|
|
29
29
|
// ===== report_executor_state RPC =====
|
30
30
|
|
31
|
+
// Deprecated enum. TODO: remove when all the code is using model string.
|
31
32
|
enum GPUModel {
|
32
33
|
GPU_MODEL_UNKNOWN = 0;
|
33
|
-
GPU_MODEL_NVIDIA_TESLA_T4_16GB = 10;
|
34
|
-
GPU_MODEL_NVIDIA_TESLA_V100_16GB = 20;
|
35
|
-
GPU_MODEL_NVIDIA_A10_24GB = 30;
|
36
|
-
GPU_MODEL_NVIDIA_A6000_48GB = 40;
|
37
|
-
// A100 GPUs
|
38
|
-
GPU_MODEL_NVIDIA_A100_SXM4_40GB = 50;
|
39
|
-
GPU_MODEL_NVIDIA_A100_SXM4_80GB = 51;
|
40
|
-
GPU_MODEL_NVIDIA_A100_PCI_40GB = 52;
|
41
|
-
// H100 GPUs
|
42
|
-
GPU_MODEL_NVIDIA_H100_SXM5_80GB = 60;
|
43
|
-
GPU_MODEL_NVIDIA_H100_PCI_80GB = 61;
|
44
|
-
GPU_MODEL_NVIDIA_RTX_6000_24GB = 62;
|
45
34
|
}
|
46
35
|
|
47
36
|
// Free GPUs available at the Executor.
|
48
37
|
message GPUResources {
|
49
38
|
optional uint32 count = 1;
|
50
|
-
optional GPUModel model = 2;
|
39
|
+
optional GPUModel deprecated_model = 2;
|
40
|
+
// Either GPU_MODEL value from Tensorlake SDK or "UNKNOWN"
|
41
|
+
optional string model = 3;
|
51
42
|
}
|
52
43
|
|
53
44
|
// Resources that we're currently tracking and limiting on Executor.
|
@@ -55,7 +46,8 @@ message HostResources {
|
|
55
46
|
optional uint32 cpu_count = 1;
|
56
47
|
optional uint64 memory_bytes = 2;
|
57
48
|
optional uint64 disk_bytes = 3;
|
58
|
-
|
49
|
+
// Not set if no GPUs are available.
|
50
|
+
optional GPUResources gpu = 4;
|
59
51
|
}
|
60
52
|
|
61
53
|
// Specification of a single function that is allowed to be run on the Executor.
|
@@ -138,8 +130,10 @@ message ExecutorState {
|
|
138
130
|
optional ExecutorFlavor flavor = 4;
|
139
131
|
optional string version = 5;
|
140
132
|
optional ExecutorStatus status = 6;
|
133
|
+
// Total resources available at the Executor.
|
134
|
+
optional HostResources total_resources = 13;
|
141
135
|
// Free resources available at the Executor.
|
142
|
-
optional HostResources free_resources = 7;
|
136
|
+
optional HostResources free_resources = 7; // Not used right now.
|
143
137
|
// Empty allowed_functions list means that any function can run on the Executor.
|
144
138
|
repeated AllowedFunction allowed_functions = 8;
|
145
139
|
repeated FunctionExecutorState function_executor_states = 9;
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
3
|
+
# NO CHECKED-IN PROTOBUF GENCODE
|
4
|
+
# source: indexify/proto/executor_api.proto
|
5
|
+
# Protobuf Python Version: 5.29.0
|
6
|
+
"""Generated protocol buffer code."""
|
7
|
+
from google.protobuf import descriptor as _descriptor
|
8
|
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
9
|
+
from google.protobuf import runtime_version as _runtime_version
|
10
|
+
from google.protobuf import symbol_database as _symbol_database
|
11
|
+
from google.protobuf.internal import builder as _builder
|
12
|
+
|
13
|
+
_runtime_version.ValidateProtobufRuntimeVersion(
|
14
|
+
_runtime_version.Domain.PUBLIC, 5, 29, 0, "", "indexify/proto/executor_api.proto"
|
15
|
+
)
|
16
|
+
# @@protoc_insertion_point(imports)
|
17
|
+
|
18
|
+
_sym_db = _symbol_database.Default()
|
19
|
+
|
20
|
+
|
21
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
|
22
|
+
b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\x87\x02\n\x0b\x44\x61taPayload\x12\x11\n\x04path\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04size\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x03\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x04\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x05\x88\x01\x01\x42\x07\n\x05_pathB\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"\x99\x01\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x38\n\x10\x64\x65precated_model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x12\x12\n\x05model\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x08\n\x06_countB\x13\n\x11_deprecated_modelB\x08\n\x06_model"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xc5\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12\x16\n\tgpu_count\x18\x04 
\x01(\rH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x0c\n\n_gpu_count"\xbf\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12<\n\x0fresource_limits\x18\x08 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x07\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x12\n\x10_resource_limitsB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resources"\xe8\x01\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12\x1b\n\x0estatus_message\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_statusB\x11\n\x0f_status_message"\x9d\x06\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1d\n\x10\x64\x65velopment_mode\x18\x02 \x01(\x08H\x01\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06\x66lavor\x18\x04 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorFlavorH\x03\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x34\n\x06status\x18\x06 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x05\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r 
\x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12;\n\x0e\x66ree_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x07\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x08\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\t\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x13\n\x11_development_modeB\x0b\n\t_hostnameB\t\n\x07_flavorB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB\x11\n\x0f_free_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"l\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x42\x11\n\x0f_executor_state"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 \x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xa4\x05\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x16\n\tinput_key\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x1f\n\x12reducer_output_key\x18\t \x01(\tH\x07\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x08\x88\x01\x01\x12\x30\n\x05input\x18\x0b 
\x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\x0b\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\x0c\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\x0c\n\n_input_keyB\x15\n\x13_reducer_output_keyB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\x7f\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_task"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\x87\x06\n\x18ReportTaskOutcomeRequest\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x04\x88\x01\x01\x12\x32\n\x07outcome\x18\x07 \x01(\x0e\x32\x1c.executor_api_pb.TaskOutcomeH\x05\x88\x01\x01\x12\x1a\n\rinvocation_id\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x18\n\x0b\x65xecutor_id\x18\t \x01(\tH\x07\x88\x01\x01\x12\x14\n\x07reducer\x18\n \x01(\x08H\x08\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x30\n\nfn_outputs\x18\x0c 
\x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x31\n\x06stderr\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12=\n\x0foutput_encoding\x18\r \x01(\x0e\x32\x1f.executor_api_pb.OutputEncodingH\x0b\x88\x01\x01\x12$\n\x17output_encoding_version\x18\x05 \x01(\x04H\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_outcomeB\x10\n\x0e_invocation_idB\x0e\n\x0c_executor_idB\n\n\x08_reducerB\t\n\x07_stdoutB\t\n\x07_stderrB\x12\n\x10_output_encodingB\x1a\n\x18_output_encoding_version"\x1b\n\x19ReportTaskOutcomeResponse*\xab\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03*!\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00*\xca\x03\n\x16\x46unctionExecutorStatus\x12$\n FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12(\n$FUNCTION_EXECUTOR_STATUS_STARTING_UP\x10\x01\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR\x10\x02\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR\x10\x03\x12!\n\x1d\x46UNCTION_EXECUTOR_STATUS_IDLE\x10\x04\x12)\n%FUNCTION_EXECUTOR_STATUS_RUNNING_TASK\x10\x05\x12&\n"FUNCTION_EXECUTOR_STATUS_UNHEALTHY\x10\x06\x12%\n!FUNCTION_EXECUTOR_STATUS_STOPPING\x10\x07\x12$\n 
FUNCTION_EXECUTOR_STATUS_STOPPED\x10\x08\x12%\n!FUNCTION_EXECUTOR_STATUS_SHUTDOWN\x10\t*\xc3\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1c\n\x18\x45XECUTOR_STATUS_STOPPING\x10\x04\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x05*d\n\x0e\x45xecutorFlavor\x12\x1b\n\x17\x45XECUTOR_FLAVOR_UNKNOWN\x10\x00\x12\x17\n\x13\x45XECUTOR_FLAVOR_OSS\x10\x01\x12\x1c\n\x18\x45XECUTOR_FLAVOR_PLATFORM\x10\x02*[\n\x0bTaskOutcome\x12\x18\n\x14TASK_OUTCOME_UNKNOWN\x10\x00\x12\x18\n\x14TASK_OUTCOME_SUCCESS\x10\x01\x12\x18\n\x14TASK_OUTCOME_FAILURE\x10\x02*\x7f\n\x0eOutputEncoding\x12\x1b\n\x17OUTPUT_ENCODING_UNKNOWN\x10\x00\x12\x18\n\x14OUTPUT_ENCODING_JSON\x10\x01\x12\x1a\n\x16OUTPUT_ENCODING_PICKLE\x10\x02\x12\x1a\n\x16OUTPUT_ENCODING_BINARY\x10\x03\x32\xef\x02\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x12n\n\x13report_task_outcome\x12).executor_api_pb.ReportTaskOutcomeRequest\x1a*.executor_api_pb.ReportTaskOutcomeResponse"\x00\x62\x06proto3'
|
23
|
+
)
|
24
|
+
|
25
|
+
_globals = globals()
|
26
|
+
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
27
|
+
_builder.BuildTopDescriptorsAndMessages(
|
28
|
+
DESCRIPTOR, "indexify.proto.executor_api_pb2", _globals
|
29
|
+
)
|
30
|
+
if not _descriptor._USE_C_DESCRIPTORS:
|
31
|
+
DESCRIPTOR._loaded_options = None
|
32
|
+
_globals["_EXECUTORSTATE_LABELSENTRY"]._loaded_options = None
|
33
|
+
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_options = b"8\001"
|
34
|
+
_globals["_DATAPAYLOADENCODING"]._serialized_start = 4908
|
35
|
+
_globals["_DATAPAYLOADENCODING"]._serialized_end = 5079
|
36
|
+
_globals["_GPUMODEL"]._serialized_start = 5081
|
37
|
+
_globals["_GPUMODEL"]._serialized_end = 5114
|
38
|
+
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 5117
|
39
|
+
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 5575
|
40
|
+
_globals["_EXECUTORSTATUS"]._serialized_start = 5578
|
41
|
+
_globals["_EXECUTORSTATUS"]._serialized_end = 5773
|
42
|
+
_globals["_EXECUTORFLAVOR"]._serialized_start = 5775
|
43
|
+
_globals["_EXECUTORFLAVOR"]._serialized_end = 5875
|
44
|
+
_globals["_TASKOUTCOME"]._serialized_start = 5877
|
45
|
+
_globals["_TASKOUTCOME"]._serialized_end = 5968
|
46
|
+
_globals["_OUTPUTENCODING"]._serialized_start = 5970
|
47
|
+
_globals["_OUTPUTENCODING"]._serialized_end = 6097
|
48
|
+
_globals["_DATAPAYLOAD"]._serialized_start = 55
|
49
|
+
_globals["_DATAPAYLOAD"]._serialized_end = 318
|
50
|
+
_globals["_GPURESOURCES"]._serialized_start = 321
|
51
|
+
_globals["_GPURESOURCES"]._serialized_end = 474
|
52
|
+
_globals["_HOSTRESOURCES"]._serialized_start = 477
|
53
|
+
_globals["_HOSTRESOURCES"]._serialized_end = 671
|
54
|
+
_globals["_ALLOWEDFUNCTION"]._serialized_start = 674
|
55
|
+
_globals["_ALLOWEDFUNCTION"]._serialized_end = 861
|
56
|
+
_globals["_FUNCTIONEXECUTORRESOURCES"]._serialized_start = 864
|
57
|
+
_globals["_FUNCTIONEXECUTORRESOURCES"]._serialized_end = 1061
|
58
|
+
_globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_start = 1064
|
59
|
+
_globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_end = 1639
|
60
|
+
_globals["_FUNCTIONEXECUTORSTATE"]._serialized_start = 1642
|
61
|
+
_globals["_FUNCTIONEXECUTORSTATE"]._serialized_end = 1874
|
62
|
+
_globals["_EXECUTORSTATE"]._serialized_start = 1877
|
63
|
+
_globals["_EXECUTORSTATE"]._serialized_end = 2674
|
64
|
+
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_start = 2474
|
65
|
+
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_end = 2519
|
66
|
+
_globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_start = 2676
|
67
|
+
_globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_end = 2784
|
68
|
+
_globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_start = 2786
|
69
|
+
_globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_end = 2815
|
70
|
+
_globals["_TASKRETRYPOLICY"]._serialized_start = 2818
|
71
|
+
_globals["_TASKRETRYPOLICY"]._serialized_end = 3025
|
72
|
+
_globals["_TASK"]._serialized_start = 3028
|
73
|
+
_globals["_TASK"]._serialized_end = 3704
|
74
|
+
_globals["_TASKALLOCATION"]._serialized_start = 3706
|
75
|
+
_globals["_TASKALLOCATION"]._serialized_end = 3833
|
76
|
+
_globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_start = 3835
|
77
|
+
_globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_end = 3910
|
78
|
+
_globals["_DESIREDEXECUTORSTATE"]._serialized_start = 3913
|
79
|
+
_globals["_DESIREDEXECUTORSTATE"]._serialized_end = 4098
|
80
|
+
_globals["_REPORTTASKOUTCOMEREQUEST"]._serialized_start = 4101
|
81
|
+
_globals["_REPORTTASKOUTCOMEREQUEST"]._serialized_end = 4876
|
82
|
+
_globals["_REPORTTASKOUTCOMERESPONSE"]._serialized_start = 4878
|
83
|
+
_globals["_REPORTTASKOUTCOMERESPONSE"]._serialized_end = 4905
|
84
|
+
_globals["_EXECUTORAPI"]._serialized_start = 6100
|
85
|
+
_globals["_EXECUTORAPI"]._serialized_end = 6467
|
86
|
+
# @@protoc_insertion_point(module_scope)
|
@@ -21,16 +21,6 @@ class DataPayloadEncoding(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
|
21
21
|
class GPUModel(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
22
22
|
__slots__ = ()
|
23
23
|
GPU_MODEL_UNKNOWN: _ClassVar[GPUModel]
|
24
|
-
GPU_MODEL_NVIDIA_TESLA_T4_16GB: _ClassVar[GPUModel]
|
25
|
-
GPU_MODEL_NVIDIA_TESLA_V100_16GB: _ClassVar[GPUModel]
|
26
|
-
GPU_MODEL_NVIDIA_A10_24GB: _ClassVar[GPUModel]
|
27
|
-
GPU_MODEL_NVIDIA_A6000_48GB: _ClassVar[GPUModel]
|
28
|
-
GPU_MODEL_NVIDIA_A100_SXM4_40GB: _ClassVar[GPUModel]
|
29
|
-
GPU_MODEL_NVIDIA_A100_SXM4_80GB: _ClassVar[GPUModel]
|
30
|
-
GPU_MODEL_NVIDIA_A100_PCI_40GB: _ClassVar[GPUModel]
|
31
|
-
GPU_MODEL_NVIDIA_H100_SXM5_80GB: _ClassVar[GPUModel]
|
32
|
-
GPU_MODEL_NVIDIA_H100_PCI_80GB: _ClassVar[GPUModel]
|
33
|
-
GPU_MODEL_NVIDIA_RTX_6000_24GB: _ClassVar[GPUModel]
|
34
24
|
|
35
25
|
class FunctionExecutorStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
36
26
|
__slots__ = ()
|
@@ -82,16 +72,6 @@ DATA_PAYLOAD_ENCODING_UTF8_JSON: DataPayloadEncoding
|
|
82
72
|
DATA_PAYLOAD_ENCODING_UTF8_TEXT: DataPayloadEncoding
|
83
73
|
DATA_PAYLOAD_ENCODING_BINARY_PICKLE: DataPayloadEncoding
|
84
74
|
GPU_MODEL_UNKNOWN: GPUModel
|
85
|
-
GPU_MODEL_NVIDIA_TESLA_T4_16GB: GPUModel
|
86
|
-
GPU_MODEL_NVIDIA_TESLA_V100_16GB: GPUModel
|
87
|
-
GPU_MODEL_NVIDIA_A10_24GB: GPUModel
|
88
|
-
GPU_MODEL_NVIDIA_A6000_48GB: GPUModel
|
89
|
-
GPU_MODEL_NVIDIA_A100_SXM4_40GB: GPUModel
|
90
|
-
GPU_MODEL_NVIDIA_A100_SXM4_80GB: GPUModel
|
91
|
-
GPU_MODEL_NVIDIA_A100_PCI_40GB: GPUModel
|
92
|
-
GPU_MODEL_NVIDIA_H100_SXM5_80GB: GPUModel
|
93
|
-
GPU_MODEL_NVIDIA_H100_PCI_80GB: GPUModel
|
94
|
-
GPU_MODEL_NVIDIA_RTX_6000_24GB: GPUModel
|
95
75
|
FUNCTION_EXECUTOR_STATUS_UNKNOWN: FunctionExecutorStatus
|
96
76
|
FUNCTION_EXECUTOR_STATUS_STARTING_UP: FunctionExecutorStatus
|
97
77
|
FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR: FunctionExecutorStatus
|
@@ -144,13 +124,18 @@ class DataPayload(_message.Message):
|
|
144
124
|
) -> None: ...
|
145
125
|
|
146
126
|
class GPUResources(_message.Message):
|
147
|
-
__slots__ = ("count", "model")
|
127
|
+
__slots__ = ("count", "deprecated_model", "model")
|
148
128
|
COUNT_FIELD_NUMBER: _ClassVar[int]
|
129
|
+
DEPRECATED_MODEL_FIELD_NUMBER: _ClassVar[int]
|
149
130
|
MODEL_FIELD_NUMBER: _ClassVar[int]
|
150
131
|
count: int
|
151
|
-
|
132
|
+
deprecated_model: GPUModel
|
133
|
+
model: str
|
152
134
|
def __init__(
|
153
|
-
self, count: _Optional[int] = ..., model: _Optional[_Union[GPUModel, str]] = ...
|
135
|
+
self,
|
136
|
+
count: _Optional[int] = ...,
|
137
|
+
deprecated_model: _Optional[_Union[GPUModel, str]] = ...,
|
138
|
+
model: _Optional[str] = ...,
|
154
139
|
) -> None: ...
|
155
140
|
|
156
141
|
class HostResources(_message.Message):
|
@@ -281,6 +266,7 @@ class ExecutorState(_message.Message):
|
|
281
266
|
"flavor",
|
282
267
|
"version",
|
283
268
|
"status",
|
269
|
+
"total_resources",
|
284
270
|
"free_resources",
|
285
271
|
"allowed_functions",
|
286
272
|
"function_executor_states",
|
@@ -305,6 +291,7 @@ class ExecutorState(_message.Message):
|
|
305
291
|
FLAVOR_FIELD_NUMBER: _ClassVar[int]
|
306
292
|
VERSION_FIELD_NUMBER: _ClassVar[int]
|
307
293
|
STATUS_FIELD_NUMBER: _ClassVar[int]
|
294
|
+
TOTAL_RESOURCES_FIELD_NUMBER: _ClassVar[int]
|
308
295
|
FREE_RESOURCES_FIELD_NUMBER: _ClassVar[int]
|
309
296
|
ALLOWED_FUNCTIONS_FIELD_NUMBER: _ClassVar[int]
|
310
297
|
FUNCTION_EXECUTOR_STATES_FIELD_NUMBER: _ClassVar[int]
|
@@ -317,6 +304,7 @@ class ExecutorState(_message.Message):
|
|
317
304
|
flavor: ExecutorFlavor
|
318
305
|
version: str
|
319
306
|
status: ExecutorStatus
|
307
|
+
total_resources: HostResources
|
320
308
|
free_resources: HostResources
|
321
309
|
allowed_functions: _containers.RepeatedCompositeFieldContainer[AllowedFunction]
|
322
310
|
function_executor_states: _containers.RepeatedCompositeFieldContainer[
|
@@ -333,6 +321,7 @@ class ExecutorState(_message.Message):
|
|
333
321
|
flavor: _Optional[_Union[ExecutorFlavor, str]] = ...,
|
334
322
|
version: _Optional[str] = ...,
|
335
323
|
status: _Optional[_Union[ExecutorStatus, str]] = ...,
|
324
|
+
total_resources: _Optional[_Union[HostResources, _Mapping]] = ...,
|
336
325
|
free_resources: _Optional[_Union[HostResources, _Mapping]] = ...,
|
337
326
|
allowed_functions: _Optional[
|
338
327
|
_Iterable[_Union[AllowedFunction, _Mapping]]
|
@@ -1,86 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
3
|
-
# NO CHECKED-IN PROTOBUF GENCODE
|
4
|
-
# source: indexify/proto/executor_api.proto
|
5
|
-
# Protobuf Python Version: 5.29.0
|
6
|
-
"""Generated protocol buffer code."""
|
7
|
-
from google.protobuf import descriptor as _descriptor
|
8
|
-
from google.protobuf import descriptor_pool as _descriptor_pool
|
9
|
-
from google.protobuf import runtime_version as _runtime_version
|
10
|
-
from google.protobuf import symbol_database as _symbol_database
|
11
|
-
from google.protobuf.internal import builder as _builder
|
12
|
-
|
13
|
-
_runtime_version.ValidateProtobufRuntimeVersion(
|
14
|
-
_runtime_version.Domain.PUBLIC, 5, 29, 0, "", "indexify/proto/executor_api.proto"
|
15
|
-
)
|
16
|
-
# @@protoc_insertion_point(imports)
|
17
|
-
|
18
|
-
_sym_db = _symbol_database.Default()
|
19
|
-
|
20
|
-
|
21
|
-
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
|
22
|
-
b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\x87\x02\n\x0b\x44\x61taPayload\x12\x11\n\x04path\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04size\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x03\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x04\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x05\x88\x01\x01\x42\x07\n\x05_pathB\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"e\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_model"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xc5\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12\x16\n\tgpu_count\x18\x04 \x01(\rH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x0c\n\n_gpu_count"\xbf\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12<\n\x0fresource_limits\x18\x08 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x07\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x12\n\x10_resource_limitsB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resources"\xe8\x01\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12\x1b\n\x0estatus_message\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_statusB\x11\n\x0f_status_message"\xcb\x05\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1d\n\x10\x64\x65velopment_mode\x18\x02 \x01(\x08H\x01\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06\x66lavor\x18\x04 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorFlavorH\x03\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x34\n\x06status\x18\x06 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x05\x88\x01\x01\x12;\n\x0e\x66ree_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t 
\x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x07\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\x08\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x13\n\x11_development_modeB\x0b\n\t_hostnameB\t\n\x07_flavorB\n\n\x08_versionB\t\n\x07_statusB\x11\n\x0f_free_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"l\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x42\x11\n\x0f_executor_state"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 \x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xa4\x05\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x16\n\tinput_key\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x1f\n\x12reducer_output_key\x18\t \x01(\tH\x07\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x08\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\x0b\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 
.executor_api_pb.TaskRetryPolicyH\x0c\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\x0c\n\n_input_keyB\x15\n\x13_reducer_output_keyB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\x7f\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_task"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\x87\x06\n\x18ReportTaskOutcomeRequest\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x04\x88\x01\x01\x12\x32\n\x07outcome\x18\x07 \x01(\x0e\x32\x1c.executor_api_pb.TaskOutcomeH\x05\x88\x01\x01\x12\x1a\n\rinvocation_id\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x18\n\x0b\x65xecutor_id\x18\t \x01(\tH\x07\x88\x01\x01\x12\x14\n\x07reducer\x18\n \x01(\x08H\x08\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x30\n\nfn_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x31\n\x06stderr\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12=\n\x0foutput_encoding\x18\r 
\x01(\x0e\x32\x1f.executor_api_pb.OutputEncodingH\x0b\x88\x01\x01\x12$\n\x17output_encoding_version\x18\x05 \x01(\x04H\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_outcomeB\x10\n\x0e_invocation_idB\x0e\n\x0c_executor_idB\n\n\x08_reducerB\t\n\x07_stdoutB\t\n\x07_stderrB\x12\n\x10_output_encodingB\x1a\n\x18_output_encoding_version"\x1b\n\x19ReportTaskOutcomeResponse*\xab\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03*\x86\x03\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12"\n\x1eGPU_MODEL_NVIDIA_TESLA_T4_16GB\x10\n\x12$\n GPU_MODEL_NVIDIA_TESLA_V100_16GB\x10\x14\x12\x1d\n\x19GPU_MODEL_NVIDIA_A10_24GB\x10\x1e\x12\x1f\n\x1bGPU_MODEL_NVIDIA_A6000_48GB\x10(\x12#\n\x1fGPU_MODEL_NVIDIA_A100_SXM4_40GB\x10\x32\x12#\n\x1fGPU_MODEL_NVIDIA_A100_SXM4_80GB\x10\x33\x12"\n\x1eGPU_MODEL_NVIDIA_A100_PCI_40GB\x10\x34\x12#\n\x1fGPU_MODEL_NVIDIA_H100_SXM5_80GB\x10<\x12"\n\x1eGPU_MODEL_NVIDIA_H100_PCI_80GB\x10=\x12"\n\x1eGPU_MODEL_NVIDIA_RTX_6000_24GB\x10>*\xca\x03\n\x16\x46unctionExecutorStatus\x12$\n FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12(\n$FUNCTION_EXECUTOR_STATUS_STARTING_UP\x10\x01\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR\x10\x02\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR\x10\x03\x12!\n\x1d\x46UNCTION_EXECUTOR_STATUS_IDLE\x10\x04\x12)\n%FUNCTION_EXECUTOR_STATUS_RUNNING_TASK\x10\x05\x12&\n"FUNCTION_EXECUTOR_STATUS_UNHEALTHY\x10\x06\x12%\n!FUNCTION_EXECUTOR_STATUS_STOPPING\x10\x07\x12$\n 
FUNCTION_EXECUTOR_STATUS_STOPPED\x10\x08\x12%\n!FUNCTION_EXECUTOR_STATUS_SHUTDOWN\x10\t*\xc3\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1c\n\x18\x45XECUTOR_STATUS_STOPPING\x10\x04\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x05*d\n\x0e\x45xecutorFlavor\x12\x1b\n\x17\x45XECUTOR_FLAVOR_UNKNOWN\x10\x00\x12\x17\n\x13\x45XECUTOR_FLAVOR_OSS\x10\x01\x12\x1c\n\x18\x45XECUTOR_FLAVOR_PLATFORM\x10\x02*[\n\x0bTaskOutcome\x12\x18\n\x14TASK_OUTCOME_UNKNOWN\x10\x00\x12\x18\n\x14TASK_OUTCOME_SUCCESS\x10\x01\x12\x18\n\x14TASK_OUTCOME_FAILURE\x10\x02*\x7f\n\x0eOutputEncoding\x12\x1b\n\x17OUTPUT_ENCODING_UNKNOWN\x10\x00\x12\x18\n\x14OUTPUT_ENCODING_JSON\x10\x01\x12\x1a\n\x16OUTPUT_ENCODING_PICKLE\x10\x02\x12\x1a\n\x16OUTPUT_ENCODING_BINARY\x10\x03\x32\xef\x02\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x12n\n\x13report_task_outcome\x12).executor_api_pb.ReportTaskOutcomeRequest\x1a*.executor_api_pb.ReportTaskOutcomeResponse"\x00\x62\x06proto3'
|
23
|
-
)
|
24
|
-
|
25
|
-
_globals = globals()
|
26
|
-
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
27
|
-
_builder.BuildTopDescriptorsAndMessages(
|
28
|
-
DESCRIPTOR, "indexify.proto.executor_api_pb2", _globals
|
29
|
-
)
|
30
|
-
if not _descriptor._USE_C_DESCRIPTORS:
|
31
|
-
DESCRIPTOR._loaded_options = None
|
32
|
-
_globals["_EXECUTORSTATE_LABELSENTRY"]._loaded_options = None
|
33
|
-
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_options = b"8\001"
|
34
|
-
_globals["_DATAPAYLOADENCODING"]._serialized_start = 4773
|
35
|
-
_globals["_DATAPAYLOADENCODING"]._serialized_end = 4944
|
36
|
-
_globals["_GPUMODEL"]._serialized_start = 4947
|
37
|
-
_globals["_GPUMODEL"]._serialized_end = 5337
|
38
|
-
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 5340
|
39
|
-
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 5798
|
40
|
-
_globals["_EXECUTORSTATUS"]._serialized_start = 5801
|
41
|
-
_globals["_EXECUTORSTATUS"]._serialized_end = 5996
|
42
|
-
_globals["_EXECUTORFLAVOR"]._serialized_start = 5998
|
43
|
-
_globals["_EXECUTORFLAVOR"]._serialized_end = 6098
|
44
|
-
_globals["_TASKOUTCOME"]._serialized_start = 6100
|
45
|
-
_globals["_TASKOUTCOME"]._serialized_end = 6191
|
46
|
-
_globals["_OUTPUTENCODING"]._serialized_start = 6193
|
47
|
-
_globals["_OUTPUTENCODING"]._serialized_end = 6320
|
48
|
-
_globals["_DATAPAYLOAD"]._serialized_start = 55
|
49
|
-
_globals["_DATAPAYLOAD"]._serialized_end = 318
|
50
|
-
_globals["_GPURESOURCES"]._serialized_start = 320
|
51
|
-
_globals["_GPURESOURCES"]._serialized_end = 421
|
52
|
-
_globals["_HOSTRESOURCES"]._serialized_start = 424
|
53
|
-
_globals["_HOSTRESOURCES"]._serialized_end = 618
|
54
|
-
_globals["_ALLOWEDFUNCTION"]._serialized_start = 621
|
55
|
-
_globals["_ALLOWEDFUNCTION"]._serialized_end = 808
|
56
|
-
_globals["_FUNCTIONEXECUTORRESOURCES"]._serialized_start = 811
|
57
|
-
_globals["_FUNCTIONEXECUTORRESOURCES"]._serialized_end = 1008
|
58
|
-
_globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_start = 1011
|
59
|
-
_globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_end = 1586
|
60
|
-
_globals["_FUNCTIONEXECUTORSTATE"]._serialized_start = 1589
|
61
|
-
_globals["_FUNCTIONEXECUTORSTATE"]._serialized_end = 1821
|
62
|
-
_globals["_EXECUTORSTATE"]._serialized_start = 1824
|
63
|
-
_globals["_EXECUTORSTATE"]._serialized_end = 2539
|
64
|
-
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_start = 2359
|
65
|
-
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_end = 2404
|
66
|
-
_globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_start = 2541
|
67
|
-
_globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_end = 2649
|
68
|
-
_globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_start = 2651
|
69
|
-
_globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_end = 2680
|
70
|
-
_globals["_TASKRETRYPOLICY"]._serialized_start = 2683
|
71
|
-
_globals["_TASKRETRYPOLICY"]._serialized_end = 2890
|
72
|
-
_globals["_TASK"]._serialized_start = 2893
|
73
|
-
_globals["_TASK"]._serialized_end = 3569
|
74
|
-
_globals["_TASKALLOCATION"]._serialized_start = 3571
|
75
|
-
_globals["_TASKALLOCATION"]._serialized_end = 3698
|
76
|
-
_globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_start = 3700
|
77
|
-
_globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_end = 3775
|
78
|
-
_globals["_DESIREDEXECUTORSTATE"]._serialized_start = 3778
|
79
|
-
_globals["_DESIREDEXECUTORSTATE"]._serialized_end = 3963
|
80
|
-
_globals["_REPORTTASKOUTCOMEREQUEST"]._serialized_start = 3966
|
81
|
-
_globals["_REPORTTASKOUTCOMEREQUEST"]._serialized_end = 4741
|
82
|
-
_globals["_REPORTTASKOUTCOMERESPONSE"]._serialized_start = 4743
|
83
|
-
_globals["_REPORTTASKOUTCOMERESPONSE"]._serialized_end = 4770
|
84
|
-
_globals["_EXECUTORAPI"]._serialized_start = 6323
|
85
|
-
_globals["_EXECUTORAPI"]._serialized_end = 6690
|
86
|
-
# @@protoc_insertion_point(module_scope)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/function_executor.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/health_checker.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/function_executor/single_task_runner.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/grpc/function_executor_controller.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/monitoring/health_check_handler.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/monitoring/prometheus_metrics_handler.py
RENAMED
File without changes
|
File without changes
|
{indexify-0.3.23 → indexify-0.3.25}/src/indexify/executor/monitoring/startup_probe_handler.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|