indexify 0.3.29__tar.gz → 0.3.30__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.3.29 → indexify-0.3.30}/PKG-INFO +1 -1
- {indexify-0.3.29 → indexify-0.3.30}/pyproject.toml +1 -1
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/cli/cli.py +13 -31
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/executor.py +0 -3
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +12 -28
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/grpc/state_reporter.py +4 -3
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/host_resources/nvidia_gpu.py +26 -12
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/proto/executor_api.proto +2 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/proto/executor_api_pb2.py +14 -14
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/proto/executor_api_pb2.pyi +4 -0
- {indexify-0.3.29 → indexify-0.3.30}/README.md +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/README.md +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/api_objects.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/blob_store/blob_store.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/blob_store/local_fs_blob_store.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/blob_store/metrics/blob_store.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/blob_store/s3_blob_store.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/downloader.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/executor_flavor.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/function_executor.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/function_executor_state.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/function_executor_states_container.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/function_executor_status.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/health_checker.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/invocation_state_client.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/metrics/function_executor_state.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/metrics/function_executor_state_container.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/metrics/single_task_runner.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/single_task_runner.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/task_input.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/task_output.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/grpc/channel_manager.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/grpc/function_executor_controller.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/grpc/metrics/channel_manager.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/grpc/metrics/state_reconciler.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/grpc/metrics/state_reporter.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/grpc/metrics/task_controller.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/grpc/state_reconciler.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/grpc/task_controller.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/host_resources/host_resources.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/metrics/downloader.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/metrics/executor.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/metrics/task_fetcher.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/metrics/task_reporter.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/metrics/task_runner.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/monitoring/function_allowlist.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/monitoring/handler.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/monitoring/metrics.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/monitoring/server.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/runtime_probes.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/task_fetcher.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/task_reporter.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/task_runner.py +0 -0
- {indexify-0.3.29 → indexify-0.3.30}/src/indexify/proto/executor_api_pb2_grpc.py +0 -0
@@ -1,7 +1,7 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "indexify"
|
3
3
|
# Incremented if any of the components provided in this package are updated.
|
4
|
-
version = "0.3.29"
|
4
|
+
version = "0.3.30"
|
5
5
|
description = "Open Source Indexify components and helper tools"
|
6
6
|
authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
|
7
7
|
license = "Apache 2.0"
|
@@ -78,13 +78,15 @@ def build_image(
|
|
78
78
|
|
79
79
|
|
80
80
|
@app.command(
|
81
|
-
|
81
|
+
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
|
82
|
+
help="Runs Executor that connects to the Indexify server and starts running its tasks",
|
82
83
|
)
|
83
84
|
def executor(
|
85
|
+
ctx: typer.Context,
|
84
86
|
server_addr: str = "localhost:8900",
|
85
87
|
grpc_server_addr: str = "localhost:8901",
|
86
|
-
|
87
|
-
bool, typer.Option("--
|
88
|
+
verbose_logs: Annotated[
|
89
|
+
bool, typer.Option("--verbose", "-v", help="Run the executor in verbose mode")
|
88
90
|
] = False,
|
89
91
|
function_uris: Annotated[
|
90
92
|
Optional[List[str]],
|
@@ -103,11 +105,6 @@ def executor(
|
|
103
105
|
executor_cache: Optional[str] = typer.Option(
|
104
106
|
"~/.indexify/executor_cache", help="Path to the executor cache directory"
|
105
107
|
),
|
106
|
-
# Registered ports range ends at 49151.
|
107
|
-
ports: Tuple[int, int] = typer.Option(
|
108
|
-
(50000, 51000),
|
109
|
-
help="Range of localhost TCP ports to be used by Function Executors",
|
110
|
-
),
|
111
108
|
monitoring_server_host: Annotated[
|
112
109
|
str,
|
113
110
|
typer.Option(
|
@@ -142,15 +139,11 @@ def executor(
|
|
142
139
|
),
|
143
140
|
] = False,
|
144
141
|
):
|
145
|
-
if
|
142
|
+
if verbose_logs:
|
146
143
|
compact_tracebacks: bool = os.getenv("INDEXIFY_COMPACT_TRACEBACKS", "1") == "1"
|
147
144
|
configure_development_mode_logging(compact_tracebacks=compact_tracebacks)
|
148
145
|
else:
|
149
146
|
configure_production_mode_logging()
|
150
|
-
if function_uris is None:
|
151
|
-
raise typer.BadParameter(
|
152
|
-
"At least one function must be specified when not running in development mode"
|
153
|
-
)
|
154
147
|
|
155
148
|
kv_labels: Dict[str, str] = {}
|
156
149
|
for label in labels:
|
@@ -170,30 +163,23 @@ def executor(
|
|
170
163
|
executor_version=executor_version,
|
171
164
|
labels=kv_labels,
|
172
165
|
executor_cache=executor_cache,
|
173
|
-
ports=ports,
|
174
166
|
functions=function_uris,
|
175
|
-
|
167
|
+
verbose_logs=verbose_logs,
|
176
168
|
monitoring_server_host=monitoring_server_host,
|
177
169
|
monitoring_server_port=monitoring_server_port,
|
178
170
|
enable_grpc_state_reconciler=enable_grpc_state_reconciler,
|
179
171
|
)
|
172
|
+
if ctx.args:
|
173
|
+
logger.warning(
|
174
|
+
"Unknown arguments passed to the executor",
|
175
|
+
unknown_args=ctx.args,
|
176
|
+
)
|
180
177
|
|
181
178
|
executor_cache = Path(executor_cache).expanduser().absolute()
|
182
179
|
if os.path.exists(executor_cache):
|
183
180
|
shutil.rmtree(executor_cache)
|
184
181
|
Path(executor_cache).mkdir(parents=True, exist_ok=True)
|
185
182
|
|
186
|
-
start_port: int = ports[0]
|
187
|
-
end_port: int = ports[1]
|
188
|
-
if start_port >= end_port:
|
189
|
-
console.print(
|
190
|
-
Text(
|
191
|
-
f"start port {start_port} should be less than {end_port}", style="red"
|
192
|
-
),
|
193
|
-
)
|
194
|
-
exit(1)
|
195
|
-
|
196
|
-
# Enable all available blob stores in OSS because we don't know which one is going to be used.
|
197
183
|
blob_store: BLOBStore = BLOBStore(
|
198
184
|
# Local FS mode is used in tests and in cases when user wants to store data on NFS.
|
199
185
|
local=LocalFSBLOBStore(),
|
@@ -219,17 +205,13 @@ def executor(
|
|
219
205
|
|
220
206
|
Executor(
|
221
207
|
id=executor_id,
|
222
|
-
development_mode=dev,
|
223
208
|
flavor=ExecutorFlavor.OSS,
|
224
209
|
version=executor_version,
|
225
210
|
labels=kv_labels,
|
226
211
|
health_checker=GenericHealthChecker(),
|
227
212
|
code_path=executor_cache,
|
228
213
|
function_allowlist=_parse_function_uris(function_uris),
|
229
|
-
function_executor_server_factory=SubprocessFunctionExecutorServerFactory(
|
230
|
-
development_mode=dev,
|
231
|
-
server_ports=range(ports[0], ports[1]),
|
232
|
-
),
|
214
|
+
function_executor_server_factory=SubprocessFunctionExecutorServerFactory(),
|
233
215
|
server_addr=server_addr,
|
234
216
|
grpc_server_addr=grpc_server_addr,
|
235
217
|
config_path=config_path,
|
@@ -57,7 +57,6 @@ class Executor:
|
|
57
57
|
def __init__(
|
58
58
|
self,
|
59
59
|
id: str,
|
60
|
-
development_mode: bool,
|
61
60
|
flavor: ExecutorFlavor,
|
62
61
|
version: str,
|
63
62
|
labels: Dict[str, str],
|
@@ -116,7 +115,6 @@ class Executor:
|
|
116
115
|
flavor=flavor,
|
117
116
|
version=version,
|
118
117
|
labels=labels,
|
119
|
-
development_mode=development_mode,
|
120
118
|
function_allowlist=self._function_allowlist,
|
121
119
|
function_executor_states=self._function_executor_states,
|
122
120
|
channel_manager=self._channel_manager,
|
@@ -173,7 +171,6 @@ class Executor:
|
|
173
171
|
|
174
172
|
executor_info: Dict[str, str] = {
|
175
173
|
"id": id,
|
176
|
-
"dev_mode": str(development_mode),
|
177
174
|
"flavor": flavor.name,
|
178
175
|
"version": version,
|
179
176
|
"code_path": str(code_path),
|
@@ -1,7 +1,8 @@
|
|
1
1
|
import asyncio
|
2
2
|
import os
|
3
3
|
import signal
|
4
|
-
|
4
|
+
import socket
|
5
|
+
from typing import Any, Optional
|
5
6
|
|
6
7
|
from .function_executor_server_factory import (
|
7
8
|
FunctionExecutorServerConfiguration,
|
@@ -10,15 +11,15 @@ from .function_executor_server_factory import (
|
|
10
11
|
from .subprocess_function_executor_server import SubprocessFunctionExecutorServer
|
11
12
|
|
12
13
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
self._free_ports: List[int] = list(reversed(server_ports))
|
14
|
+
def get_free_tcp_port(iface_name="localhost") -> int:
|
15
|
+
tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
16
|
+
tcp.bind((iface_name, 0))
|
17
|
+
_, port = tcp.getsockname()
|
18
|
+
tcp.close()
|
19
|
+
return port
|
20
|
+
|
21
21
|
|
22
|
+
class SubprocessFunctionExecutorServerFactory(FunctionExecutorServerFactory):
|
22
23
|
async def create(
|
23
24
|
self, config: FunctionExecutorServerConfiguration, logger: Any
|
24
25
|
) -> SubprocessFunctionExecutorServer:
|
@@ -32,14 +33,13 @@ class SubprocessFunctionExecutorServerFactory(FunctionExecutorServerFactory):
|
|
32
33
|
)
|
33
34
|
|
34
35
|
try:
|
35
|
-
port =
|
36
|
+
port = get_free_tcp_port()
|
37
|
+
logger.info("allocated function executor port", port=port)
|
36
38
|
args = [
|
37
39
|
f"--executor-id={config.executor_id}", # use = as executor_id can start with -
|
38
40
|
"--address",
|
39
41
|
_server_address(port),
|
40
42
|
]
|
41
|
-
if self._development_mode:
|
42
|
-
args.append("--dev")
|
43
43
|
# Run the process with our stdout, stderr. We want to see process logs and exceptions in our process output.
|
44
44
|
# This is useful for debugging. Customer function stdout and stderr is captured and returned in the response
|
45
45
|
# so we won't see it in our process outputs. This is the right behavior as customer function stdout and stderr
|
@@ -56,8 +56,6 @@ class SubprocessFunctionExecutorServerFactory(FunctionExecutorServerFactory):
|
|
56
56
|
address=_server_address(port),
|
57
57
|
)
|
58
58
|
except Exception as e:
|
59
|
-
if port is not None:
|
60
|
-
self._release_port(port)
|
61
59
|
logger.error(
|
62
60
|
"failed starting a new Function Executor process at port {port}",
|
63
61
|
exc_info=e,
|
@@ -91,20 +89,6 @@ class SubprocessFunctionExecutorServerFactory(FunctionExecutorServerFactory):
|
|
91
89
|
"failed to cleanup Function Executor process",
|
92
90
|
exc_info=e,
|
93
91
|
)
|
94
|
-
finally:
|
95
|
-
self._release_port(port)
|
96
|
-
|
97
|
-
def _allocate_port(self) -> int:
|
98
|
-
# No asyncio.Lock is required here because this operation never awaits
|
99
|
-
# and it is always called from the same thread where the event loop is running.
|
100
|
-
return self._free_ports.pop()
|
101
|
-
|
102
|
-
def _release_port(self, port: int) -> None:
|
103
|
-
# No asyncio.Lock is required here because this operation never awaits
|
104
|
-
# and it is always called from the same thread where the event loop is running.
|
105
|
-
#
|
106
|
-
# Prefer port reuse to repro as many possible issues deterministically as possible.
|
107
|
-
self._free_ports.append(port)
|
108
92
|
|
109
93
|
|
110
94
|
def _server_address(port: int) -> str:
|
@@ -55,7 +55,6 @@ class ExecutorStateReporter:
|
|
55
55
|
flavor: ExecutorFlavor,
|
56
56
|
version: str,
|
57
57
|
labels: Dict[str, str],
|
58
|
-
development_mode: bool,
|
59
58
|
function_allowlist: Optional[List[FunctionURI]],
|
60
59
|
function_executor_states: FunctionExecutorStatesContainer,
|
61
60
|
channel_manager: ChannelManager,
|
@@ -67,7 +66,6 @@ class ExecutorStateReporter:
|
|
67
66
|
self._flavor: ExecutorFlavor = flavor
|
68
67
|
self._version: str = version
|
69
68
|
self._labels: Dict[str, str] = labels.copy()
|
70
|
-
self._development_mode: bool = development_mode
|
71
69
|
self._hostname: str = gethostname()
|
72
70
|
self._function_executor_states: FunctionExecutorStatesContainer = (
|
73
71
|
function_executor_states
|
@@ -153,7 +151,6 @@ class ExecutorStateReporter:
|
|
153
151
|
metric_state_report_rpcs.inc()
|
154
152
|
state = ExecutorState(
|
155
153
|
executor_id=self._executor_id,
|
156
|
-
development_mode=self._development_mode,
|
157
154
|
hostname=self._hostname,
|
158
155
|
flavor=_to_grpc_executor_flavor(self._flavor, self._logger),
|
159
156
|
version=self._version,
|
@@ -309,5 +306,9 @@ def _gpu_model_to_proto(gpu_model: NVIDIA_GPU_MODEL) -> GPUModelProto:
|
|
309
306
|
return GPUModelProto.GPU_MODEL_NVIDIA_H100_80GB
|
310
307
|
elif gpu_model == NVIDIA_GPU_MODEL.TESLA_T4:
|
311
308
|
return GPUModelProto.GPU_MODEL_NVIDIA_TESLA_T4
|
309
|
+
elif gpu_model == NVIDIA_GPU_MODEL.A6000:
|
310
|
+
return GPUModelProto.GPU_MODEL_NVIDIA_A6000
|
311
|
+
elif gpu_model == NVIDIA_GPU_MODEL.A10:
|
312
|
+
return GPUModelProto.GPU_MODEL_NVIDIA_A10
|
312
313
|
else:
|
313
314
|
return GPUModelProto.GPU_MODEL_UNKNOWN
|
@@ -6,12 +6,15 @@ from pydantic import BaseModel
|
|
6
6
|
|
7
7
|
|
8
8
|
# Only NVIDIA GPUs currently supported in Tensorlake SDK are listed here.
|
9
|
+
# GPU models coming with multiple memory sizes have a different enum value per memory size.
|
9
10
|
class NVIDIA_GPU_MODEL(str, Enum):
|
10
11
|
UNKNOWN = "UNKNOWN"
|
11
12
|
A100_40GB = "A100-40GB"
|
12
13
|
A100_80GB = "A100-80GB"
|
13
|
-
H100_80GB = "H100"
|
14
|
+
H100_80GB = "H100-80GB"
|
14
15
|
TESLA_T4 = "T4"
|
16
|
+
A6000 = "A6000"
|
17
|
+
A10 = "A10"
|
15
18
|
|
16
19
|
|
17
20
|
class NvidiaGPUInfo(BaseModel):
|
@@ -54,28 +57,39 @@ def fetch_nvidia_gpu_infos(logger: Any) -> List[NvidiaGPUInfo]:
|
|
54
57
|
# 1, NVIDIA A100-PCIE-40GB, GPU-e9c9aa65-bff3-405a-ab7c-dc879cc88169
|
55
58
|
# 2, NVIDIA H100 80GB HBM3, GPU-8c35f4c9-4dff-c9a2-866f-afb5d82e1dd7
|
56
59
|
# 3, Tesla T4, GPU-2a7fadae-a692-1c44-2c57-6645a0d117e4
|
60
|
+
# 4, NVIDIA RTX A6000, GPU-efe4927a-743f-e4cc-28bb-da604f545b6d
|
61
|
+
# 5, NVIDIA A10, GPU-12463b8c-40bb-7322-6c7a-ef48bd7bd39b
|
57
62
|
parts = line.split(",")
|
58
63
|
index = parts[0].strip()
|
59
64
|
product_name = parts[1].strip()
|
60
65
|
uuid = parts[2].strip()
|
61
66
|
|
62
|
-
model =
|
63
|
-
if
|
64
|
-
model = NVIDIA_GPU_MODEL.A100_80GB
|
65
|
-
if product_name.startswith("NVIDIA A100") and product_name.endswith("40GB"):
|
66
|
-
model = NVIDIA_GPU_MODEL.A100_40GB
|
67
|
-
elif product_name.startswith("NVIDIA H100"):
|
68
|
-
model = NVIDIA_GPU_MODEL.H100_80GB
|
69
|
-
elif product_name.startswith("Tesla T4"):
|
70
|
-
model = NVIDIA_GPU_MODEL.TESLA_T4
|
71
|
-
else:
|
67
|
+
model = _product_name_to_model(product_name)
|
68
|
+
if model == NVIDIA_GPU_MODEL.UNKNOWN:
|
72
69
|
logger.warning(
|
73
70
|
"Unknown GPU model was detected, ignoring", nvidia_smi_output=line
|
74
71
|
)
|
75
|
-
|
76
72
|
infos.append(
|
77
73
|
NvidiaGPUInfo(
|
78
74
|
index=index, uuid=uuid, product_name=product_name, model=model
|
79
75
|
)
|
80
76
|
)
|
77
|
+
|
81
78
|
return infos
|
79
|
+
|
80
|
+
|
81
|
+
def _product_name_to_model(product_name: str) -> NVIDIA_GPU_MODEL:
|
82
|
+
if product_name.startswith("NVIDIA A100") and product_name.endswith("80GB"):
|
83
|
+
return NVIDIA_GPU_MODEL.A100_80GB
|
84
|
+
if product_name.startswith("NVIDIA A100") and product_name.endswith("40GB"):
|
85
|
+
return NVIDIA_GPU_MODEL.A100_40GB
|
86
|
+
elif product_name.startswith("NVIDIA H100") and "80GB" in product_name:
|
87
|
+
return NVIDIA_GPU_MODEL.H100_80GB
|
88
|
+
elif product_name.startswith("Tesla T4"):
|
89
|
+
return NVIDIA_GPU_MODEL.TESLA_T4
|
90
|
+
elif product_name.startswith("NVIDIA RTX A6000"):
|
91
|
+
return NVIDIA_GPU_MODEL.A6000
|
92
|
+
elif product_name.startswith("NVIDIA A10"):
|
93
|
+
return NVIDIA_GPU_MODEL.A10
|
94
|
+
else:
|
95
|
+
return NVIDIA_GPU_MODEL.UNKNOWN
|
@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
|
|
19
19
|
|
20
20
|
|
21
21
|
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
|
22
|
-
b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\x87\x02\n\x0b\x44\x61taPayload\x12\x11\n\x04path\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04size\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x03\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x04\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x05\x88\x01\x01\x42\x07\n\x05_pathB\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"k\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_modelJ\x04\x08\x03\x10\x04"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xc5\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12\x16\n\tgpu_count\x18\x04 \x01(\rH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x0c\n\n_gpu_count"\xbf\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 
\x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12<\n\x0fresource_limits\x18\x08 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x07\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x12\n\x10_resource_limitsB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resources"\xbe\x01\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_statusJ\x04\x08\x03\x10\x04"\xc3\x06\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1d\n\x10\x64\x65velopment_mode\x18\x02 \x01(\x08H\x01\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06\x66lavor\x18\x04 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorFlavorH\x03\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x34\n\x06status\x18\x06 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x05\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12N\n!total_function_executor_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x07\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 
.executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x08\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\t\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x13\n\x11_development_modeB\x0b\n\t_hostnameB\t\n\x07_flavorB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB$\n"_total_function_executor_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"l\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x42\x11\n\x0f_executor_state"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 \x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xa4\x05\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x16\n\tinput_key\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x1f\n\x12reducer_output_key\x18\t \x01(\tH\x07\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x08\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r 
\x01(\tH\x0b\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\x0c\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\x0c\n\n_input_keyB\x15\n\x13_reducer_output_keyB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\x7f\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_task"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\x87\x06\n\x18ReportTaskOutcomeRequest\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x04\x88\x01\x01\x12\x32\n\x07outcome\x18\x07 \x01(\x0e\x32\x1c.executor_api_pb.TaskOutcomeH\x05\x88\x01\x01\x12\x1a\n\rinvocation_id\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x18\n\x0b\x65xecutor_id\x18\t \x01(\tH\x07\x88\x01\x01\x12\x14\n\x07reducer\x18\n \x01(\x08H\x08\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x30\n\nfn_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x31\n\x06stderr\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12=\n\x0foutput_encoding\x18\r 
\x01(\x0e\x32\x1f.executor_api_pb.OutputEncodingH\x0b\x88\x01\x01\x12$\n\x17output_encoding_version\x18\x05 \x01(\x04H\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_outcomeB\x10\n\x0e_invocation_idB\x0e\n\x0c_executor_idB\n\n\x08_reducerB\t\n\x07_stdoutB\t\n\x07_stderrB\x12\n\x10_output_encodingB\x1a\n\x18_output_encoding_version"\x1b\n\x19ReportTaskOutcomeResponse*\xab\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03*\xa0\x01\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_40GB\x10\x01\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_80GB\x10\x02\x12\x1e\n\x1aGPU_MODEL_NVIDIA_H100_80GB\x10\x03\x12\x1d\n\x19GPU_MODEL_NVIDIA_TESLA_T4\x10\x04*\xca\x03\n\x16\x46unctionExecutorStatus\x12$\n FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12(\n$FUNCTION_EXECUTOR_STATUS_STARTING_UP\x10\x01\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR\x10\x02\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR\x10\x03\x12!\n\x1d\x46UNCTION_EXECUTOR_STATUS_IDLE\x10\x04\x12)\n%FUNCTION_EXECUTOR_STATUS_RUNNING_TASK\x10\x05\x12&\n"FUNCTION_EXECUTOR_STATUS_UNHEALTHY\x10\x06\x12%\n!FUNCTION_EXECUTOR_STATUS_STOPPING\x10\x07\x12$\n 
FUNCTION_EXECUTOR_STATUS_STOPPED\x10\x08\x12%\n!FUNCTION_EXECUTOR_STATUS_SHUTDOWN\x10\t*\xc3\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1c\n\x18\x45XECUTOR_STATUS_STOPPING\x10\x04\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x05*d\n\x0e\x45xecutorFlavor\x12\x1b\n\x17\x45XECUTOR_FLAVOR_UNKNOWN\x10\x00\x12\x17\n\x13\x45XECUTOR_FLAVOR_OSS\x10\x01\x12\x1c\n\x18\x45XECUTOR_FLAVOR_PLATFORM\x10\x02*[\n\x0bTaskOutcome\x12\x18\n\x14TASK_OUTCOME_UNKNOWN\x10\x00\x12\x18\n\x14TASK_OUTCOME_SUCCESS\x10\x01\x12\x18\n\x14TASK_OUTCOME_FAILURE\x10\x02*\x7f\n\x0eOutputEncoding\x12\x1b\n\x17OUTPUT_ENCODING_UNKNOWN\x10\x00\x12\x18\n\x14OUTPUT_ENCODING_JSON\x10\x01\x12\x1a\n\x16OUTPUT_ENCODING_PICKLE\x10\x02\x12\x1a\n\x16OUTPUT_ENCODING_BINARY\x10\x03\x32\xef\x02\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x12n\n\x13report_task_outcome\x12).executor_api_pb.ReportTaskOutcomeRequest\x1a*.executor_api_pb.ReportTaskOutcomeResponse"\x00\x62\x06proto3'
|
22
|
+
b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\x87\x02\n\x0b\x44\x61taPayload\x12\x11\n\x04path\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04size\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x03\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x04\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x05\x88\x01\x01\x42\x07\n\x05_pathB\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"k\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_modelJ\x04\x08\x03\x10\x04"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xc5\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12\x16\n\tgpu_count\x18\x04 \x01(\rH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x0c\n\n_gpu_count"\xbf\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 
\x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12<\n\x0fresource_limits\x18\x08 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x07\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x12\n\x10_resource_limitsB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resources"\xbe\x01\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_statusJ\x04\x08\x03\x10\x04"\xc3\x06\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1d\n\x10\x64\x65velopment_mode\x18\x02 \x01(\x08H\x01\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06\x66lavor\x18\x04 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorFlavorH\x03\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x34\n\x06status\x18\x06 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x05\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12N\n!total_function_executor_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x07\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 
.executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x08\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\t\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x13\n\x11_development_modeB\x0b\n\t_hostnameB\t\n\x07_flavorB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB$\n"_total_function_executor_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"l\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x42\x11\n\x0f_executor_state"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 \x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xa4\x05\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x16\n\tinput_key\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x1f\n\x12reducer_output_key\x18\t \x01(\tH\x07\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x08\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r 
\x01(\tH\x0b\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\x0c\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\x0c\n\n_input_keyB\x15\n\x13_reducer_output_keyB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\x7f\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_task"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\x87\x06\n\x18ReportTaskOutcomeRequest\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x04\x88\x01\x01\x12\x32\n\x07outcome\x18\x07 \x01(\x0e\x32\x1c.executor_api_pb.TaskOutcomeH\x05\x88\x01\x01\x12\x1a\n\rinvocation_id\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x18\n\x0b\x65xecutor_id\x18\t \x01(\tH\x07\x88\x01\x01\x12\x14\n\x07reducer\x18\n \x01(\x08H\x08\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x30\n\nfn_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x31\n\x06stderr\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12=\n\x0foutput_encoding\x18\r 
\x01(\x0e\x32\x1f.executor_api_pb.OutputEncodingH\x0b\x88\x01\x01\x12$\n\x17output_encoding_version\x18\x05 \x01(\x04H\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_outcomeB\x10\n\x0e_invocation_idB\x0e\n\x0c_executor_idB\n\n\x08_reducerB\t\n\x07_stdoutB\t\n\x07_stderrB\x12\n\x10_output_encodingB\x1a\n\x18_output_encoding_version"\x1b\n\x19ReportTaskOutcomeResponse*\xab\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03*\xd6\x01\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_40GB\x10\x01\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_80GB\x10\x02\x12\x1e\n\x1aGPU_MODEL_NVIDIA_H100_80GB\x10\x03\x12\x1d\n\x19GPU_MODEL_NVIDIA_TESLA_T4\x10\x04\x12\x1a\n\x16GPU_MODEL_NVIDIA_A6000\x10\x05\x12\x18\n\x14GPU_MODEL_NVIDIA_A10\x10\x06*\xca\x03\n\x16\x46unctionExecutorStatus\x12$\n FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12(\n$FUNCTION_EXECUTOR_STATUS_STARTING_UP\x10\x01\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR\x10\x02\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR\x10\x03\x12!\n\x1d\x46UNCTION_EXECUTOR_STATUS_IDLE\x10\x04\x12)\n%FUNCTION_EXECUTOR_STATUS_RUNNING_TASK\x10\x05\x12&\n"FUNCTION_EXECUTOR_STATUS_UNHEALTHY\x10\x06\x12%\n!FUNCTION_EXECUTOR_STATUS_STOPPING\x10\x07\x12$\n 
FUNCTION_EXECUTOR_STATUS_STOPPED\x10\x08\x12%\n!FUNCTION_EXECUTOR_STATUS_SHUTDOWN\x10\t*\xc3\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1c\n\x18\x45XECUTOR_STATUS_STOPPING\x10\x04\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x05*d\n\x0e\x45xecutorFlavor\x12\x1b\n\x17\x45XECUTOR_FLAVOR_UNKNOWN\x10\x00\x12\x17\n\x13\x45XECUTOR_FLAVOR_OSS\x10\x01\x12\x1c\n\x18\x45XECUTOR_FLAVOR_PLATFORM\x10\x02*[\n\x0bTaskOutcome\x12\x18\n\x14TASK_OUTCOME_UNKNOWN\x10\x00\x12\x18\n\x14TASK_OUTCOME_SUCCESS\x10\x01\x12\x18\n\x14TASK_OUTCOME_FAILURE\x10\x02*\x7f\n\x0eOutputEncoding\x12\x1b\n\x17OUTPUT_ENCODING_UNKNOWN\x10\x00\x12\x18\n\x14OUTPUT_ENCODING_JSON\x10\x01\x12\x1a\n\x16OUTPUT_ENCODING_PICKLE\x10\x02\x12\x1a\n\x16OUTPUT_ENCODING_BINARY\x10\x03\x32\xef\x02\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x12n\n\x13report_task_outcome\x12).executor_api_pb.ReportTaskOutcomeRequest\x1a*.executor_api_pb.ReportTaskOutcomeResponse"\x00\x62\x06proto3'
|
23
23
|
)
|
24
24
|
|
25
25
|
_globals = globals()
|
@@ -34,17 +34,17 @@ if not _descriptor._USE_C_DESCRIPTORS:
|
|
34
34
|
_globals["_DATAPAYLOADENCODING"]._serialized_start = 4857
|
35
35
|
_globals["_DATAPAYLOADENCODING"]._serialized_end = 5028
|
36
36
|
_globals["_GPUMODEL"]._serialized_start = 5031
|
37
|
-
_globals["_GPUMODEL"]._serialized_end =
|
38
|
-
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start =
|
39
|
-
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end =
|
40
|
-
_globals["_EXECUTORSTATUS"]._serialized_start =
|
41
|
-
_globals["_EXECUTORSTATUS"]._serialized_end =
|
42
|
-
_globals["_EXECUTORFLAVOR"]._serialized_start =
|
43
|
-
_globals["_EXECUTORFLAVOR"]._serialized_end =
|
44
|
-
_globals["_TASKOUTCOME"]._serialized_start =
|
45
|
-
_globals["_TASKOUTCOME"]._serialized_end =
|
46
|
-
_globals["_OUTPUTENCODING"]._serialized_start =
|
47
|
-
_globals["_OUTPUTENCODING"]._serialized_end =
|
37
|
+
_globals["_GPUMODEL"]._serialized_end = 5245
|
38
|
+
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 5248
|
39
|
+
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 5706
|
40
|
+
_globals["_EXECUTORSTATUS"]._serialized_start = 5709
|
41
|
+
_globals["_EXECUTORSTATUS"]._serialized_end = 5904
|
42
|
+
_globals["_EXECUTORFLAVOR"]._serialized_start = 5906
|
43
|
+
_globals["_EXECUTORFLAVOR"]._serialized_end = 6006
|
44
|
+
_globals["_TASKOUTCOME"]._serialized_start = 6008
|
45
|
+
_globals["_TASKOUTCOME"]._serialized_end = 6099
|
46
|
+
_globals["_OUTPUTENCODING"]._serialized_start = 6101
|
47
|
+
_globals["_OUTPUTENCODING"]._serialized_end = 6228
|
48
48
|
_globals["_DATAPAYLOAD"]._serialized_start = 55
|
49
49
|
_globals["_DATAPAYLOAD"]._serialized_end = 318
|
50
50
|
_globals["_GPURESOURCES"]._serialized_start = 320
|
@@ -81,6 +81,6 @@ if not _descriptor._USE_C_DESCRIPTORS:
|
|
81
81
|
_globals["_REPORTTASKOUTCOMEREQUEST"]._serialized_end = 4825
|
82
82
|
_globals["_REPORTTASKOUTCOMERESPONSE"]._serialized_start = 4827
|
83
83
|
_globals["_REPORTTASKOUTCOMERESPONSE"]._serialized_end = 4854
|
84
|
-
_globals["_EXECUTORAPI"]._serialized_start =
|
85
|
-
_globals["_EXECUTORAPI"]._serialized_end =
|
84
|
+
_globals["_EXECUTORAPI"]._serialized_start = 6231
|
85
|
+
_globals["_EXECUTORAPI"]._serialized_end = 6598
|
86
86
|
# @@protoc_insertion_point(module_scope)
|
@@ -25,6 +25,8 @@ class GPUModel(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
|
25
25
|
GPU_MODEL_NVIDIA_A100_80GB: _ClassVar[GPUModel]
|
26
26
|
GPU_MODEL_NVIDIA_H100_80GB: _ClassVar[GPUModel]
|
27
27
|
GPU_MODEL_NVIDIA_TESLA_T4: _ClassVar[GPUModel]
|
28
|
+
GPU_MODEL_NVIDIA_A6000: _ClassVar[GPUModel]
|
29
|
+
GPU_MODEL_NVIDIA_A10: _ClassVar[GPUModel]
|
28
30
|
|
29
31
|
class FunctionExecutorStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
30
32
|
__slots__ = ()
|
@@ -80,6 +82,8 @@ GPU_MODEL_NVIDIA_A100_40GB: GPUModel
|
|
80
82
|
GPU_MODEL_NVIDIA_A100_80GB: GPUModel
|
81
83
|
GPU_MODEL_NVIDIA_H100_80GB: GPUModel
|
82
84
|
GPU_MODEL_NVIDIA_TESLA_T4: GPUModel
|
85
|
+
GPU_MODEL_NVIDIA_A6000: GPUModel
|
86
|
+
GPU_MODEL_NVIDIA_A10: GPUModel
|
83
87
|
FUNCTION_EXECUTOR_STATUS_UNKNOWN: FunctionExecutorStatus
|
84
88
|
FUNCTION_EXECUTOR_STATUS_STARTING_UP: FunctionExecutorStatus
|
85
89
|
FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR: FunctionExecutorStatus
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/function_executor.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/health_checker.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/function_executor/single_task_runner.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/grpc/function_executor_controller.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/monitoring/health_check_handler.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/monitoring/prometheus_metrics_handler.py
RENAMED
File without changes
|
File without changes
|
{indexify-0.3.29 → indexify-0.3.30}/src/indexify/executor/monitoring/startup_probe_handler.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|