indexify 0.3.27__py3-none-any.whl → 0.3.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/cli/cli.py +23 -12
- indexify/executor/function_executor/function_executor_state.py +1 -5
- indexify/executor/function_executor/function_executor_states_container.py +4 -0
- indexify/executor/function_executor/health_checker.py +3 -1
- indexify/executor/function_executor/task_output.py +1 -1
- indexify/executor/grpc/function_executor_controller.py +10 -4
- indexify/executor/grpc/state_reconciler.py +12 -9
- indexify/executor/grpc/state_reporter.py +36 -14
- indexify/executor/grpc/task_controller.py +2 -1
- indexify/executor/host_resources/host_resources.py +71 -17
- indexify/executor/host_resources/nvidia_gpu.py +4 -0
- indexify/executor/task_reporter.py +1 -4
- indexify/proto/executor_api.proto +6 -7
- indexify/proto/executor_api_pb2.py +40 -40
- indexify/proto/executor_api_pb2.pyi +9 -8
- {indexify-0.3.27.dist-info → indexify-0.3.29.dist-info}/METADATA +2 -1
- {indexify-0.3.27.dist-info → indexify-0.3.29.dist-info}/RECORD +19 -19
- {indexify-0.3.27.dist-info → indexify-0.3.29.dist-info}/WHEEL +0 -0
- {indexify-0.3.27.dist-info → indexify-0.3.29.dist-info}/entry_points.txt +0 -0
indexify/cli/cli.py
CHANGED
@@ -69,7 +69,7 @@ def build_image(
|
|
69
69
|
exec(open(workflow_file_path).read(), globals_dict)
|
70
70
|
except FileNotFoundError as e:
|
71
71
|
raise Exception(
|
72
|
-
f"Could not find workflow file to execute at:
|
72
|
+
f"Could not find workflow file to execute at: `{workflow_file_path}`"
|
73
73
|
)
|
74
74
|
for _, obj in globals_dict.items():
|
75
75
|
if type(obj) and isinstance(obj, Image):
|
@@ -122,6 +122,15 @@ def executor(
|
|
122
122
|
help="Port where to run Executor Monitoring server",
|
123
123
|
),
|
124
124
|
] = 7000,
|
125
|
+
labels: Annotated[
|
126
|
+
List[str],
|
127
|
+
typer.Option(
|
128
|
+
"--label",
|
129
|
+
"-l",
|
130
|
+
help="Executor key-value label to be sent to the Server. "
|
131
|
+
"Specified as <key>=<value>",
|
132
|
+
),
|
133
|
+
] = [],
|
125
134
|
enable_grpc_state_reconciler: Annotated[
|
126
135
|
bool,
|
127
136
|
typer.Option(
|
@@ -132,18 +141,10 @@ def executor(
|
|
132
141
|
),
|
133
142
|
),
|
134
143
|
] = False,
|
135
|
-
labels: Annotated[
|
136
|
-
List[str],
|
137
|
-
typer.Option(
|
138
|
-
"--label",
|
139
|
-
"-l",
|
140
|
-
help="Executor key-value label to be sent to the Server. "
|
141
|
-
"Specified as <key>=<value>",
|
142
|
-
),
|
143
|
-
] = [],
|
144
144
|
):
|
145
145
|
if dev:
|
146
|
-
|
146
|
+
compact_tracebacks: bool = os.getenv("INDEXIFY_COMPACT_TRACEBACKS", "1") == "1"
|
147
|
+
configure_development_mode_logging(compact_tracebacks=compact_tracebacks)
|
147
148
|
else:
|
148
149
|
configure_production_mode_logging()
|
149
150
|
if function_uris is None:
|
@@ -200,6 +201,16 @@ def executor(
|
|
200
201
|
s3=S3BLOBStore(),
|
201
202
|
)
|
202
203
|
|
204
|
+
host_resources_provider: HostResourcesProvider = HostResourcesProvider(
|
205
|
+
gpu_allocator=NvidiaGPUAllocator(logger),
|
206
|
+
# Assuming a simple setup in OSS where Executor container has a single file system
|
207
|
+
# used by all Function Executors and all the container resources are available to all Function Executors.
|
208
|
+
function_executors_ephimeral_disks_path="/",
|
209
|
+
host_overhead_cpus=0,
|
210
|
+
host_overhead_memory_gb=0,
|
211
|
+
host_overhead_function_executors_ephimeral_disks_gb=0,
|
212
|
+
)
|
213
|
+
|
203
214
|
prometheus_client.Info("cli", "CLI information").info(
|
204
215
|
{
|
205
216
|
"package": "indexify",
|
@@ -226,7 +237,7 @@ def executor(
|
|
226
237
|
monitoring_server_port=monitoring_server_port,
|
227
238
|
enable_grpc_state_reconciler=enable_grpc_state_reconciler,
|
228
239
|
blob_store=blob_store,
|
229
|
-
host_resources_provider=
|
240
|
+
host_resources_provider=host_resources_provider,
|
230
241
|
).run()
|
231
242
|
|
232
243
|
|
@@ -49,7 +49,6 @@ class FunctionExecutorState:
|
|
49
49
|
# TODO: Move graph_version to immutable fields once we migrate to gRPC State Reconciler.
|
50
50
|
self.graph_version: str = graph_version
|
51
51
|
self.status: FunctionExecutorStatus = FunctionExecutorStatus.DESTROYED
|
52
|
-
self.status_message: str = ""
|
53
52
|
self.status_change_notifier: asyncio.Condition = asyncio.Condition(
|
54
53
|
lock=self.lock
|
55
54
|
)
|
@@ -65,9 +64,7 @@ class FunctionExecutorState:
|
|
65
64
|
while self.status not in allowlist:
|
66
65
|
await self.status_change_notifier.wait()
|
67
66
|
|
68
|
-
async def set_status(
|
69
|
-
self, new_status: FunctionExecutorStatus, status_message: str = ""
|
70
|
-
) -> None:
|
67
|
+
async def set_status(self, new_status: FunctionExecutorStatus) -> None:
|
71
68
|
"""Sets the status of the Function Executor.
|
72
69
|
|
73
70
|
The caller must hold the lock.
|
@@ -84,7 +81,6 @@ class FunctionExecutorState:
|
|
84
81
|
metric_function_executors_with_status.labels(status=self.status.name).dec()
|
85
82
|
metric_function_executors_with_status.labels(status=new_status.name).inc()
|
86
83
|
self.status = new_status
|
87
|
-
self.status_message = status_message
|
88
84
|
self.status_change_notifier.notify_all()
|
89
85
|
else:
|
90
86
|
raise ValueError(
|
@@ -71,6 +71,10 @@ class FunctionExecutorStatesContainer:
|
|
71
71
|
metric_function_executor_states_count.set(len(self._states))
|
72
72
|
return state
|
73
73
|
|
74
|
+
def exists(self, id: str) -> bool:
|
75
|
+
"""Check if the state with the given ID exists."""
|
76
|
+
return id in self._states
|
77
|
+
|
74
78
|
async def shutdown(self):
|
75
79
|
# Function Executors are outside the Executor process
|
76
80
|
# so they need to get cleaned up explicitly and reliably.
|
@@ -19,7 +19,9 @@ from .metrics.health_checker import (
|
|
19
19
|
)
|
20
20
|
from .server.client_configuration import HEALTH_CHECK_TIMEOUT_SEC
|
21
21
|
|
22
|
-
|
22
|
+
# Use lowest feasible value for now to detect FE crashes quickly because
|
23
|
+
# we're only doing periodic health checks now.
|
24
|
+
HEALTH_CHECK_POLL_PERIOD_SEC = 5
|
23
25
|
|
24
26
|
|
25
27
|
class HealthCheckResult:
|
@@ -99,7 +99,7 @@ class TaskOutput:
|
|
99
99
|
function_name=function_name,
|
100
100
|
graph_version=graph_version,
|
101
101
|
graph_invocation_id=graph_invocation_id,
|
102
|
-
stderr=f"Function exceeded its configured timeout of {timeout_sec:.3f} sec.",
|
102
|
+
stderr=f"Function or router exceeded its configured timeout of {timeout_sec:.3f} sec.",
|
103
103
|
is_internal_error=False,
|
104
104
|
output_payload_uri_prefix=output_payload_uri_prefix,
|
105
105
|
)
|
@@ -250,7 +250,6 @@ class FunctionExecutorController:
|
|
250
250
|
)
|
251
251
|
|
252
252
|
next_status: FunctionExecutorStatus = FunctionExecutorStatus.IDLE
|
253
|
-
next_status_message: str = ""
|
254
253
|
async with _UnlockedLockContextManager(self._function_executor_state.lock):
|
255
254
|
try:
|
256
255
|
function_executor: FunctionExecutor = await _create_function_executor(
|
@@ -264,13 +263,20 @@ class FunctionExecutorController:
|
|
264
263
|
)
|
265
264
|
except CustomerError as e:
|
266
265
|
next_status = FunctionExecutorStatus.STARTUP_FAILED_CUSTOMER_ERROR
|
267
|
-
|
266
|
+
# TODO: Save stdout and stderr of customer code that ran during FE creation into BLOBs and uncomment the corresponding tests.
|
267
|
+
self._logger.error(
|
268
|
+
"failed to create function executor due to error in customer code",
|
269
|
+
exc_info=e,
|
270
|
+
)
|
268
271
|
except Exception as e:
|
269
272
|
next_status = FunctionExecutorStatus.STARTUP_FAILED_PLATFORM_ERROR
|
270
|
-
self._logger.error(
|
273
|
+
self._logger.error(
|
274
|
+
"failed to create function executor due to platform error",
|
275
|
+
exc_info=e,
|
276
|
+
)
|
271
277
|
|
272
278
|
# FE state lock is acquired again at this point.
|
273
|
-
await self._function_executor_state.set_status(next_status
|
279
|
+
await self._function_executor_state.set_status(next_status)
|
274
280
|
|
275
281
|
if next_status == FunctionExecutorStatus.IDLE:
|
276
282
|
# Task controllers will notice that this FE is IDLE and start running on it one by one.
|
@@ -137,14 +137,16 @@ class ExecutorStateReconciler:
|
|
137
137
|
)
|
138
138
|
continue
|
139
139
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
140
|
+
# TODO: The clock is only incremented when function executors have actionable changes and not on new allocations.
|
141
|
+
# Therefore the clock cannot currently be used as an idempotency token.
|
142
|
+
# if self._last_server_clock is not None:
|
143
|
+
# if self._last_server_clock >= new_state.clock:
|
144
|
+
# self._logger.warning(
|
145
|
+
# "received outdated DesiredExecutorState from Server, ignoring",
|
146
|
+
# current_clock=self._last_server_clock,
|
147
|
+
# ignored_clock=new_state.clock,
|
148
|
+
# )
|
149
|
+
# continue # Duplicate or outdated message state sent by Server.
|
148
150
|
|
149
151
|
self._last_server_clock = new_state.clock
|
150
152
|
# Always read the latest desired state value from the stream so
|
@@ -272,7 +274,8 @@ class ExecutorStateReconciler:
|
|
272
274
|
|
273
275
|
Doesn't block on any long running operations. Doesn't raise any exceptions.
|
274
276
|
"""
|
275
|
-
|
277
|
+
|
278
|
+
if not self._function_executor_states.exists(function_executor_description.id):
|
276
279
|
await self._create_function_executor(function_executor_description)
|
277
280
|
|
278
281
|
async def _create_function_executor(
|
@@ -73,11 +73,11 @@ class ExecutorStateReporter:
|
|
73
73
|
function_executor_states
|
74
74
|
)
|
75
75
|
self._channel_manager = channel_manager
|
76
|
+
self._host_resources_provider: HostResourcesProvider = host_resources_provider
|
76
77
|
self._logger: Any = logger.bind(module=__name__)
|
77
78
|
self._reporting_interval_sec: int = reporting_interval_sec
|
78
|
-
self._total_host_resources: HostResourcesProto =
|
79
|
-
|
80
|
-
)
|
79
|
+
self._total_host_resources: Optional[HostResourcesProto] = None
|
80
|
+
self._total_function_executor_resources: Optional[HostResourcesProto] = None
|
81
81
|
|
82
82
|
self._is_shutdown: bool = False
|
83
83
|
self._executor_status: ExecutorStatus = ExecutorStatus.EXECUTOR_STATUS_UNKNOWN
|
@@ -85,7 +85,9 @@ class ExecutorStateReporter:
|
|
85
85
|
function_allowlist
|
86
86
|
)
|
87
87
|
self._labels.update(_label_values_to_strings(RuntimeProbes().probe().labels))
|
88
|
-
self._last_server_clock:
|
88
|
+
self._last_server_clock: int = (
|
89
|
+
0 # Server expects initial value to be 0 until it is set by Server.
|
90
|
+
)
|
89
91
|
|
90
92
|
def update_executor_status(self, value: ExecutorStatus):
|
91
93
|
self._executor_status = value
|
@@ -98,7 +100,7 @@ class ExecutorStateReporter:
|
|
98
100
|
|
99
101
|
Never raises any exceptions.
|
100
102
|
"""
|
101
|
-
# TODO: Move this into a new async task and cancel it in shutdown().
|
103
|
+
# TODO: Move this method into a new async task and cancel it in shutdown().
|
102
104
|
while not self._is_shutdown:
|
103
105
|
stub = ExecutorAPIStub(await self._channel_manager.get_channel())
|
104
106
|
while not self._is_shutdown:
|
@@ -111,19 +113,39 @@ class ExecutorStateReporter:
|
|
111
113
|
await asyncio.sleep(self._reporting_interval_sec)
|
112
114
|
except Exception as e:
|
113
115
|
self._logger.error(
|
114
|
-
f"
|
116
|
+
f"failed to report state to the server, reconnecting in {_REPORT_BACKOFF_ON_ERROR_SEC} sec.",
|
115
117
|
exc_info=e,
|
116
118
|
)
|
117
119
|
await asyncio.sleep(_REPORT_BACKOFF_ON_ERROR_SEC)
|
118
120
|
break
|
119
121
|
|
120
|
-
self._logger.info("
|
122
|
+
self._logger.info("state reporter shutdown")
|
121
123
|
|
122
124
|
async def report_state(self, stub: ExecutorAPIStub):
|
123
125
|
"""Reports the current state to the server represented by the supplied stub.
|
124
126
|
|
125
127
|
Raises exceptions on failure.
|
126
128
|
"""
|
129
|
+
if self._total_host_resources is None:
|
130
|
+
# We need to fetch total resources only once, because they are not changing.
|
131
|
+
total_host_resources: HostResources = (
|
132
|
+
await self._host_resources_provider.total_host_resources(self._logger)
|
133
|
+
)
|
134
|
+
total_function_executor_resources: HostResources = (
|
135
|
+
await self._host_resources_provider.total_function_executor_resources(
|
136
|
+
self._logger
|
137
|
+
)
|
138
|
+
)
|
139
|
+
self._logger.info(
|
140
|
+
"detected host resources",
|
141
|
+
total_host_resources=total_host_resources,
|
142
|
+
total_function_executor_resources=total_function_executor_resources,
|
143
|
+
)
|
144
|
+
self._total_host_resources = _host_resources_to_proto(total_host_resources)
|
145
|
+
self._total_function_executor_resources = _host_resources_to_proto(
|
146
|
+
total_function_executor_resources
|
147
|
+
)
|
148
|
+
|
127
149
|
with (
|
128
150
|
metric_state_report_errors.count_exceptions(),
|
129
151
|
metric_state_report_latency.time(),
|
@@ -136,16 +158,15 @@ class ExecutorStateReporter:
|
|
136
158
|
flavor=_to_grpc_executor_flavor(self._flavor, self._logger),
|
137
159
|
version=self._version,
|
138
160
|
status=self._executor_status,
|
139
|
-
|
140
|
-
free_resources=self._total_host_resources,
|
161
|
+
total_function_executor_resources=self._total_function_executor_resources,
|
141
162
|
total_resources=self._total_host_resources,
|
142
163
|
allowed_functions=self._allowed_functions,
|
143
164
|
function_executor_states=await self._fetch_function_executor_states(),
|
144
165
|
labels=self._labels,
|
145
166
|
)
|
146
167
|
state.state_hash = _state_hash(state)
|
147
|
-
|
148
|
-
|
168
|
+
# Set fields not included in the state hash.
|
169
|
+
state.server_clock = self._last_server_clock
|
149
170
|
|
150
171
|
await stub.report_executor_state(
|
151
172
|
ReportExecutorStateRequest(executor_state=state),
|
@@ -176,7 +197,6 @@ class ExecutorStateReporter:
|
|
176
197
|
status=_to_grpc_function_executor_status(
|
177
198
|
function_executor_state.status, self._logger
|
178
199
|
),
|
179
|
-
status_message=function_executor_state.status_message,
|
180
200
|
)
|
181
201
|
if function_executor_state.image_uri:
|
182
202
|
function_executor_state_proto.description.image_uri = (
|
@@ -227,7 +247,7 @@ def _to_grpc_function_executor_status(
|
|
227
247
|
)
|
228
248
|
|
229
249
|
if result == FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_UNKNOWN:
|
230
|
-
logger.error("
|
250
|
+
logger.error("unexpected Function Executor status", status=status)
|
231
251
|
|
232
252
|
return result
|
233
253
|
|
@@ -246,7 +266,7 @@ def _to_grpc_executor_flavor(
|
|
246
266
|
)
|
247
267
|
|
248
268
|
if result == ExecutorFlavorProto.EXECUTOR_FLAVOR_UNKNOWN:
|
249
|
-
logger.error("
|
269
|
+
logger.error("unexpected Executor flavor", flavor=flavor)
|
250
270
|
|
251
271
|
return result
|
252
272
|
|
@@ -287,5 +307,7 @@ def _gpu_model_to_proto(gpu_model: NVIDIA_GPU_MODEL) -> GPUModelProto:
|
|
287
307
|
return GPUModelProto.GPU_MODEL_NVIDIA_A100_80GB
|
288
308
|
elif gpu_model == NVIDIA_GPU_MODEL.H100_80GB:
|
289
309
|
return GPUModelProto.GPU_MODEL_NVIDIA_H100_80GB
|
310
|
+
elif gpu_model == NVIDIA_GPU_MODEL.TESLA_T4:
|
311
|
+
return GPUModelProto.GPU_MODEL_NVIDIA_TESLA_T4
|
290
312
|
else:
|
291
313
|
return GPUModelProto.GPU_MODEL_UNKNOWN
|
@@ -203,7 +203,7 @@ class TaskController:
|
|
203
203
|
reducer_output_key=(
|
204
204
|
self._task.reducer_output_key
|
205
205
|
if self._task.HasField("reducer_output_key")
|
206
|
-
else
|
206
|
+
else None
|
207
207
|
),
|
208
208
|
data_payload=(
|
209
209
|
self._task.reducer_input
|
@@ -233,6 +233,7 @@ class TaskController:
|
|
233
233
|
# and no other tasks run on this FE because it'd result in undefined behavior.
|
234
234
|
if self._is_timed_out:
|
235
235
|
next_status = FunctionExecutorStatus.UNHEALTHY
|
236
|
+
# TODO: When task controller is removed do FE health check here to stop scheduling tasks on unhealthy FE asap.
|
236
237
|
await self._release_function_executor(next_status=next_status)
|
237
238
|
|
238
239
|
async def _acquire_function_executor(self) -> None:
|
@@ -1,5 +1,7 @@
|
|
1
|
-
|
1
|
+
import asyncio
|
2
|
+
from typing import Any, List, Optional
|
2
3
|
|
4
|
+
import psutil
|
3
5
|
from pydantic import BaseModel
|
4
6
|
|
5
7
|
from .nvidia_gpu import NvidiaGPUInfo
|
@@ -18,33 +20,85 @@ class HostResourcesProvider:
|
|
18
20
|
HostResourcesProvider is a class that provides information about the host resources.
|
19
21
|
"""
|
20
22
|
|
21
|
-
def __init__(
|
23
|
+
def __init__(
|
24
|
+
self,
|
25
|
+
gpu_allocator: NvidiaGPUAllocator,
|
26
|
+
function_executors_ephimeral_disks_path: str,
|
27
|
+
host_overhead_cpus: int,
|
28
|
+
host_overhead_memory_gb: int,
|
29
|
+
host_overhead_function_executors_ephimeral_disks_gb: int,
|
30
|
+
):
|
31
|
+
"""Creates a HostResourcesProvider.
|
32
|
+
|
33
|
+
Args:
|
34
|
+
gpu_allocator: The GPU allocator to use for GPU information.
|
35
|
+
function_executors_ephimeral_disks_path: The path to file system used as ephimeral disk space by Function Executors.
|
36
|
+
host_overhead_cpus: The number of CPUs reserved for use by host (can't be used by Function Executors).
|
37
|
+
host_overhead_memory_gb: The amount of memory reserved for use by host (can't be used by Function Executors).
|
38
|
+
host_overhead_function_executors_ephimeral_disks_gb: The amount of ephimeral disk space reserved for use by host (can't be used by Function Executors).
|
39
|
+
"""
|
22
40
|
self._gpu_allocator: NvidiaGPUAllocator = gpu_allocator
|
41
|
+
self._function_executors_ephimeral_disks_path: str = (
|
42
|
+
function_executors_ephimeral_disks_path
|
43
|
+
)
|
44
|
+
self._host_overhead_cpus: int = host_overhead_cpus
|
45
|
+
self._host_overhead_memory_gb: int = host_overhead_memory_gb
|
46
|
+
self._host_overhead_function_executors_ephimeral_disks_gb: int = (
|
47
|
+
host_overhead_function_executors_ephimeral_disks_gb
|
48
|
+
)
|
23
49
|
|
24
|
-
def
|
50
|
+
async def total_host_resources(self, logger: Any) -> HostResources:
|
25
51
|
"""Returns all hardware resources that exist at the host.
|
26
52
|
|
27
53
|
Raises Exception on error.
|
28
54
|
"""
|
29
|
-
|
30
|
-
|
31
|
-
return HostResources(
|
32
|
-
cpu_count=0, # TODO: Implement for Linux and MacOS hosts
|
33
|
-
memory_mb=0, # TODO: Implement for Linux and MacOS hosts
|
34
|
-
disk_mb=0, # TODO: Implement for Linux and MacOS hosts
|
35
|
-
gpus=self._gpu_allocator.list_all(),
|
36
|
-
)
|
55
|
+
# Run psutil library calls in a separate thread to not block the event loop.
|
56
|
+
return await asyncio.to_thread(self._total_host_resources, logger=logger)
|
37
57
|
|
38
|
-
def
|
39
|
-
"""Returns all hardware resources that are
|
58
|
+
async def total_function_executor_resources(self, logger: Any) -> HostResources:
|
59
|
+
"""Returns all hardware resources on the host that are usable by Function Executors.
|
40
60
|
|
41
61
|
Raises Exception on error.
|
42
62
|
"""
|
63
|
+
total_resources: HostResources = await self.total_host_resources(logger=logger)
|
64
|
+
return HostResources(
|
65
|
+
cpu_count=max(0, total_resources.cpu_count - self._host_overhead_cpus),
|
66
|
+
memory_mb=max(
|
67
|
+
0, total_resources.memory_mb - self._host_overhead_memory_gb * 1024
|
68
|
+
),
|
69
|
+
disk_mb=max(
|
70
|
+
0,
|
71
|
+
total_resources.disk_mb
|
72
|
+
- self._host_overhead_function_executors_ephimeral_disks_gb * 1024,
|
73
|
+
),
|
74
|
+
gpus=total_resources.gpus,
|
75
|
+
)
|
76
|
+
|
77
|
+
def _total_host_resources(self, logger: Any) -> HostResources:
|
43
78
|
logger = logger.bind(module=__name__)
|
44
79
|
|
80
|
+
# If users disable Hyper-Threading in OS then we'd only see physical cores here.
|
81
|
+
# This allows users to control if logical or physical cores are used for resource
|
82
|
+
# reporting and for running the functions.
|
83
|
+
cpu_count: Optional[int] = psutil.cpu_count(logical=True)
|
84
|
+
if cpu_count is None:
|
85
|
+
logger.warning(
|
86
|
+
"Unable to determine CPU count. Defaulting to 0.",
|
87
|
+
cpu_count=cpu_count,
|
88
|
+
)
|
89
|
+
cpu_count = 0
|
90
|
+
|
91
|
+
memory_mb: int = int(psutil.virtual_memory().total / 1024 / 1024)
|
92
|
+
disk_mb = int(
|
93
|
+
psutil.disk_usage(self._function_executors_ephimeral_disks_path).total
|
94
|
+
/ 1024
|
95
|
+
/ 1024
|
96
|
+
)
|
97
|
+
all_gpus: List[NvidiaGPUInfo] = self._gpu_allocator.list_all()
|
98
|
+
|
45
99
|
return HostResources(
|
46
|
-
cpu_count=
|
47
|
-
memory_mb=
|
48
|
-
disk_mb=
|
49
|
-
gpus=
|
100
|
+
cpu_count=cpu_count,
|
101
|
+
memory_mb=memory_mb,
|
102
|
+
disk_mb=disk_mb,
|
103
|
+
gpus=all_gpus,
|
50
104
|
)
|
@@ -11,6 +11,7 @@ class NVIDIA_GPU_MODEL(str, Enum):
|
|
11
11
|
A100_40GB = "A100-40GB"
|
12
12
|
A100_80GB = "A100-80GB"
|
13
13
|
H100_80GB = "H100"
|
14
|
+
TESLA_T4 = "T4"
|
14
15
|
|
15
16
|
|
16
17
|
class NvidiaGPUInfo(BaseModel):
|
@@ -52,6 +53,7 @@ def fetch_nvidia_gpu_infos(logger: Any) -> List[NvidiaGPUInfo]:
|
|
52
53
|
# 0, NVIDIA A100-SXM4-80GB, GPU-89fdc1e1-18b2-f499-c12b-82bcb9bfb3fa
|
53
54
|
# 1, NVIDIA A100-PCIE-40GB, GPU-e9c9aa65-bff3-405a-ab7c-dc879cc88169
|
54
55
|
# 2, NVIDIA H100 80GB HBM3, GPU-8c35f4c9-4dff-c9a2-866f-afb5d82e1dd7
|
56
|
+
# 3, Tesla T4, GPU-2a7fadae-a692-1c44-2c57-6645a0d117e4
|
55
57
|
parts = line.split(",")
|
56
58
|
index = parts[0].strip()
|
57
59
|
product_name = parts[1].strip()
|
@@ -64,6 +66,8 @@ def fetch_nvidia_gpu_infos(logger: Any) -> List[NvidiaGPUInfo]:
|
|
64
66
|
model = NVIDIA_GPU_MODEL.A100_40GB
|
65
67
|
elif product_name.startswith("NVIDIA H100"):
|
66
68
|
model = NVIDIA_GPU_MODEL.H100_80GB
|
69
|
+
elif product_name.startswith("Tesla T4"):
|
70
|
+
model = NVIDIA_GPU_MODEL.TESLA_T4
|
67
71
|
else:
|
68
72
|
logger.warning(
|
69
73
|
"Unknown GPU model was detected, ignoring", nvidia_smi_output=line
|
@@ -323,15 +323,12 @@ class TaskReporter:
|
|
323
323
|
invocation_id=output.graph_invocation_id,
|
324
324
|
executor_id=self._executor_id,
|
325
325
|
task_id=output.task_id,
|
326
|
+
reducer=output.reducer,
|
326
327
|
)
|
327
328
|
output_files: List[Any] = []
|
328
|
-
if output is None:
|
329
|
-
return task_result, output_files
|
330
|
-
|
331
329
|
task_result.outcome = (
|
332
330
|
TASK_OUTCOME_SUCCESS if output.success else TASK_OUTCOME_FAILURE
|
333
331
|
)
|
334
|
-
task_result.reducer = output.reducer
|
335
332
|
|
336
333
|
_process_function_output(
|
337
334
|
function_output=output.function_output, output_files=output_files
|
@@ -32,6 +32,7 @@ enum GPUModel {
|
|
32
32
|
GPU_MODEL_NVIDIA_A100_40GB = 1;
|
33
33
|
GPU_MODEL_NVIDIA_A100_80GB = 2;
|
34
34
|
GPU_MODEL_NVIDIA_H100_80GB = 3;
|
35
|
+
GPU_MODEL_NVIDIA_TESLA_T4 = 4;
|
35
36
|
}
|
36
37
|
|
37
38
|
// Free GPUs available at the Executor.
|
@@ -102,10 +103,7 @@ message FunctionExecutorDescription {
|
|
102
103
|
message FunctionExecutorState {
|
103
104
|
optional FunctionExecutorDescription description = 1;
|
104
105
|
optional FunctionExecutorStatus status = 2;
|
105
|
-
|
106
|
-
// Currently it contains error message from customer code
|
107
|
-
// if status is FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR.
|
108
|
-
optional string status_message = 3;
|
106
|
+
reserved 3;
|
109
107
|
}
|
110
108
|
|
111
109
|
enum ExecutorStatus {
|
@@ -130,10 +128,10 @@ message ExecutorState {
|
|
130
128
|
optional ExecutorFlavor flavor = 4;
|
131
129
|
optional string version = 5;
|
132
130
|
optional ExecutorStatus status = 6;
|
133
|
-
// Total resources
|
131
|
+
// Total resources at the Executor.
|
134
132
|
optional HostResources total_resources = 13;
|
135
|
-
//
|
136
|
-
optional HostResources
|
133
|
+
// Total resources usable by Function Executors.
|
134
|
+
optional HostResources total_function_executor_resources = 7;
|
137
135
|
// Empty allowed_functions list means that any function can run on the Executor.
|
138
136
|
repeated AllowedFunction allowed_functions = 8;
|
139
137
|
repeated FunctionExecutorState function_executor_states = 9;
|
@@ -141,6 +139,7 @@ message ExecutorState {
|
|
141
139
|
optional string state_hash = 11;
|
142
140
|
// Server supplied clock value of the latest desired executor state that was
|
143
141
|
// reconciled by Executor. Not included into state_hash.
|
142
|
+
// Initial value on Executor startup is 0.
|
144
143
|
optional uint64 server_clock = 12;
|
145
144
|
}
|
146
145
|
|
@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
|
|
19
19
|
|
20
20
|
|
21
21
|
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
|
22
|
-
b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\x87\x02\n\x0b\x44\x61taPayload\x12\x11\n\x04path\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04size\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x03\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x04\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x05\x88\x01\x01\x42\x07\n\x05_pathB\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"k\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_modelJ\x04\x08\x03\x10\x04"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xc5\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12\x16\n\tgpu_count\x18\x04 \x01(\rH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x0c\n\n_gpu_count"\xbf\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12<\n\x0fresource_limits\x18\x08 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x07\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x12\n\x10_resource_limitsB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resources"\xe8\x01\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12\x1b\n\x0estatus_message\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_statusB\x11\n\x0f_status_message"\x9d\x06\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1d\n\x10\x64\x65velopment_mode\x18\x02 \x01(\x08H\x01\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06\x66lavor\x18\x04 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorFlavorH\x03\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x34\n\x06status\x18\x06 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x05\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12;\n\x0e\x66ree_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x07\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x08\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\t\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x13\n\x11_development_modeB\x0b\n\t_hostnameB\t\n\x07_flavorB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB\x11\n\x0f_free_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"l\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x42\x11\n\x0f_executor_state"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 \x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xa4\x05\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x16\n\tinput_key\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x1f\n\x12reducer_output_key\x18\t \x01(\tH\x07\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x08\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\x0b\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\x0c\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\x0c\n\n_input_keyB\x15\n\x13_reducer_output_keyB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\x7f\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_task"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\x87\x06\n\x18ReportTaskOutcomeRequest\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x04\x88\x01\x01\x12\x32\n\x07outcome\x18\x07 \x01(\x0e\x32\x1c.executor_api_pb.TaskOutcomeH\x05\x88\x01\x01\x12\x1a\n\rinvocation_id\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x18\n\x0b\x65xecutor_id\x18\t \x01(\tH\x07\x88\x01\x01\x12\x14\n\x07reducer\x18\n \x01(\x08H\x08\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x30\n\nfn_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x31\n\x06stderr\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12=\n\x0foutput_encoding\x18\r \x01(\x0e\x32\x1f.executor_api_pb.OutputEncodingH\x0b\x88\x01\x01\x12$\n\x17output_encoding_version\x18\x05 \x01(\x04H\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_outcomeB\x10\n\x0e_invocation_idB\x0e\n\x0c_executor_idB\n\n\x08_reducerB\t\n\x07_stdoutB\t\n\x07_stderrB\x12\n\x10_output_encodingB\x1a\n\x18_output_encoding_version"\x1b\n\x19ReportTaskOutcomeResponse*\xab\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03*\x81\x01\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_40GB\x10\x01\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_80GB\x10\x02\x12\x1e\n\x1aGPU_MODEL_NVIDIA_H100_80GB\x10\x03*\xca\x03\n\x16\x46unctionExecutorStatus\x12$\n FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12(\n$FUNCTION_EXECUTOR_STATUS_STARTING_UP\x10\x01\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR\x10\x02\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR\x10\x03\x12!\n\x1d\x46UNCTION_EXECUTOR_STATUS_IDLE\x10\x04\x12)\n%FUNCTION_EXECUTOR_STATUS_RUNNING_TASK\x10\x05\x12&\n"FUNCTION_EXECUTOR_STATUS_UNHEALTHY\x10\x06\x12%\n!FUNCTION_EXECUTOR_STATUS_STOPPING\x10\x07\x12$\n FUNCTION_EXECUTOR_STATUS_STOPPED\x10\x08\x12%\n!FUNCTION_EXECUTOR_STATUS_SHUTDOWN\x10\t*\xc3\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1c\n\x18\x45XECUTOR_STATUS_STOPPING\x10\x04\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x05*d\n\x0e\x45xecutorFlavor\x12\x1b\n\x17\x45XECUTOR_FLAVOR_UNKNOWN\x10\x00\x12\x17\n\x13\x45XECUTOR_FLAVOR_OSS\x10\x01\x12\x1c\n\x18\x45XECUTOR_FLAVOR_PLATFORM\x10\x02*[\n\x0bTaskOutcome\x12\x18\n\x14TASK_OUTCOME_UNKNOWN\x10\x00\x12\x18\n\x14TASK_OUTCOME_SUCCESS\x10\x01\x12\x18\n\x14TASK_OUTCOME_FAILURE\x10\x02*\x7f\n\x0eOutputEncoding\x12\x1b\n\x17OUTPUT_ENCODING_UNKNOWN\x10\x00\x12\x18\n\x14OUTPUT_ENCODING_JSON\x10\x01\x12\x1a\n\x16OUTPUT_ENCODING_PICKLE\x10\x02\x12\x1a\n\x16OUTPUT_ENCODING_BINARY\x10\x03\x32\xef\x02\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x12n\n\x13report_task_outcome\x12).executor_api_pb.ReportTaskOutcomeRequest\x1a*.executor_api_pb.ReportTaskOutcomeResponse"\x00\x62\x06proto3'
|
22
|
+
b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\x87\x02\n\x0b\x44\x61taPayload\x12\x11\n\x04path\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04size\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x03\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x04\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x05\x88\x01\x01\x42\x07\n\x05_pathB\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"k\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_modelJ\x04\x08\x03\x10\x04"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xc5\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12\x16\n\tgpu_count\x18\x04 \x01(\rH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x0c\n\n_gpu_count"\xbf\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12<\n\x0fresource_limits\x18\x08 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x07\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x12\n\x10_resource_limitsB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resources"\xbe\x01\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_statusJ\x04\x08\x03\x10\x04"\xc3\x06\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1d\n\x10\x64\x65velopment_mode\x18\x02 \x01(\x08H\x01\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06\x66lavor\x18\x04 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorFlavorH\x03\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x34\n\x06status\x18\x06 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x05\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12N\n!total_function_executor_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x07\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x08\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\t\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x13\n\x11_development_modeB\x0b\n\t_hostnameB\t\n\x07_flavorB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB$\n"_total_function_executor_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"l\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x42\x11\n\x0f_executor_state"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 \x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xa4\x05\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x16\n\tinput_key\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x1f\n\x12reducer_output_key\x18\t \x01(\tH\x07\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x08\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\x0b\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\x0c\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\x0c\n\n_input_keyB\x15\n\x13_reducer_output_keyB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\x7f\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_task"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\x87\x06\n\x18ReportTaskOutcomeRequest\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x04\x88\x01\x01\x12\x32\n\x07outcome\x18\x07 \x01(\x0e\x32\x1c.executor_api_pb.TaskOutcomeH\x05\x88\x01\x01\x12\x1a\n\rinvocation_id\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x18\n\x0b\x65xecutor_id\x18\t \x01(\tH\x07\x88\x01\x01\x12\x14\n\x07reducer\x18\n \x01(\x08H\x08\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x30\n\nfn_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x31\n\x06stderr\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12=\n\x0foutput_encoding\x18\r \x01(\x0e\x32\x1f.executor_api_pb.OutputEncodingH\x0b\x88\x01\x01\x12$\n\x17output_encoding_version\x18\x05 \x01(\x04H\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_outcomeB\x10\n\x0e_invocation_idB\x0e\n\x0c_executor_idB\n\n\x08_reducerB\t\n\x07_stdoutB\t\n\x07_stderrB\x12\n\x10_output_encodingB\x1a\n\x18_output_encoding_version"\x1b\n\x19ReportTaskOutcomeResponse*\xab\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03*\xa0\x01\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_40GB\x10\x01\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_80GB\x10\x02\x12\x1e\n\x1aGPU_MODEL_NVIDIA_H100_80GB\x10\x03\x12\x1d\n\x19GPU_MODEL_NVIDIA_TESLA_T4\x10\x04*\xca\x03\n\x16\x46unctionExecutorStatus\x12$\n FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12(\n$FUNCTION_EXECUTOR_STATUS_STARTING_UP\x10\x01\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR\x10\x02\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR\x10\x03\x12!\n\x1d\x46UNCTION_EXECUTOR_STATUS_IDLE\x10\x04\x12)\n%FUNCTION_EXECUTOR_STATUS_RUNNING_TASK\x10\x05\x12&\n"FUNCTION_EXECUTOR_STATUS_UNHEALTHY\x10\x06\x12%\n!FUNCTION_EXECUTOR_STATUS_STOPPING\x10\x07\x12$\n FUNCTION_EXECUTOR_STATUS_STOPPED\x10\x08\x12%\n!FUNCTION_EXECUTOR_STATUS_SHUTDOWN\x10\t*\xc3\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1c\n\x18\x45XECUTOR_STATUS_STOPPING\x10\x04\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x05*d\n\x0e\x45xecutorFlavor\x12\x1b\n\x17\x45XECUTOR_FLAVOR_UNKNOWN\x10\x00\x12\x17\n\x13\x45XECUTOR_FLAVOR_OSS\x10\x01\x12\x1c\n\x18\x45XECUTOR_FLAVOR_PLATFORM\x10\x02*[\n\x0bTaskOutcome\x12\x18\n\x14TASK_OUTCOME_UNKNOWN\x10\x00\x12\x18\n\x14TASK_OUTCOME_SUCCESS\x10\x01\x12\x18\n\x14TASK_OUTCOME_FAILURE\x10\x02*\x7f\n\x0eOutputEncoding\x12\x1b\n\x17OUTPUT_ENCODING_UNKNOWN\x10\x00\x12\x18\n\x14OUTPUT_ENCODING_JSON\x10\x01\x12\x1a\n\x16OUTPUT_ENCODING_PICKLE\x10\x02\x12\x1a\n\x16OUTPUT_ENCODING_BINARY\x10\x03\x32\xef\x02\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x12n\n\x13report_task_outcome\x12).executor_api_pb.ReportTaskOutcomeRequest\x1a*.executor_api_pb.ReportTaskOutcomeResponse"\x00\x62\x06proto3'
|
23
23
|
)
|
24
24
|
|
25
25
|
_globals = globals()
|
@@ -31,20 +31,20 @@ if not _descriptor._USE_C_DESCRIPTORS:
|
|
31
31
|
DESCRIPTOR._loaded_options = None
|
32
32
|
_globals["_EXECUTORSTATE_LABELSENTRY"]._loaded_options = None
|
33
33
|
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_options = b"8\001"
|
34
|
-
_globals["_DATAPAYLOADENCODING"]._serialized_start =
|
35
|
-
_globals["_DATAPAYLOADENCODING"]._serialized_end =
|
36
|
-
_globals["_GPUMODEL"]._serialized_start =
|
37
|
-
_globals["_GPUMODEL"]._serialized_end =
|
38
|
-
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start =
|
39
|
-
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end =
|
40
|
-
_globals["_EXECUTORSTATUS"]._serialized_start =
|
41
|
-
_globals["_EXECUTORSTATUS"]._serialized_end =
|
42
|
-
_globals["_EXECUTORFLAVOR"]._serialized_start =
|
43
|
-
_globals["_EXECUTORFLAVOR"]._serialized_end =
|
44
|
-
_globals["_TASKOUTCOME"]._serialized_start =
|
45
|
-
_globals["_TASKOUTCOME"]._serialized_end =
|
46
|
-
_globals["_OUTPUTENCODING"]._serialized_start =
|
47
|
-
_globals["_OUTPUTENCODING"]._serialized_end =
|
34
|
+
_globals["_DATAPAYLOADENCODING"]._serialized_start = 4857
|
35
|
+
_globals["_DATAPAYLOADENCODING"]._serialized_end = 5028
|
36
|
+
_globals["_GPUMODEL"]._serialized_start = 5031
|
37
|
+
_globals["_GPUMODEL"]._serialized_end = 5191
|
38
|
+
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 5194
|
39
|
+
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 5652
|
40
|
+
_globals["_EXECUTORSTATUS"]._serialized_start = 5655
|
41
|
+
_globals["_EXECUTORSTATUS"]._serialized_end = 5850
|
42
|
+
_globals["_EXECUTORFLAVOR"]._serialized_start = 5852
|
43
|
+
_globals["_EXECUTORFLAVOR"]._serialized_end = 5952
|
44
|
+
_globals["_TASKOUTCOME"]._serialized_start = 5954
|
45
|
+
_globals["_TASKOUTCOME"]._serialized_end = 6045
|
46
|
+
_globals["_OUTPUTENCODING"]._serialized_start = 6047
|
47
|
+
_globals["_OUTPUTENCODING"]._serialized_end = 6174
|
48
48
|
_globals["_DATAPAYLOAD"]._serialized_start = 55
|
49
49
|
_globals["_DATAPAYLOAD"]._serialized_end = 318
|
50
50
|
_globals["_GPURESOURCES"]._serialized_start = 320
|
@@ -58,29 +58,29 @@ if not _descriptor._USE_C_DESCRIPTORS:
|
|
58
58
|
_globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_start = 1017
|
59
59
|
_globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_end = 1592
|
60
60
|
_globals["_FUNCTIONEXECUTORSTATE"]._serialized_start = 1595
|
61
|
-
_globals["_FUNCTIONEXECUTORSTATE"]._serialized_end =
|
62
|
-
_globals["_EXECUTORSTATE"]._serialized_start =
|
63
|
-
_globals["_EXECUTORSTATE"]._serialized_end =
|
64
|
-
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_start =
|
65
|
-
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_end =
|
66
|
-
_globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_start =
|
67
|
-
_globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_end =
|
68
|
-
_globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_start =
|
69
|
-
_globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_end =
|
70
|
-
_globals["_TASKRETRYPOLICY"]._serialized_start =
|
71
|
-
_globals["_TASKRETRYPOLICY"]._serialized_end =
|
72
|
-
_globals["_TASK"]._serialized_start =
|
73
|
-
_globals["_TASK"]._serialized_end =
|
74
|
-
_globals["_TASKALLOCATION"]._serialized_start =
|
75
|
-
_globals["_TASKALLOCATION"]._serialized_end =
|
76
|
-
_globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_start =
|
77
|
-
_globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_end =
|
78
|
-
_globals["_DESIREDEXECUTORSTATE"]._serialized_start =
|
79
|
-
_globals["_DESIREDEXECUTORSTATE"]._serialized_end =
|
80
|
-
_globals["_REPORTTASKOUTCOMEREQUEST"]._serialized_start =
|
81
|
-
_globals["_REPORTTASKOUTCOMEREQUEST"]._serialized_end =
|
82
|
-
_globals["_REPORTTASKOUTCOMERESPONSE"]._serialized_start =
|
83
|
-
_globals["_REPORTTASKOUTCOMERESPONSE"]._serialized_end =
|
84
|
-
_globals["_EXECUTORAPI"]._serialized_start =
|
85
|
-
_globals["_EXECUTORAPI"]._serialized_end =
|
61
|
+
_globals["_FUNCTIONEXECUTORSTATE"]._serialized_end = 1785
|
62
|
+
_globals["_EXECUTORSTATE"]._serialized_start = 1788
|
63
|
+
_globals["_EXECUTORSTATE"]._serialized_end = 2623
|
64
|
+
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_start = 2404
|
65
|
+
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_end = 2449
|
66
|
+
_globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_start = 2625
|
67
|
+
_globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_end = 2733
|
68
|
+
_globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_start = 2735
|
69
|
+
_globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_end = 2764
|
70
|
+
_globals["_TASKRETRYPOLICY"]._serialized_start = 2767
|
71
|
+
_globals["_TASKRETRYPOLICY"]._serialized_end = 2974
|
72
|
+
_globals["_TASK"]._serialized_start = 2977
|
73
|
+
_globals["_TASK"]._serialized_end = 3653
|
74
|
+
_globals["_TASKALLOCATION"]._serialized_start = 3655
|
75
|
+
_globals["_TASKALLOCATION"]._serialized_end = 3782
|
76
|
+
_globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_start = 3784
|
77
|
+
_globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_end = 3859
|
78
|
+
_globals["_DESIREDEXECUTORSTATE"]._serialized_start = 3862
|
79
|
+
_globals["_DESIREDEXECUTORSTATE"]._serialized_end = 4047
|
80
|
+
_globals["_REPORTTASKOUTCOMEREQUEST"]._serialized_start = 4050
|
81
|
+
_globals["_REPORTTASKOUTCOMEREQUEST"]._serialized_end = 4825
|
82
|
+
_globals["_REPORTTASKOUTCOMERESPONSE"]._serialized_start = 4827
|
83
|
+
_globals["_REPORTTASKOUTCOMERESPONSE"]._serialized_end = 4854
|
84
|
+
_globals["_EXECUTORAPI"]._serialized_start = 6177
|
85
|
+
_globals["_EXECUTORAPI"]._serialized_end = 6544
|
86
86
|
# @@protoc_insertion_point(module_scope)
|
@@ -24,6 +24,7 @@ class GPUModel(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
|
24
24
|
GPU_MODEL_NVIDIA_A100_40GB: _ClassVar[GPUModel]
|
25
25
|
GPU_MODEL_NVIDIA_A100_80GB: _ClassVar[GPUModel]
|
26
26
|
GPU_MODEL_NVIDIA_H100_80GB: _ClassVar[GPUModel]
|
27
|
+
GPU_MODEL_NVIDIA_TESLA_T4: _ClassVar[GPUModel]
|
27
28
|
|
28
29
|
class FunctionExecutorStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
29
30
|
__slots__ = ()
|
@@ -78,6 +79,7 @@ GPU_MODEL_UNKNOWN: GPUModel
|
|
78
79
|
GPU_MODEL_NVIDIA_A100_40GB: GPUModel
|
79
80
|
GPU_MODEL_NVIDIA_A100_80GB: GPUModel
|
80
81
|
GPU_MODEL_NVIDIA_H100_80GB: GPUModel
|
82
|
+
GPU_MODEL_NVIDIA_TESLA_T4: GPUModel
|
81
83
|
FUNCTION_EXECUTOR_STATUS_UNKNOWN: FunctionExecutorStatus
|
82
84
|
FUNCTION_EXECUTOR_STATUS_STARTING_UP: FunctionExecutorStatus
|
83
85
|
FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR: FunctionExecutorStatus
|
@@ -245,18 +247,15 @@ class FunctionExecutorDescription(_message.Message):
|
|
245
247
|
) -> None: ...
|
246
248
|
|
247
249
|
class FunctionExecutorState(_message.Message):
|
248
|
-
__slots__ = ("description", "status"
|
250
|
+
__slots__ = ("description", "status")
|
249
251
|
DESCRIPTION_FIELD_NUMBER: _ClassVar[int]
|
250
252
|
STATUS_FIELD_NUMBER: _ClassVar[int]
|
251
|
-
STATUS_MESSAGE_FIELD_NUMBER: _ClassVar[int]
|
252
253
|
description: FunctionExecutorDescription
|
253
254
|
status: FunctionExecutorStatus
|
254
|
-
status_message: str
|
255
255
|
def __init__(
|
256
256
|
self,
|
257
257
|
description: _Optional[_Union[FunctionExecutorDescription, _Mapping]] = ...,
|
258
258
|
status: _Optional[_Union[FunctionExecutorStatus, str]] = ...,
|
259
|
-
status_message: _Optional[str] = ...,
|
260
259
|
) -> None: ...
|
261
260
|
|
262
261
|
class ExecutorState(_message.Message):
|
@@ -268,7 +267,7 @@ class ExecutorState(_message.Message):
|
|
268
267
|
"version",
|
269
268
|
"status",
|
270
269
|
"total_resources",
|
271
|
-
"
|
270
|
+
"total_function_executor_resources",
|
272
271
|
"allowed_functions",
|
273
272
|
"function_executor_states",
|
274
273
|
"labels",
|
@@ -293,7 +292,7 @@ class ExecutorState(_message.Message):
|
|
293
292
|
VERSION_FIELD_NUMBER: _ClassVar[int]
|
294
293
|
STATUS_FIELD_NUMBER: _ClassVar[int]
|
295
294
|
TOTAL_RESOURCES_FIELD_NUMBER: _ClassVar[int]
|
296
|
-
|
295
|
+
TOTAL_FUNCTION_EXECUTOR_RESOURCES_FIELD_NUMBER: _ClassVar[int]
|
297
296
|
ALLOWED_FUNCTIONS_FIELD_NUMBER: _ClassVar[int]
|
298
297
|
FUNCTION_EXECUTOR_STATES_FIELD_NUMBER: _ClassVar[int]
|
299
298
|
LABELS_FIELD_NUMBER: _ClassVar[int]
|
@@ -306,7 +305,7 @@ class ExecutorState(_message.Message):
|
|
306
305
|
version: str
|
307
306
|
status: ExecutorStatus
|
308
307
|
total_resources: HostResources
|
309
|
-
|
308
|
+
total_function_executor_resources: HostResources
|
310
309
|
allowed_functions: _containers.RepeatedCompositeFieldContainer[AllowedFunction]
|
311
310
|
function_executor_states: _containers.RepeatedCompositeFieldContainer[
|
312
311
|
FunctionExecutorState
|
@@ -323,7 +322,9 @@ class ExecutorState(_message.Message):
|
|
323
322
|
version: _Optional[str] = ...,
|
324
323
|
status: _Optional[_Union[ExecutorStatus, str]] = ...,
|
325
324
|
total_resources: _Optional[_Union[HostResources, _Mapping]] = ...,
|
326
|
-
|
325
|
+
total_function_executor_resources: _Optional[
|
326
|
+
_Union[HostResources, _Mapping]
|
327
|
+
] = ...,
|
327
328
|
allowed_functions: _Optional[
|
328
329
|
_Iterable[_Union[AllowedFunction, _Mapping]]
|
329
330
|
] = ...,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: indexify
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.29
|
4
4
|
Summary: Open Source Indexify components and helper tools
|
5
5
|
Home-page: https://github.com/tensorlakeai/indexify
|
6
6
|
License: Apache 2.0
|
@@ -17,6 +17,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
17
17
|
Requires-Dist: aiohttp (>=3.11.0,<4.0.0)
|
18
18
|
Requires-Dist: boto3 (>=1.37.30,<2.0.0)
|
19
19
|
Requires-Dist: prometheus-client (>=0.21.1,<0.22.0)
|
20
|
+
Requires-Dist: psutil (>=7.0.0,<8.0.0)
|
20
21
|
Requires-Dist: rich (>=13.9.2,<14.0.0)
|
21
22
|
Requires-Dist: tensorlake (>=0.1)
|
22
23
|
Requires-Dist: typer (>=0.12,<0.13)
|
@@ -1,4 +1,4 @@
|
|
1
|
-
indexify/cli/cli.py,sha256=
|
1
|
+
indexify/cli/cli.py,sha256=_FSY-HnSFm2fY2ppg5UTqrXq5zBptzw3cRcGZFkYQGA,9804
|
2
2
|
indexify/executor/README.md,sha256=ozC6_hMkhQQNVCMEpBxwiUALz6lwErPQxNxQfQDqnG4,2029
|
3
3
|
indexify/executor/api_objects.py,sha256=kHx5gKPwM0Rm64Ea__kPFwuarStX0u_9uaE7vV5M5z8,2222
|
4
4
|
indexify/executor/blob_store/blob_store.py,sha256=XViw_KRfFSNqwcFYwMZixZF-EYCjXK2AQHdt0xh4UVo,2368
|
@@ -9,10 +9,10 @@ indexify/executor/downloader.py,sha256=k9VbfOa-D6YH-cX8Sz-W-gWTsxmeVpSaIOq0xTC9K
|
|
9
9
|
indexify/executor/executor.py,sha256=WE9ABct1yAlfh4-cOUcp1vTjFbkiXNAGlsbsNbfWdkU,17006
|
10
10
|
indexify/executor/executor_flavor.py,sha256=uilzDQVVYlQGR1MVnrUC4NevUActDWHdnJkr38M6kTk,118
|
11
11
|
indexify/executor/function_executor/function_executor.py,sha256=agfUxzSQ-2TqkpMhW3OvOSMF_EhpemetaL3_dYp29Ro,11888
|
12
|
-
indexify/executor/function_executor/function_executor_state.py,sha256=
|
13
|
-
indexify/executor/function_executor/function_executor_states_container.py,sha256=
|
12
|
+
indexify/executor/function_executor/function_executor_state.py,sha256=_bxUKNtuIMDVHVnDzMzMj-Qy4sR18MTwBtakdoAQ0y0,4209
|
13
|
+
indexify/executor/function_executor/function_executor_states_container.py,sha256=ht2xcFXWgCjYxCoeMffB0WHUPgSKJ3QIoswiomwP9WA,3899
|
14
14
|
indexify/executor/function_executor/function_executor_status.py,sha256=Ms8tHG0wlw__pToeQIfBV6SO9c4tPu3UQgJAwXUkg2M,3597
|
15
|
-
indexify/executor/function_executor/health_checker.py,sha256=
|
15
|
+
indexify/executor/function_executor/health_checker.py,sha256=IxE0jnC99K_lvnizFLjXqS1942H8-FNAN4AlhLIjg2Y,6373
|
16
16
|
indexify/executor/function_executor/invocation_state_client.py,sha256=VTpeNxxfsa0ej20Q_ker5RZVdHiu59HWd5qNOjo6DBQ,9800
|
17
17
|
indexify/executor/function_executor/metrics/function_executor.py,sha256=TDksxLRJr-P9ZKhF2Orsaxzzb4lVIBxFEjd_9Zv53Ng,6313
|
18
18
|
indexify/executor/function_executor/metrics/function_executor_state.py,sha256=qheMhnoiYLiZB7ky5EyegfDy4Mr0Zh83bOE0gJ38YmU,1607
|
@@ -27,18 +27,18 @@ indexify/executor/function_executor/server/subprocess_function_executor_server.p
|
|
27
27
|
indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py,sha256=g1AUbhOoPsdhp_50Ayahdyv1Ix5-nEBE8orOQfkATpM,4470
|
28
28
|
indexify/executor/function_executor/single_task_runner.py,sha256=6Fb9icnZ21pJcCq3mddFRoonPdNpUdSjTSQYh9nJXS0,14977
|
29
29
|
indexify/executor/function_executor/task_input.py,sha256=wSrHR4m0juiGClQyeVdhRC37QzDt6Rrjq-ZXJkfBi9k,584
|
30
|
-
indexify/executor/function_executor/task_output.py,sha256=
|
30
|
+
indexify/executor/function_executor/task_output.py,sha256=LpdQZuL1s7FDNk-3TwA__4g5twyfmv45ezBf5Ckf6IE,3434
|
31
31
|
indexify/executor/grpc/channel_manager.py,sha256=ihDkLoiGBLfSmoA2szbntjCfL3E_NDf5LABRXE7YRec,6330
|
32
|
-
indexify/executor/grpc/function_executor_controller.py,sha256=
|
32
|
+
indexify/executor/grpc/function_executor_controller.py,sha256=GRAwhCIDZA-mFGGiyZyOlcezxZ4LXQPkuH_ooTrKEgc,17418
|
33
33
|
indexify/executor/grpc/metrics/channel_manager.py,sha256=k-WArgklmP5WhjcmFmrgRblB7yc3XlaOXO8owRyV-mw,649
|
34
34
|
indexify/executor/grpc/metrics/state_reconciler.py,sha256=0aI2IM4XztKxFa7NCxYSLafw_iiej3p07yEiKyewXIM,585
|
35
35
|
indexify/executor/grpc/metrics/state_reporter.py,sha256=GggBEjMzQUYIG95LtTS4fUg1u9jYowkaXoUXppAXucs,543
|
36
36
|
indexify/executor/grpc/metrics/task_controller.py,sha256=9Nm86nGxL2rZ3rAORB0_CBdO--Fe4MBrewVW4CqGyOU,222
|
37
|
-
indexify/executor/grpc/state_reconciler.py,sha256=
|
38
|
-
indexify/executor/grpc/state_reporter.py,sha256=
|
39
|
-
indexify/executor/grpc/task_controller.py,sha256=
|
40
|
-
indexify/executor/host_resources/host_resources.py,sha256=
|
41
|
-
indexify/executor/host_resources/nvidia_gpu.py,sha256=
|
37
|
+
indexify/executor/grpc/state_reconciler.py,sha256=vAT8LLPUQHDF42c5sa1-4T7FRBka3Bdv88Tv0L_jagk,19957
|
38
|
+
indexify/executor/grpc/state_reporter.py,sha256=SIlEVzKxoYplCt1SYyaage-n3x53TlCb_e-C5xqmvIk,12973
|
39
|
+
indexify/executor/grpc/task_controller.py,sha256=53QJsKQSHIbath8qB8Wgf44l-Ybj40VJ5TLm1Ra1InE,20976
|
40
|
+
indexify/executor/host_resources/host_resources.py,sha256=bp4TK167Av700lVhWDMg_2bV_Vbt8dpPgYA-RJJ5H38,4078
|
41
|
+
indexify/executor/host_resources/nvidia_gpu.py,sha256=S4poK8jw5rTRs6l1W5QR2P2Oo4kZ2zq9I-ViabCpjzU,2705
|
42
42
|
indexify/executor/host_resources/nvidia_gpu_allocator.py,sha256=oULSjL0AVo_nqR_pquq17079UalHQkhMwMqf72gbPHo,1872
|
43
43
|
indexify/executor/metrics/downloader.py,sha256=lctPh8xjkXeLEFJnl1hNrD1yEhLhIl5sggsR4Yoe_Zc,2746
|
44
44
|
indexify/executor/metrics/executor.py,sha256=ua-Vv_k1CB4juJdF7tEBQbBMksqWAA3iXKKMKXZUCLk,2369
|
@@ -56,13 +56,13 @@ indexify/executor/monitoring/server.py,sha256=yzdYhcxnmY6uTQUMt3vatF5jilN52ZtfFs
|
|
56
56
|
indexify/executor/monitoring/startup_probe_handler.py,sha256=zXXsBU15SMlBx1bSFpxWDfed1VHtKKnwvLQ8-frpG98,425
|
57
57
|
indexify/executor/runtime_probes.py,sha256=bo6Dq6AGZpJH099j0DHtVSDEH80tv3j9MXf3VXSx_p8,2182
|
58
58
|
indexify/executor/task_fetcher.py,sha256=p3iEsWyGi0ZMPAv0183smzOUD1KycQ_dXsyd9mpB9IU,3529
|
59
|
-
indexify/executor/task_reporter.py,sha256=
|
59
|
+
indexify/executor/task_reporter.py,sha256=gFERv8r7dEYazmx0E6nujXsglzpy9iS-5-fqn9BESI8,16622
|
60
60
|
indexify/executor/task_runner.py,sha256=UupZbGxU9BN4i1t6M8tH-5k3s4eUPEhMhar1YI0Aztk,7219
|
61
|
-
indexify/proto/executor_api.proto,sha256=
|
62
|
-
indexify/proto/executor_api_pb2.py,sha256=
|
63
|
-
indexify/proto/executor_api_pb2.pyi,sha256=
|
61
|
+
indexify/proto/executor_api.proto,sha256=QMbYqwjjC7ujOecQf9VT9OzI6H_f6wdafCt1lBhCoMU,10306
|
62
|
+
indexify/proto/executor_api_pb2.py,sha256=R74YKjzxD9o_SQgamySlsyfMjUEXMXmIONXL0y0DI8Y,15109
|
63
|
+
indexify/proto/executor_api_pb2.pyi,sha256=0DlfrG6TILJl9g_hVcyygxRpZk6KWfp7rnuiFT3FYQM,19784
|
64
64
|
indexify/proto/executor_api_pb2_grpc.py,sha256=GGiDtyQlA2382E_ZyKUBYcWNEJHH_RlulieStKfkJXI,9514
|
65
|
-
indexify-0.3.
|
66
|
-
indexify-0.3.
|
67
|
-
indexify-0.3.
|
68
|
-
indexify-0.3.
|
65
|
+
indexify-0.3.29.dist-info/METADATA,sha256=FLJLhnVYJDWtEU3L6SGfHQDSNe9TdPCZjt-lLqVKYIM,1237
|
66
|
+
indexify-0.3.29.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
|
67
|
+
indexify-0.3.29.dist-info/entry_points.txt,sha256=GU9wmsgvN7nQw3N2X0PMYn1RSvF6CrhH9RuC2D8d3Gk,53
|
68
|
+
indexify-0.3.29.dist-info/RECORD,,
|
File without changes
|
File without changes
|