indexify 0.3.22__py3-none-any.whl → 0.3.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
indexify/cli/cli.py CHANGED
@@ -7,7 +7,6 @@ from tensorlake.utils.logging import (
7
7
  configure_logging_early()
8
8
 
9
9
  import os
10
- import re
11
10
  import shutil
12
11
  import sys
13
12
  from importlib.metadata import version
@@ -33,6 +32,8 @@ from indexify.executor.executor_flavor import ExecutorFlavor
33
32
  from indexify.executor.function_executor.server.subprocess_function_executor_server_factory import (
34
33
  SubprocessFunctionExecutorServerFactory,
35
34
  )
35
+ from indexify.executor.host_resources.host_resources import HostResourcesProvider
36
+ from indexify.executor.host_resources.nvidia_gpu_allocator import NvidiaGPUAllocator
36
37
  from indexify.executor.monitoring.health_checker.generic_health_checker import (
37
38
  GenericHealthChecker,
38
39
  )
@@ -225,6 +226,7 @@ def executor(
225
226
  monitoring_server_port=monitoring_server_port,
226
227
  enable_grpc_state_reconciler=enable_grpc_state_reconciler,
227
228
  blob_store=blob_store,
229
+ host_resources_provider=HostResourcesProvider(NvidiaGPUAllocator(logger)),
228
230
  ).run()
229
231
 
230
232
 
@@ -10,6 +10,25 @@ class DataPayload(BaseModel):
10
10
  content_type: Optional[str] = None
11
11
 
12
12
 
13
class NodeGPU(BaseModel):
    """GPU section of a task's resources (referenced by TaskResources.gpu)."""

    # Number of GPUs.
    count: int
    # GPU model name as a plain string.
    model: str
16
+
17
+
18
class TaskResources(BaseModel):
    """Resources attached to a Task (see the Task.resources field)."""

    # CPU amount; a float, so fractional values are representable.
    cpus: float
    # Memory amount, in megabytes per the field name suffix.
    memory_mb: int
    # Ephemeral disk amount, in megabytes per the field name suffix.
    ephemeral_disk_mb: int
    # GPU section; None when no GPU information is provided.
    gpu: Optional[NodeGPU] = None
23
+
24
+
25
class TaskRetryPolicy(BaseModel):
    """Retry settings attached to a Task (see the Task.retry_policy field).

    NOTE(review): the field names suggest a backoff schedule where the delay
    starts at initial_delay_sec, is scaled by delay_multiplier and is capped
    at max_delay_sec -- confirm against the code that consumes this model.
    """

    # Maximum number of retries.
    max_retries: int
    # Initial delay, in seconds per the field name suffix.
    initial_delay_sec: float
    # Maximum delay, in seconds per the field name suffix.
    max_delay_sec: float
    # Delay multiplier.
    delay_multiplier: float
30
+
31
+
13
32
  class Task(BaseModel):
14
33
  id: str
15
34
  namespace: str
@@ -27,6 +46,9 @@ class Task(BaseModel):
27
46
  input_payload: Optional[DataPayload] = None
28
47
  reducer_input_payload: Optional[DataPayload] = None
29
48
  output_payload_uri_prefix: Optional[str] = None
49
+ timeout: Optional[int] = None # in seconds
50
+ resources: Optional[TaskResources] = None
51
+ retry_policy: Optional[TaskRetryPolicy] = None
30
52
 
31
53
 
32
54
  class FunctionURI(BaseModel):
@@ -24,6 +24,7 @@ from .function_executor.server.function_executor_server_factory import (
24
24
  from .grpc.channel_manager import ChannelManager
25
25
  from .grpc.state_reconciler import ExecutorStateReconciler
26
26
  from .grpc.state_reporter import ExecutorStateReporter
27
+ from .host_resources.host_resources import HostResourcesProvider
27
28
  from .metrics.executor import (
28
29
  METRIC_TASKS_COMPLETED_OUTCOME_ALL,
29
30
  METRIC_TASKS_COMPLETED_OUTCOME_ERROR_CUSTOMER_CODE,
@@ -71,6 +72,7 @@ class Executor:
71
72
  monitoring_server_port: int,
72
73
  enable_grpc_state_reconciler: bool,
73
74
  blob_store: BLOBStore,
75
+ host_resources_provider: HostResourcesProvider,
74
76
  ):
75
77
  self._logger = structlog.get_logger(module=__name__)
76
78
  self._is_shutdown: bool = False
@@ -118,6 +120,7 @@ class Executor:
118
120
  function_allowlist=self._function_allowlist,
119
121
  function_executor_states=self._function_executor_states,
120
122
  channel_manager=self._channel_manager,
123
+ host_resources_provider=host_resources_provider,
121
124
  logger=self._logger,
122
125
  )
123
126
  self._state_reporter.update_executor_status(
@@ -171,6 +174,7 @@ class Executor:
171
174
  executor_info: Dict[str, str] = {
172
175
  "id": id,
173
176
  "dev_mode": str(development_mode),
177
+ "flavor": flavor.name,
174
178
  "version": version,
175
179
  "code_path": str(code_path),
176
180
  "server_addr": server_addr,
@@ -179,6 +183,8 @@ class Executor:
179
183
  "enable_grpc_state_reconciler": str(enable_grpc_state_reconciler),
180
184
  "hostname": gethostname(),
181
185
  }
186
+ for key, value in labels.items():
187
+ executor_info["label_" + key] = value
182
188
  executor_info.update(function_allowlist_to_info_dict(function_allowlist))
183
189
  metric_executor_info.info(executor_info)
184
190
 
@@ -24,6 +24,10 @@ class FunctionExecutorServerConfiguration:
24
24
  graph_version: str
25
25
  image_uri: Optional[str]
26
26
  secret_names: List[str]
27
+ cpu_ms_per_sec: Optional[int]
28
+ memory_bytes: Optional[int]
29
+ disk_bytes: Optional[int]
30
+ gpu_count: int
27
31
 
28
32
 
29
33
  class FunctionExecutorServerFactory:
@@ -1,4 +1,5 @@
1
1
  from collections.abc import Awaitable, Callable
2
+ from math import ceil
2
3
  from typing import Any, Optional
3
4
 
4
5
  import grpc
@@ -131,16 +132,33 @@ class SingleTaskRunner:
131
132
  self._function_executor_state.function_executor = FunctionExecutor(
132
133
  server_factory=self._function_executor_server_factory, logger=self._logger
133
134
  )
135
+ task: Task = self._task_input.task
134
136
  config: FunctionExecutorServerConfiguration = (
135
137
  FunctionExecutorServerConfiguration(
136
138
  executor_id=self._executor_id,
137
139
  function_executor_id=self._function_executor_state.id,
138
- namespace=self._task_input.task.namespace,
139
- image_uri=self._task_input.task.image_uri,
140
- secret_names=self._task_input.task.secret_names or [],
141
- graph_name=self._task_input.task.compute_graph,
142
- graph_version=self._task_input.task.graph_version,
143
- function_name=self._task_input.task.compute_fn,
140
+ namespace=task.namespace,
141
+ graph_name=task.compute_graph,
142
+ graph_version=task.graph_version,
143
+ function_name=task.compute_fn,
144
+ image_uri=task.image_uri,
145
+ secret_names=task.secret_names or [],
146
+ cpu_ms_per_sec=(
147
+ None
148
+ if task.resources.cpus is None
149
+ else ceil(task.resources.cpus * 1000)
150
+ ),
151
+ memory_bytes=(
152
+ None
153
+ if task.resources.memory_mb is None
154
+ else task.resources.memory_mb * 1024 * 1024
155
+ ),
156
+ disk_bytes=(
157
+ None
158
+ if task.resources.ephemeral_disk_mb is None
159
+ else task.resources.ephemeral_disk_mb * 1024 * 1024
160
+ ),
161
+ gpu_count=0 if task.resources.gpu is None else task.resources.gpu.count,
144
162
  )
145
163
  )
146
164
  initialize_request: InitializeRequest = InitializeRequest(
@@ -9,6 +9,7 @@ from tensorlake.function_executor.proto.message_validator import MessageValidato
9
9
 
10
10
  from indexify.proto.executor_api_pb2 import (
11
11
  FunctionExecutorDescription,
12
+ FunctionExecutorResources,
12
13
  )
13
14
  from indexify.proto.executor_api_pb2 import (
14
15
  FunctionExecutorStatus as FunctionExecutorStatusProto,
@@ -42,6 +43,13 @@ def validate_function_executor_description(
42
43
  # image_uri is optional.
43
44
  # secret_names can be empty.
44
45
  # resource_limits is optional.
46
+ # TODO: Make resources required after we migrate Server to them.
47
+ # validator.required_field("resources")
48
+ # validator = MessageValidator(function_executor_description.resources)
49
+ # validator.required_field("cpu_ms_per_sec")
50
+ # validator.required_field("memory_bytes")
51
+ # validator.required_field("disk_bytes")
52
+ # validator.required_field("gpu_count")
45
53
 
46
54
 
47
55
  def function_executor_logger(
@@ -333,14 +341,24 @@ async def _create_function_executor(
333
341
  executor_id=executor_id,
334
342
  function_executor_id=function_executor_description.id,
335
343
  namespace=function_executor_description.namespace,
336
- image_uri=None,
337
- secret_names=list(function_executor_description.secret_names),
338
344
  graph_name=function_executor_description.graph_name,
339
345
  graph_version=function_executor_description.graph_version,
340
346
  function_name=function_executor_description.function_name,
347
+ image_uri=None,
348
+ secret_names=list(function_executor_description.secret_names),
349
+ cpu_ms_per_sec=None,
350
+ memory_bytes=None,
351
+ disk_bytes=None,
352
+ gpu_count=0,
341
353
  )
342
354
  if function_executor_description.HasField("image_uri"):
343
355
  config.image_uri = function_executor_description.image_uri
356
+ if function_executor_description.HasField("resources"):
357
+ resources: FunctionExecutorResources = function_executor_description.resources
358
+ config.cpu_ms_per_sec = resources.cpu_ms_per_sec
359
+ config.memory_bytes = resources.memory_bytes
360
+ config.disk_bytes = resources.disk_bytes
361
+ config.gpu_count = resources.gpu_count
344
362
 
345
363
  initialize_request: InitializeRequest = InitializeRequest(
346
364
  namespace=function_executor_description.namespace,
@@ -18,10 +18,12 @@ from indexify.proto.executor_api_pb2 import (
18
18
  from indexify.proto.executor_api_pb2 import (
19
19
  FunctionExecutorStatus as FunctionExecutorStatusProto,
20
20
  )
21
+ from indexify.proto.executor_api_pb2 import GPUModel as GPUModelProto
21
22
  from indexify.proto.executor_api_pb2 import (
22
- GPUModel,
23
23
  GPUResources,
24
- HostResources,
24
+ )
25
+ from indexify.proto.executor_api_pb2 import HostResources as HostResourcesProto
26
+ from indexify.proto.executor_api_pb2 import (
25
27
  ReportExecutorStateRequest,
26
28
  )
27
29
  from indexify.proto.executor_api_pb2_grpc import ExecutorAPIStub
@@ -33,6 +35,7 @@ from ..function_executor.function_executor_states_container import (
33
35
  FunctionExecutorStatesContainer,
34
36
  )
35
37
  from ..function_executor.function_executor_status import FunctionExecutorStatus
38
+ from ..host_resources.host_resources import HostResources, HostResourcesProvider
36
39
  from ..runtime_probes import RuntimeProbes
37
40
  from .channel_manager import ChannelManager
38
41
  from .metrics.state_reporter import (
@@ -57,6 +60,7 @@ class ExecutorStateReporter:
57
60
  function_allowlist: Optional[List[FunctionURI]],
58
61
  function_executor_states: FunctionExecutorStatesContainer,
59
62
  channel_manager: ChannelManager,
63
+ host_resources_provider: HostResourcesProvider,
60
64
  logger: Any,
61
65
  reporting_interval_sec: int = _REPORTING_INTERVAL_SEC,
62
66
  ):
@@ -72,6 +76,9 @@ class ExecutorStateReporter:
72
76
  self._channel_manager = channel_manager
73
77
  self._logger: Any = logger.bind(module=__name__)
74
78
  self._reporting_interval_sec: int = reporting_interval_sec
79
+ self._total_host_resources: HostResourcesProto = _host_resources_to_proto(
80
+ host_resources_provider.total_resources(logger)
81
+ )
75
82
 
76
83
  self._is_shutdown: bool = False
77
84
  self._executor_status: ExecutorStatus = ExecutorStatus.EXECUTOR_STATUS_UNKNOWN
@@ -130,7 +137,9 @@ class ExecutorStateReporter:
130
137
  flavor=_to_grpc_executor_flavor(self._flavor, self._logger),
131
138
  version=self._version,
132
139
  status=self._executor_status,
133
- free_resources=await self._fetch_free_host_resources(),
140
+ # Server requires free_resources to be set but ignores its value for now.
141
+ free_resources=self._total_host_resources,
142
+ total_resources=self._total_host_resources,
134
143
  allowed_functions=self._allowed_functions,
135
144
  function_executor_states=await self._fetch_function_executor_states(),
136
145
  labels=self._labels,
@@ -151,18 +160,6 @@ class ExecutorStateReporter:
151
160
  """
152
161
  self._is_shutdown = True
153
162
 
154
- async def _fetch_free_host_resources(self) -> HostResources:
155
- # TODO: Implement host resource metrics reporting.
156
- return HostResources(
157
- cpu_count=0,
158
- memory_bytes=0,
159
- disk_bytes=0,
160
- gpu=GPUResources(
161
- count=0,
162
- model=GPUModel.GPU_MODEL_UNKNOWN,
163
- ),
164
- )
165
-
166
163
  async def _fetch_function_executor_states(self) -> List[FunctionExecutorStateProto]:
167
164
  states = []
168
165
 
@@ -264,3 +261,20 @@ def _state_hash(state: ExecutorState) -> str:
264
261
  hasher = hashlib.sha256(usedforsecurity=False)
265
262
  hasher.update(serialized_state)
266
263
  return hasher.hexdigest()
264
+
265
+
266
def _host_resources_to_proto(host_resources: HostResources) -> HostResourcesProto:
    """Converts HostResources into its protobuf representation.

    Args:
        host_resources: Host resources to convert. memory_mb and disk_mb are
            converted to bytes by multiplying with 1024 * 1024.

    Returns:
        HostResourcesProto with cpu_count, memory_bytes and disk_bytes set.
        The gpu field is only set when the host has at least one GPU.
    """
    proto = HostResourcesProto(
        cpu_count=host_resources.cpu_count,
        memory_bytes=host_resources.memory_mb * 1024 * 1024,
        disk_bytes=host_resources.disk_mb * 1024 * 1024,
    )
    if host_resources.gpus:
        # Python protobuf does not allow assigning to a message-typed field
        # (`proto.gpu = ...` raises AttributeError), so copy the value in.
        proto.gpu.CopyFrom(
            GPUResources(
                count=len(host_resources.gpus),
                deprecated_model=GPUModelProto.GPU_MODEL_UNKNOWN,  # TODO: Remove this field
                # All GPUs should have the same model
                model=host_resources.gpus[0].model.value,
            )
        )
    return proto
@@ -0,0 +1,50 @@
1
+ from typing import List
2
+
3
+ from pydantic import BaseModel
4
+
5
+ from .nvidia_gpu import NvidiaGPUInfo
6
+ from .nvidia_gpu_allocator import NvidiaGPUAllocator
7
+
8
+
9
class HostResources(BaseModel):
    """Snapshot of hardware resources of a host."""

    # Number of CPUs.
    cpu_count: int
    # Memory amount, in megabytes per the field name suffix.
    memory_mb: int
    # Disk amount, in megabytes per the field name suffix.
    disk_mb: int
    # One entry per NVIDIA GPU; empty when there are none.
    gpus: List[NvidiaGPUInfo]
14
+
15
+
16
class HostResourcesProvider:
    """
    Reports hardware resources of the host.

    GPU information is taken from the supplied NvidiaGPUAllocator; CPU, memory
    and disk values are not implemented yet and are always reported as 0.
    """

    def __init__(self, gpu_allocator: NvidiaGPUAllocator):
        self._gpu_allocator: NvidiaGPUAllocator = gpu_allocator

    def total_resources(self, logger) -> HostResources:
        """Returns all hardware resources that exist at the host.

        Raises Exception on error.
        """
        # The bound logger is currently unused; kept for future implementations.
        logger = logger.bind(module=__name__)

        all_gpus = self._gpu_allocator.list_all()
        # TODO: Implement cpu_count, memory_mb, disk_mb for Linux and MacOS hosts
        return HostResources(cpu_count=0, memory_mb=0, disk_mb=0, gpus=all_gpus)

    def free_resources(self, logger) -> HostResources:
        """Returns all hardware resources that are free at the host.

        Raises Exception on error.
        """
        # The bound logger is currently unused; kept for future implementations.
        logger = logger.bind(module=__name__)

        unallocated_gpus = self._gpu_allocator.list_free()
        # TODO: Implement cpu_count, memory_mb, disk_mb for Linux and MacOS hosts
        return HostResources(
            cpu_count=0, memory_mb=0, disk_mb=0, gpus=unallocated_gpus
        )
@@ -0,0 +1,77 @@
1
+ import subprocess
2
+ from enum import Enum
3
+ from typing import Any, List
4
+
5
+ from pydantic import BaseModel
6
+ from tensorlake.functions_sdk.resources import GPU_MODEL
7
+
8
+
9
+ # Only NVIDIA GPUs currently supported in Tensorlake SDK are listed here.
10
class NVIDIA_GPU_MODEL(str, Enum):
    """NVIDIA GPU models known to this module.

    Members other than UNKNOWN take their values from the Tensorlake SDK
    GPU_MODEL constants.
    """

    # Used when the product name reported for a GPU is not recognized.
    UNKNOWN = "UNKNOWN"
    A100_40GB = GPU_MODEL.A100_40GB
    A100_80GB = GPU_MODEL.A100_80GB
    # The SDK constant is named H100; it is exposed here as H100_80GB.
    H100_80GB = GPU_MODEL.H100
15
+
16
+
17
class NvidiaGPUInfo(BaseModel):
    """Description of a single NVIDIA GPU, built from nvidia-smi query output."""

    # GPU index as printed by nvidia-smi (kept as a string).
    index: str
    # GPU UUID as printed by nvidia-smi.
    uuid: str
    product_name: str  # The official product name.
    # Model derived from product_name; UNKNOWN when not recognized.
    model: NVIDIA_GPU_MODEL
22
+
23
+
24
def nvidia_gpus_are_available() -> bool:
    """Returns True if running `nvidia-smi` succeeds with exit code 0.

    Any exception raised while running the command (e.g. the binary is not
    installed) is treated as "no NVIDIA GPUs available" and yields False.
    """
    try:
        completed: subprocess.CompletedProcess = subprocess.run(
            ["nvidia-smi"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
        )
    except Exception:
        return False
    return completed.returncode == 0
32
+
33
+
34
def _nvidia_gpu_model_from_product_name(product_name: str) -> NVIDIA_GPU_MODEL:
    """Maps an nvidia-smi product name to a known model, UNKNOWN if not recognized."""
    if product_name.startswith("NVIDIA A100"):
        if product_name.endswith("80GB"):
            return NVIDIA_GPU_MODEL.A100_80GB
        if product_name.endswith("40GB"):
            return NVIDIA_GPU_MODEL.A100_40GB
    elif product_name.startswith("NVIDIA H100"):
        return NVIDIA_GPU_MODEL.H100_80GB
    return NVIDIA_GPU_MODEL.UNKNOWN


def fetch_nvidia_gpu_infos(logger: Any) -> List[NvidiaGPUInfo]:
    """Queries nvidia-smi and returns one NvidiaGPUInfo per reported GPU.

    Args:
        logger: Logger supporting `bind`, `info`, `warning` and `error`.

    Returns:
        GPU infos in the order nvidia-smi printed them. GPUs whose product name
        is not recognized get model UNKNOWN and a warning is logged.

    Raises:
        subprocess.CalledProcessError: If nvidia-smi exits with a non-zero code
            (logged before re-raising).
    """
    logger = logger.bind(module=__name__)
    logger.info("Fetching GPU information")

    try:
        result: subprocess.CompletedProcess = subprocess.run(
            ["nvidia-smi", "--query-gpu=index,name,uuid", "--format=csv,noheader"],
            capture_output=True,
            check=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        logger.error("Failed to fetch GPU information", exc_info=e)
        raise

    infos: List[NvidiaGPUInfo] = []
    for line in result.stdout.splitlines():
        # Blank lines carry no GPU record; indexing their fields would raise IndexError.
        if not line.strip():
            continue

        # Example:
        # nvidia-smi --query-gpu=index,name,uuid --format=csv,noheader
        # 0, NVIDIA A100-SXM4-80GB, GPU-89fdc1e1-18b2-f499-c12b-82bcb9bfb3fa
        # 1, NVIDIA A100-PCIE-40GB, GPU-e9c9aa65-bff3-405a-ab7c-dc879cc88169
        # 2, NVIDIA H100 80GB HBM3, GPU-8c35f4c9-4dff-c9a2-866f-afb5d82e1dd7
        parts = line.split(",")
        index = parts[0].strip()
        product_name = parts[1].strip()
        uuid = parts[2].strip()

        model = _nvidia_gpu_model_from_product_name(product_name)
        if model == NVIDIA_GPU_MODEL.UNKNOWN:
            logger.warning("Unknown GPU model detected", nvidia_smi_output=line)

        infos.append(
            NvidiaGPUInfo(
                index=index, uuid=uuid, product_name=product_name, model=model
            )
        )
    return infos
@@ -0,0 +1,52 @@
1
+ from typing import Any, List
2
+
3
+ from .nvidia_gpu import NvidiaGPUInfo, fetch_nvidia_gpu_infos, nvidia_gpus_are_available
4
+
5
+
6
class NvidiaGPUAllocator:
    """Tracks which of the host's NVIDIA GPUs are free and hands them out on request."""

    def __init__(self, logger: Any):
        """Discovers the host's NVIDIA GPUs; all of them start out as free."""
        discovered: List[NvidiaGPUInfo] = []

        if nvidia_gpus_are_available():
            discovered = fetch_nvidia_gpu_infos(logger)
            logger.bind(module=__name__).info(
                "Fetched information about NVIDIA GPUs:", info=discovered
            )

        self._all_gpus: List[NvidiaGPUInfo] = discovered
        # Separate list object so allocations never mutate the full inventory.
        self._free_gpus: List[NvidiaGPUInfo] = list(discovered)

    def allocate(self, count: int, logger: Any) -> List[NvidiaGPUInfo]:
        """
        Takes `count` GPUs out of the free pool.

        Args:
            count (int): The number of GPUs to allocate.

        Returns:
            List[NvidiaGPUInfo]: The allocated GPUs, taken from the end of the
            free list. The list is empty if count is 0.

        Raises:
            ValueError: If the requested number of GPUs exceeds free GPUs.
        """
        if count > len(self._free_gpus):
            raise ValueError(
                f"Not enough free GPUs available, requested={count}, available={len(self._free_gpus)}"
            )

        return [self._free_gpus.pop() for _ in range(count)]

    def deallocate(self, gpus: List[NvidiaGPUInfo]) -> None:
        """Returns the given GPUs to the end of the free pool."""
        self._free_gpus.extend(gpus)

    def list_all(self) -> List[NvidiaGPUInfo]:
        """Returns a copy of the list of all GPUs, so callers can't modify internal state."""
        return list(self._all_gpus)

    def list_free(self) -> List[NvidiaGPUInfo]:
        """Returns a copy of the list of free GPUs, so callers can't modify internal state."""
        return list(self._free_gpus)
@@ -28,26 +28,17 @@ message DataPayload {
28
28
 
29
29
  // ===== report_executor_state RPC =====
30
30
 
31
+ // Deprecated enum. TODO: remove when all the code is using model string.
31
32
  enum GPUModel {
32
33
  GPU_MODEL_UNKNOWN = 0;
33
- GPU_MODEL_NVIDIA_TESLA_T4_16GB = 10;
34
- GPU_MODEL_NVIDIA_TESLA_V100_16GB = 20;
35
- GPU_MODEL_NVIDIA_A10_24GB = 30;
36
- GPU_MODEL_NVIDIA_A6000_48GB = 40;
37
- // A100 GPUs
38
- GPU_MODEL_NVIDIA_A100_SXM4_40GB = 50;
39
- GPU_MODEL_NVIDIA_A100_SXM4_80GB = 51;
40
- GPU_MODEL_NVIDIA_A100_PCI_40GB = 52;
41
- // H100 GPUs
42
- GPU_MODEL_NVIDIA_H100_SXM5_80GB = 60;
43
- GPU_MODEL_NVIDIA_H100_PCI_80GB = 61;
44
- GPU_MODEL_NVIDIA_RTX_6000_24GB = 62;
45
34
  }
46
35
 
47
36
  // Free GPUs available at the Executor.
48
37
  message GPUResources {
49
38
  optional uint32 count = 1;
50
- optional GPUModel model = 2;
39
+ optional GPUModel deprecated_model = 2;
40
+ // Either GPU_MODEL value from Tensorlake SDK or "UNKNOWN"
41
+ optional string model = 3;
51
42
  }
52
43
 
53
44
  // Resources that we're currently tracking and limiting on Executor.
@@ -55,7 +46,8 @@ message HostResources {
55
46
  optional uint32 cpu_count = 1;
56
47
  optional uint64 memory_bytes = 2;
57
48
  optional uint64 disk_bytes = 3;
58
- optional GPUResources gpu = 4;
49
+ // Not set if no GPUs are available.
50
+ optional GPUResources gpu = 4;
59
51
  }
60
52
 
61
53
  // Specification of a single function that is allowed to be run on the Executor.
@@ -83,6 +75,15 @@ enum FunctionExecutorStatus {
83
75
  }
84
76
 
85
77
  // Immutable information that identifies and describes a Function Executor.
78
// Resources of a Function Executor (see FunctionExecutorDescription.resources).
message FunctionExecutorResources {
  // 1000 CPU ms per sec is one full CPU core.
  // 2000 CPU ms per sec is two full CPU cores.
  optional uint32 cpu_ms_per_sec = 1;
  // Memory amount in bytes.
  optional uint64 memory_bytes = 2;
  // Disk amount in bytes.
  optional uint64 disk_bytes = 3;
  // Number of GPUs.
  optional uint32 gpu_count = 4;
}
86
+
86
87
  message FunctionExecutorDescription {
87
88
  optional string id = 1;
88
89
  optional string namespace = 2;
@@ -95,6 +96,7 @@ message FunctionExecutorDescription {
95
96
  // Timeout for customer code duration during FE creation.
96
97
  optional uint32 customer_code_timeout_ms = 9;
97
98
  optional DataPayload graph = 10;
99
+ optional FunctionExecutorResources resources = 11;
98
100
  }
99
101
 
100
102
  message FunctionExecutorState {
@@ -128,8 +130,10 @@ message ExecutorState {
128
130
  optional ExecutorFlavor flavor = 4;
129
131
  optional string version = 5;
130
132
  optional ExecutorStatus status = 6;
133
+ // Total resources available at the Executor.
134
+ optional HostResources total_resources = 13;
131
135
  // Free resources available at the Executor.
132
- optional HostResources free_resources = 7;
136
+ optional HostResources free_resources = 7; // Not used right now.
133
137
  // Empty allowed_functions list means that any function can run on the Executor.
134
138
  repeated AllowedFunction allowed_functions = 8;
135
139
  repeated FunctionExecutorState function_executor_states = 9;
@@ -150,6 +154,14 @@ message ReportExecutorStateResponse {
150
154
  }
151
155
 
152
156
  // ===== get_desired_executor_states RPC =====
157
// Retry settings of a Task (see Task.retry_policy).
message TaskRetryPolicy {
  // Maximum number of retries.
  optional uint32 max_retries = 1;
  // Initial delay in milliseconds.
  optional uint32 initial_delay_ms = 2;
  // Maximum delay in milliseconds.
  optional uint32 max_delay_ms = 3;
  // The multiplier value is 1000x of the actual value to avoid working with floating point.
  optional uint32 delay_multiplier = 4;
}
164
+
153
165
  message Task {
154
166
  optional string id = 1;
155
167
  optional string namespace = 2;
@@ -166,6 +178,7 @@ message Task {
166
178
  // S3 URI if the data is stored in S3.
167
179
  // Starts with "file://" prefix followed by an absolute directory path if the data is stored on a local file system.
168
180
  optional string output_payload_uri_prefix = 13;
181
+ optional TaskRetryPolicy retry_policy = 14;
169
182
  }
170
183
 
171
184
  message TaskAllocation {
@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
19
19
 
20
20
 
21
21
  DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
22
- b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\x87\x02\n\x0b\x44\x61taPayload\x12\x11\n\x04path\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04size\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x03\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x04\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x05\x88\x01\x01\x42\x07\n\x05_pathB\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"e\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_model"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xed\x03\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12<\n\x0fresource_limits\x18\x08 
\x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x07\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x12\n\x10_resource_limitsB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graph"\xe8\x01\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12\x1b\n\x0estatus_message\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_statusB\x11\n\x0f_status_message"\xcb\x05\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1d\n\x10\x64\x65velopment_mode\x18\x02 \x01(\x08H\x01\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06\x66lavor\x18\x04 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorFlavorH\x03\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x34\n\x06status\x18\x06 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x05\x88\x01\x01\x12;\n\x0e\x66ree_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x07\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\x08\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 
\x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x13\n\x11_development_modeB\x0b\n\t_hostnameB\t\n\x07_flavorB\n\n\x08_versionB\t\n\x07_statusB\x11\n\x0f_free_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"l\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x42\x11\n\x0f_executor_state"\x1d\n\x1bReportExecutorStateResponse"\xd6\x04\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x16\n\tinput_key\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x1f\n\x12reducer_output_key\x18\t \x01(\tH\x07\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x08\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\x0b\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\x0c\n\n_input_keyB\x15\n\x13_reducer_output_keyB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefix"\x7f\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_task"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 
\x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\x87\x06\n\x18ReportTaskOutcomeRequest\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x04\x88\x01\x01\x12\x32\n\x07outcome\x18\x07 \x01(\x0e\x32\x1c.executor_api_pb.TaskOutcomeH\x05\x88\x01\x01\x12\x1a\n\rinvocation_id\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x18\n\x0b\x65xecutor_id\x18\t \x01(\tH\x07\x88\x01\x01\x12\x14\n\x07reducer\x18\n \x01(\x08H\x08\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x30\n\nfn_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x31\n\x06stderr\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12=\n\x0foutput_encoding\x18\r \x01(\x0e\x32\x1f.executor_api_pb.OutputEncodingH\x0b\x88\x01\x01\x12$\n\x17output_encoding_version\x18\x05 \x01(\x04H\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_outcomeB\x10\n\x0e_invocation_idB\x0e\n\x0c_executor_idB\n\n\x08_reducerB\t\n\x07_stdoutB\t\n\x07_stderrB\x12\n\x10_output_encodingB\x1a\n\x18_output_encoding_version"\x1b\n\x19ReportTaskOutcomeResponse*\xab\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03*\x86\x03\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12"\n\x1eGPU_MODEL_NVIDIA_TESLA_T4_16GB\x10\n\x12$\n 
GPU_MODEL_NVIDIA_TESLA_V100_16GB\x10\x14\x12\x1d\n\x19GPU_MODEL_NVIDIA_A10_24GB\x10\x1e\x12\x1f\n\x1bGPU_MODEL_NVIDIA_A6000_48GB\x10(\x12#\n\x1fGPU_MODEL_NVIDIA_A100_SXM4_40GB\x10\x32\x12#\n\x1fGPU_MODEL_NVIDIA_A100_SXM4_80GB\x10\x33\x12"\n\x1eGPU_MODEL_NVIDIA_A100_PCI_40GB\x10\x34\x12#\n\x1fGPU_MODEL_NVIDIA_H100_SXM5_80GB\x10<\x12"\n\x1eGPU_MODEL_NVIDIA_H100_PCI_80GB\x10=\x12"\n\x1eGPU_MODEL_NVIDIA_RTX_6000_24GB\x10>*\xca\x03\n\x16\x46unctionExecutorStatus\x12$\n FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12(\n$FUNCTION_EXECUTOR_STATUS_STARTING_UP\x10\x01\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR\x10\x02\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR\x10\x03\x12!\n\x1d\x46UNCTION_EXECUTOR_STATUS_IDLE\x10\x04\x12)\n%FUNCTION_EXECUTOR_STATUS_RUNNING_TASK\x10\x05\x12&\n"FUNCTION_EXECUTOR_STATUS_UNHEALTHY\x10\x06\x12%\n!FUNCTION_EXECUTOR_STATUS_STOPPING\x10\x07\x12$\n FUNCTION_EXECUTOR_STATUS_STOPPED\x10\x08\x12%\n!FUNCTION_EXECUTOR_STATUS_SHUTDOWN\x10\t*\xc3\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1c\n\x18\x45XECUTOR_STATUS_STOPPING\x10\x04\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x05*d\n\x0e\x45xecutorFlavor\x12\x1b\n\x17\x45XECUTOR_FLAVOR_UNKNOWN\x10\x00\x12\x17\n\x13\x45XECUTOR_FLAVOR_OSS\x10\x01\x12\x1c\n\x18\x45XECUTOR_FLAVOR_PLATFORM\x10\x02*[\n\x0bTaskOutcome\x12\x18\n\x14TASK_OUTCOME_UNKNOWN\x10\x00\x12\x18\n\x14TASK_OUTCOME_SUCCESS\x10\x01\x12\x18\n\x14TASK_OUTCOME_FAILURE\x10\x02*\x7f\n\x0eOutputEncoding\x12\x1b\n\x17OUTPUT_ENCODING_UNKNOWN\x10\x00\x12\x18\n\x14OUTPUT_ENCODING_JSON\x10\x01\x12\x1a\n\x16OUTPUT_ENCODING_PICKLE\x10\x02\x12\x1a\n\x16OUTPUT_ENCODING_BINARY\x10\x03\x32\xef\x02\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x0
0\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x12n\n\x13report_task_outcome\x12).executor_api_pb.ReportTaskOutcomeRequest\x1a*.executor_api_pb.ReportTaskOutcomeResponse"\x00\x62\x06proto3'
22
+ b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\x87\x02\n\x0b\x44\x61taPayload\x12\x11\n\x04path\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04size\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x03\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x04\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x05\x88\x01\x01\x42\x07\n\x05_pathB\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"\x99\x01\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x38\n\x10\x64\x65precated_model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x12\x12\n\x05model\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x08\n\x06_countB\x13\n\x11_deprecated_modelB\x08\n\x06_model"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xc5\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12\x16\n\tgpu_count\x18\x04 
\x01(\rH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x0c\n\n_gpu_count"\xbf\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12<\n\x0fresource_limits\x18\x08 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x07\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x12\n\x10_resource_limitsB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resources"\xe8\x01\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12\x1b\n\x0estatus_message\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_statusB\x11\n\x0f_status_message"\x9d\x06\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1d\n\x10\x64\x65velopment_mode\x18\x02 \x01(\x08H\x01\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x34\n\x06\x66lavor\x18\x04 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorFlavorH\x03\x88\x01\x01\x12\x14\n\x07version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x34\n\x06status\x18\x06 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x05\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r 
\x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x06\x88\x01\x01\x12;\n\x0e\x66ree_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x07\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x08\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\t\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x13\n\x11_development_modeB\x0b\n\t_hostnameB\t\n\x07_flavorB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB\x11\n\x0f_free_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"l\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x42\x11\n\x0f_executor_state"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 \x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xa4\x05\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x16\n\tinput_key\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x1f\n\x12reducer_output_key\x18\t \x01(\tH\x07\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x08\x88\x01\x01\x12\x30\n\x05input\x18\x0b 
\x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\x0b\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\x0c\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\x0c\n\n_input_keyB\x15\n\x13_reducer_output_keyB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\x7f\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_task"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\x87\x06\n\x18ReportTaskOutcomeRequest\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x04\x88\x01\x01\x12\x32\n\x07outcome\x18\x07 \x01(\x0e\x32\x1c.executor_api_pb.TaskOutcomeH\x05\x88\x01\x01\x12\x1a\n\rinvocation_id\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x18\n\x0b\x65xecutor_id\x18\t \x01(\tH\x07\x88\x01\x01\x12\x14\n\x07reducer\x18\n \x01(\x08H\x08\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x30\n\nfn_outputs\x18\x0c 
\x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x31\n\x06stderr\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12=\n\x0foutput_encoding\x18\r \x01(\x0e\x32\x1f.executor_api_pb.OutputEncodingH\x0b\x88\x01\x01\x12$\n\x17output_encoding_version\x18\x05 \x01(\x04H\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\n\n\x08_outcomeB\x10\n\x0e_invocation_idB\x0e\n\x0c_executor_idB\n\n\x08_reducerB\t\n\x07_stdoutB\t\n\x07_stderrB\x12\n\x10_output_encodingB\x1a\n\x18_output_encoding_version"\x1b\n\x19ReportTaskOutcomeResponse*\xab\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03*!\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00*\xca\x03\n\x16\x46unctionExecutorStatus\x12$\n FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12(\n$FUNCTION_EXECUTOR_STATUS_STARTING_UP\x10\x01\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR\x10\x02\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR\x10\x03\x12!\n\x1d\x46UNCTION_EXECUTOR_STATUS_IDLE\x10\x04\x12)\n%FUNCTION_EXECUTOR_STATUS_RUNNING_TASK\x10\x05\x12&\n"FUNCTION_EXECUTOR_STATUS_UNHEALTHY\x10\x06\x12%\n!FUNCTION_EXECUTOR_STATUS_STOPPING\x10\x07\x12$\n 
FUNCTION_EXECUTOR_STATUS_STOPPED\x10\x08\x12%\n!FUNCTION_EXECUTOR_STATUS_SHUTDOWN\x10\t*\xc3\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1c\n\x18\x45XECUTOR_STATUS_STOPPING\x10\x04\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x05*d\n\x0e\x45xecutorFlavor\x12\x1b\n\x17\x45XECUTOR_FLAVOR_UNKNOWN\x10\x00\x12\x17\n\x13\x45XECUTOR_FLAVOR_OSS\x10\x01\x12\x1c\n\x18\x45XECUTOR_FLAVOR_PLATFORM\x10\x02*[\n\x0bTaskOutcome\x12\x18\n\x14TASK_OUTCOME_UNKNOWN\x10\x00\x12\x18\n\x14TASK_OUTCOME_SUCCESS\x10\x01\x12\x18\n\x14TASK_OUTCOME_FAILURE\x10\x02*\x7f\n\x0eOutputEncoding\x12\x1b\n\x17OUTPUT_ENCODING_UNKNOWN\x10\x00\x12\x18\n\x14OUTPUT_ENCODING_JSON\x10\x01\x12\x1a\n\x16OUTPUT_ENCODING_PICKLE\x10\x02\x12\x1a\n\x16OUTPUT_ENCODING_BINARY\x10\x03\x32\xef\x02\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x12n\n\x13report_task_outcome\x12).executor_api_pb.ReportTaskOutcomeRequest\x1a*.executor_api_pb.ReportTaskOutcomeResponse"\x00\x62\x06proto3'
23
23
  )
24
24
 
25
25
  _globals = globals()
@@ -31,52 +31,56 @@ if not _descriptor._USE_C_DESCRIPTORS:
31
31
  DESCRIPTOR._loaded_options = None
32
32
  _globals["_EXECUTORSTATE_LABELSENTRY"]._loaded_options = None
33
33
  _globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_options = b"8\001"
34
- _globals["_DATAPAYLOADENCODING"]._serialized_start = 4203
35
- _globals["_DATAPAYLOADENCODING"]._serialized_end = 4374
36
- _globals["_GPUMODEL"]._serialized_start = 4377
37
- _globals["_GPUMODEL"]._serialized_end = 4767
38
- _globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 4770
39
- _globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 5228
40
- _globals["_EXECUTORSTATUS"]._serialized_start = 5231
41
- _globals["_EXECUTORSTATUS"]._serialized_end = 5426
42
- _globals["_EXECUTORFLAVOR"]._serialized_start = 5428
43
- _globals["_EXECUTORFLAVOR"]._serialized_end = 5528
44
- _globals["_TASKOUTCOME"]._serialized_start = 5530
45
- _globals["_TASKOUTCOME"]._serialized_end = 5621
46
- _globals["_OUTPUTENCODING"]._serialized_start = 5623
47
- _globals["_OUTPUTENCODING"]._serialized_end = 5750
34
+ _globals["_DATAPAYLOADENCODING"]._serialized_start = 4908
35
+ _globals["_DATAPAYLOADENCODING"]._serialized_end = 5079
36
+ _globals["_GPUMODEL"]._serialized_start = 5081
37
+ _globals["_GPUMODEL"]._serialized_end = 5114
38
+ _globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 5117
39
+ _globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 5575
40
+ _globals["_EXECUTORSTATUS"]._serialized_start = 5578
41
+ _globals["_EXECUTORSTATUS"]._serialized_end = 5773
42
+ _globals["_EXECUTORFLAVOR"]._serialized_start = 5775
43
+ _globals["_EXECUTORFLAVOR"]._serialized_end = 5875
44
+ _globals["_TASKOUTCOME"]._serialized_start = 5877
45
+ _globals["_TASKOUTCOME"]._serialized_end = 5968
46
+ _globals["_OUTPUTENCODING"]._serialized_start = 5970
47
+ _globals["_OUTPUTENCODING"]._serialized_end = 6097
48
48
  _globals["_DATAPAYLOAD"]._serialized_start = 55
49
49
  _globals["_DATAPAYLOAD"]._serialized_end = 318
50
- _globals["_GPURESOURCES"]._serialized_start = 320
51
- _globals["_GPURESOURCES"]._serialized_end = 421
52
- _globals["_HOSTRESOURCES"]._serialized_start = 424
53
- _globals["_HOSTRESOURCES"]._serialized_end = 618
54
- _globals["_ALLOWEDFUNCTION"]._serialized_start = 621
55
- _globals["_ALLOWEDFUNCTION"]._serialized_end = 808
56
- _globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_start = 811
57
- _globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_end = 1304
58
- _globals["_FUNCTIONEXECUTORSTATE"]._serialized_start = 1307
59
- _globals["_FUNCTIONEXECUTORSTATE"]._serialized_end = 1539
60
- _globals["_EXECUTORSTATE"]._serialized_start = 1542
61
- _globals["_EXECUTORSTATE"]._serialized_end = 2257
62
- _globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_start = 2077
63
- _globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_end = 2122
64
- _globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_start = 2259
65
- _globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_end = 2367
66
- _globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_start = 2369
67
- _globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_end = 2398
68
- _globals["_TASK"]._serialized_start = 2401
69
- _globals["_TASK"]._serialized_end = 2999
70
- _globals["_TASKALLOCATION"]._serialized_start = 3001
71
- _globals["_TASKALLOCATION"]._serialized_end = 3128
72
- _globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_start = 3130
73
- _globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_end = 3205
74
- _globals["_DESIREDEXECUTORSTATE"]._serialized_start = 3208
75
- _globals["_DESIREDEXECUTORSTATE"]._serialized_end = 3393
76
- _globals["_REPORTTASKOUTCOMEREQUEST"]._serialized_start = 3396
77
- _globals["_REPORTTASKOUTCOMEREQUEST"]._serialized_end = 4171
78
- _globals["_REPORTTASKOUTCOMERESPONSE"]._serialized_start = 4173
79
- _globals["_REPORTTASKOUTCOMERESPONSE"]._serialized_end = 4200
80
- _globals["_EXECUTORAPI"]._serialized_start = 5753
81
- _globals["_EXECUTORAPI"]._serialized_end = 6120
50
+ _globals["_GPURESOURCES"]._serialized_start = 321
51
+ _globals["_GPURESOURCES"]._serialized_end = 474
52
+ _globals["_HOSTRESOURCES"]._serialized_start = 477
53
+ _globals["_HOSTRESOURCES"]._serialized_end = 671
54
+ _globals["_ALLOWEDFUNCTION"]._serialized_start = 674
55
+ _globals["_ALLOWEDFUNCTION"]._serialized_end = 861
56
+ _globals["_FUNCTIONEXECUTORRESOURCES"]._serialized_start = 864
57
+ _globals["_FUNCTIONEXECUTORRESOURCES"]._serialized_end = 1061
58
+ _globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_start = 1064
59
+ _globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_end = 1639
60
+ _globals["_FUNCTIONEXECUTORSTATE"]._serialized_start = 1642
61
+ _globals["_FUNCTIONEXECUTORSTATE"]._serialized_end = 1874
62
+ _globals["_EXECUTORSTATE"]._serialized_start = 1877
63
+ _globals["_EXECUTORSTATE"]._serialized_end = 2674
64
+ _globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_start = 2474
65
+ _globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_end = 2519
66
+ _globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_start = 2676
67
+ _globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_end = 2784
68
+ _globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_start = 2786
69
+ _globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_end = 2815
70
+ _globals["_TASKRETRYPOLICY"]._serialized_start = 2818
71
+ _globals["_TASKRETRYPOLICY"]._serialized_end = 3025
72
+ _globals["_TASK"]._serialized_start = 3028
73
+ _globals["_TASK"]._serialized_end = 3704
74
+ _globals["_TASKALLOCATION"]._serialized_start = 3706
75
+ _globals["_TASKALLOCATION"]._serialized_end = 3833
76
+ _globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_start = 3835
77
+ _globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_end = 3910
78
+ _globals["_DESIREDEXECUTORSTATE"]._serialized_start = 3913
79
+ _globals["_DESIREDEXECUTORSTATE"]._serialized_end = 4098
80
+ _globals["_REPORTTASKOUTCOMEREQUEST"]._serialized_start = 4101
81
+ _globals["_REPORTTASKOUTCOMEREQUEST"]._serialized_end = 4876
82
+ _globals["_REPORTTASKOUTCOMERESPONSE"]._serialized_start = 4878
83
+ _globals["_REPORTTASKOUTCOMERESPONSE"]._serialized_end = 4905
84
+ _globals["_EXECUTORAPI"]._serialized_start = 6100
85
+ _globals["_EXECUTORAPI"]._serialized_end = 6467
82
86
  # @@protoc_insertion_point(module_scope)
@@ -21,16 +21,6 @@ class DataPayloadEncoding(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
21
21
  class GPUModel(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
22
22
  __slots__ = ()
23
23
  GPU_MODEL_UNKNOWN: _ClassVar[GPUModel]
24
- GPU_MODEL_NVIDIA_TESLA_T4_16GB: _ClassVar[GPUModel]
25
- GPU_MODEL_NVIDIA_TESLA_V100_16GB: _ClassVar[GPUModel]
26
- GPU_MODEL_NVIDIA_A10_24GB: _ClassVar[GPUModel]
27
- GPU_MODEL_NVIDIA_A6000_48GB: _ClassVar[GPUModel]
28
- GPU_MODEL_NVIDIA_A100_SXM4_40GB: _ClassVar[GPUModel]
29
- GPU_MODEL_NVIDIA_A100_SXM4_80GB: _ClassVar[GPUModel]
30
- GPU_MODEL_NVIDIA_A100_PCI_40GB: _ClassVar[GPUModel]
31
- GPU_MODEL_NVIDIA_H100_SXM5_80GB: _ClassVar[GPUModel]
32
- GPU_MODEL_NVIDIA_H100_PCI_80GB: _ClassVar[GPUModel]
33
- GPU_MODEL_NVIDIA_RTX_6000_24GB: _ClassVar[GPUModel]
34
24
 
35
25
  class FunctionExecutorStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
36
26
  __slots__ = ()
@@ -82,16 +72,6 @@ DATA_PAYLOAD_ENCODING_UTF8_JSON: DataPayloadEncoding
82
72
  DATA_PAYLOAD_ENCODING_UTF8_TEXT: DataPayloadEncoding
83
73
  DATA_PAYLOAD_ENCODING_BINARY_PICKLE: DataPayloadEncoding
84
74
  GPU_MODEL_UNKNOWN: GPUModel
85
- GPU_MODEL_NVIDIA_TESLA_T4_16GB: GPUModel
86
- GPU_MODEL_NVIDIA_TESLA_V100_16GB: GPUModel
87
- GPU_MODEL_NVIDIA_A10_24GB: GPUModel
88
- GPU_MODEL_NVIDIA_A6000_48GB: GPUModel
89
- GPU_MODEL_NVIDIA_A100_SXM4_40GB: GPUModel
90
- GPU_MODEL_NVIDIA_A100_SXM4_80GB: GPUModel
91
- GPU_MODEL_NVIDIA_A100_PCI_40GB: GPUModel
92
- GPU_MODEL_NVIDIA_H100_SXM5_80GB: GPUModel
93
- GPU_MODEL_NVIDIA_H100_PCI_80GB: GPUModel
94
- GPU_MODEL_NVIDIA_RTX_6000_24GB: GPUModel
95
75
  FUNCTION_EXECUTOR_STATUS_UNKNOWN: FunctionExecutorStatus
96
76
  FUNCTION_EXECUTOR_STATUS_STARTING_UP: FunctionExecutorStatus
97
77
  FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR: FunctionExecutorStatus
@@ -144,13 +124,18 @@ class DataPayload(_message.Message):
144
124
  ) -> None: ...
145
125
 
146
126
  class GPUResources(_message.Message):
147
- __slots__ = ("count", "model")
127
+ __slots__ = ("count", "deprecated_model", "model")
148
128
  COUNT_FIELD_NUMBER: _ClassVar[int]
129
+ DEPRECATED_MODEL_FIELD_NUMBER: _ClassVar[int]
149
130
  MODEL_FIELD_NUMBER: _ClassVar[int]
150
131
  count: int
151
- model: GPUModel
132
+ deprecated_model: GPUModel
133
+ model: str
152
134
  def __init__(
153
- self, count: _Optional[int] = ..., model: _Optional[_Union[GPUModel, str]] = ...
135
+ self,
136
+ count: _Optional[int] = ...,
137
+ deprecated_model: _Optional[_Union[GPUModel, str]] = ...,
138
+ model: _Optional[str] = ...,
154
139
  ) -> None: ...
155
140
 
156
141
  class HostResources(_message.Message):
@@ -189,6 +174,24 @@ class AllowedFunction(_message.Message):
189
174
  graph_version: _Optional[str] = ...,
190
175
  ) -> None: ...
191
176
 
177
+ class FunctionExecutorResources(_message.Message):
178
+ __slots__ = ("cpu_ms_per_sec", "memory_bytes", "disk_bytes", "gpu_count")
179
+ CPU_MS_PER_SEC_FIELD_NUMBER: _ClassVar[int]
180
+ MEMORY_BYTES_FIELD_NUMBER: _ClassVar[int]
181
+ DISK_BYTES_FIELD_NUMBER: _ClassVar[int]
182
+ GPU_COUNT_FIELD_NUMBER: _ClassVar[int]
183
+ cpu_ms_per_sec: int
184
+ memory_bytes: int
185
+ disk_bytes: int
186
+ gpu_count: int
187
+ def __init__(
188
+ self,
189
+ cpu_ms_per_sec: _Optional[int] = ...,
190
+ memory_bytes: _Optional[int] = ...,
191
+ disk_bytes: _Optional[int] = ...,
192
+ gpu_count: _Optional[int] = ...,
193
+ ) -> None: ...
194
+
192
195
  class FunctionExecutorDescription(_message.Message):
193
196
  __slots__ = (
194
197
  "id",
@@ -201,6 +204,7 @@ class FunctionExecutorDescription(_message.Message):
201
204
  "resource_limits",
202
205
  "customer_code_timeout_ms",
203
206
  "graph",
207
+ "resources",
204
208
  )
205
209
  ID_FIELD_NUMBER: _ClassVar[int]
206
210
  NAMESPACE_FIELD_NUMBER: _ClassVar[int]
@@ -212,6 +216,7 @@ class FunctionExecutorDescription(_message.Message):
212
216
  RESOURCE_LIMITS_FIELD_NUMBER: _ClassVar[int]
213
217
  CUSTOMER_CODE_TIMEOUT_MS_FIELD_NUMBER: _ClassVar[int]
214
218
  GRAPH_FIELD_NUMBER: _ClassVar[int]
219
+ RESOURCES_FIELD_NUMBER: _ClassVar[int]
215
220
  id: str
216
221
  namespace: str
217
222
  graph_name: str
@@ -222,6 +227,7 @@ class FunctionExecutorDescription(_message.Message):
222
227
  resource_limits: HostResources
223
228
  customer_code_timeout_ms: int
224
229
  graph: DataPayload
230
+ resources: FunctionExecutorResources
225
231
  def __init__(
226
232
  self,
227
233
  id: _Optional[str] = ...,
@@ -234,6 +240,7 @@ class FunctionExecutorDescription(_message.Message):
234
240
  resource_limits: _Optional[_Union[HostResources, _Mapping]] = ...,
235
241
  customer_code_timeout_ms: _Optional[int] = ...,
236
242
  graph: _Optional[_Union[DataPayload, _Mapping]] = ...,
243
+ resources: _Optional[_Union[FunctionExecutorResources, _Mapping]] = ...,
237
244
  ) -> None: ...
238
245
 
239
246
  class FunctionExecutorState(_message.Message):
@@ -259,6 +266,7 @@ class ExecutorState(_message.Message):
259
266
  "flavor",
260
267
  "version",
261
268
  "status",
269
+ "total_resources",
262
270
  "free_resources",
263
271
  "allowed_functions",
264
272
  "function_executor_states",
@@ -283,6 +291,7 @@ class ExecutorState(_message.Message):
283
291
  FLAVOR_FIELD_NUMBER: _ClassVar[int]
284
292
  VERSION_FIELD_NUMBER: _ClassVar[int]
285
293
  STATUS_FIELD_NUMBER: _ClassVar[int]
294
+ TOTAL_RESOURCES_FIELD_NUMBER: _ClassVar[int]
286
295
  FREE_RESOURCES_FIELD_NUMBER: _ClassVar[int]
287
296
  ALLOWED_FUNCTIONS_FIELD_NUMBER: _ClassVar[int]
288
297
  FUNCTION_EXECUTOR_STATES_FIELD_NUMBER: _ClassVar[int]
@@ -295,6 +304,7 @@ class ExecutorState(_message.Message):
295
304
  flavor: ExecutorFlavor
296
305
  version: str
297
306
  status: ExecutorStatus
307
+ total_resources: HostResources
298
308
  free_resources: HostResources
299
309
  allowed_functions: _containers.RepeatedCompositeFieldContainer[AllowedFunction]
300
310
  function_executor_states: _containers.RepeatedCompositeFieldContainer[
@@ -311,6 +321,7 @@ class ExecutorState(_message.Message):
311
321
  flavor: _Optional[_Union[ExecutorFlavor, str]] = ...,
312
322
  version: _Optional[str] = ...,
313
323
  status: _Optional[_Union[ExecutorStatus, str]] = ...,
324
+ total_resources: _Optional[_Union[HostResources, _Mapping]] = ...,
314
325
  free_resources: _Optional[_Union[HostResources, _Mapping]] = ...,
315
326
  allowed_functions: _Optional[
316
327
  _Iterable[_Union[AllowedFunction, _Mapping]]
@@ -335,6 +346,24 @@ class ReportExecutorStateResponse(_message.Message):
335
346
  __slots__ = ()
336
347
  def __init__(self) -> None: ...
337
348
 
349
+ class TaskRetryPolicy(_message.Message):
350
+ __slots__ = ("max_retries", "initial_delay_ms", "max_delay_ms", "delay_multiplier")
351
+ MAX_RETRIES_FIELD_NUMBER: _ClassVar[int]
352
+ INITIAL_DELAY_MS_FIELD_NUMBER: _ClassVar[int]
353
+ MAX_DELAY_MS_FIELD_NUMBER: _ClassVar[int]
354
+ DELAY_MULTIPLIER_FIELD_NUMBER: _ClassVar[int]
355
+ max_retries: int
356
+ initial_delay_ms: int
357
+ max_delay_ms: int
358
+ delay_multiplier: int
359
+ def __init__(
360
+ self,
361
+ max_retries: _Optional[int] = ...,
362
+ initial_delay_ms: _Optional[int] = ...,
363
+ max_delay_ms: _Optional[int] = ...,
364
+ delay_multiplier: _Optional[int] = ...,
365
+ ) -> None: ...
366
+
338
367
  class Task(_message.Message):
339
368
  __slots__ = (
340
369
  "id",
@@ -349,6 +378,7 @@ class Task(_message.Message):
349
378
  "input",
350
379
  "reducer_input",
351
380
  "output_payload_uri_prefix",
381
+ "retry_policy",
352
382
  )
353
383
  ID_FIELD_NUMBER: _ClassVar[int]
354
384
  NAMESPACE_FIELD_NUMBER: _ClassVar[int]
@@ -362,6 +392,7 @@ class Task(_message.Message):
362
392
  INPUT_FIELD_NUMBER: _ClassVar[int]
363
393
  REDUCER_INPUT_FIELD_NUMBER: _ClassVar[int]
364
394
  OUTPUT_PAYLOAD_URI_PREFIX_FIELD_NUMBER: _ClassVar[int]
395
+ RETRY_POLICY_FIELD_NUMBER: _ClassVar[int]
365
396
  id: str
366
397
  namespace: str
367
398
  graph_name: str
@@ -374,6 +405,7 @@ class Task(_message.Message):
374
405
  input: DataPayload
375
406
  reducer_input: DataPayload
376
407
  output_payload_uri_prefix: str
408
+ retry_policy: TaskRetryPolicy
377
409
  def __init__(
378
410
  self,
379
411
  id: _Optional[str] = ...,
@@ -388,6 +420,7 @@ class Task(_message.Message):
388
420
  input: _Optional[_Union[DataPayload, _Mapping]] = ...,
389
421
  reducer_input: _Optional[_Union[DataPayload, _Mapping]] = ...,
390
422
  output_payload_uri_prefix: _Optional[str] = ...,
423
+ retry_policy: _Optional[_Union[TaskRetryPolicy, _Mapping]] = ...,
391
424
  ) -> None: ...
392
425
 
393
426
  class TaskAllocation(_message.Message):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: indexify
3
- Version: 0.3.22
3
+ Version: 0.3.24
4
4
  Summary: Open Source Indexify components and helper tools
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -1,12 +1,12 @@
1
- indexify/cli/cli.py,sha256=k6vhgVKsHZvum2A2w7kbirpKusfqi_PyZGq29Q433_c,8950
1
+ indexify/cli/cli.py,sha256=RaKrcdrawu_1FrDV67o2QwMqfsPdADOcIxK2iTZvcho,9190
2
2
  indexify/executor/README.md,sha256=ozC6_hMkhQQNVCMEpBxwiUALz6lwErPQxNxQfQDqnG4,2029
3
- indexify/executor/api_objects.py,sha256=UEMpD_kKr6BNfu2JdYzq1CPNrEmVZYhwlZRKf3hB7KI,1750
3
+ indexify/executor/api_objects.py,sha256=kHx5gKPwM0Rm64Ea__kPFwuarStX0u_9uaE7vV5M5z8,2222
4
4
  indexify/executor/blob_store/blob_store.py,sha256=XViw_KRfFSNqwcFYwMZixZF-EYCjXK2AQHdt0xh4UVo,2368
5
5
  indexify/executor/blob_store/local_fs_blob_store.py,sha256=6LexqMBGXp8f6Ka95R6xMIUyDutrZJABOMNcp-ssa98,1809
6
6
  indexify/executor/blob_store/metrics/blob_store.py,sha256=5_xiPREeHWFtxFh1NupDsF8zP4pmUPgLNNn-UE9Uzvc,1008
7
7
  indexify/executor/blob_store/s3_blob_store.py,sha256=G3B_V3gUE7XbUY42lDtBczUKuA7q8S7MD43tx1aHrJo,3445
8
8
  indexify/executor/downloader.py,sha256=k9VbfOa-D6YH-cX8Sz-W-gWTsxmeVpSaIOq0xTC9KB0,15474
9
- indexify/executor/executor.py,sha256=K_xNJDImvIxgpE_ypw1ERyrijqOSe0EZIyVRVCqztVw,16697
9
+ indexify/executor/executor.py,sha256=WE9ABct1yAlfh4-cOUcp1vTjFbkiXNAGlsbsNbfWdkU,17006
10
10
  indexify/executor/executor_flavor.py,sha256=uilzDQVVYlQGR1MVnrUC4NevUActDWHdnJkr38M6kTk,118
11
11
  indexify/executor/function_executor/function_executor.py,sha256=agfUxzSQ-2TqkpMhW3OvOSMF_EhpemetaL3_dYp29Ro,11888
12
12
  indexify/executor/function_executor/function_executor_state.py,sha256=ljPm1IrRMJ8hFklwvFp7Xax2HMpUIOHm0DwOxxMcy7U,4336
@@ -22,21 +22,24 @@ indexify/executor/function_executor/metrics/invocation_state_client.py,sha256=6F
22
22
  indexify/executor/function_executor/metrics/single_task_runner.py,sha256=7BJlGkdPGKeufMs3zWNO_1GRVzjINRY5rW3Mp4oWWec,805
23
23
  indexify/executor/function_executor/server/client_configuration.py,sha256=gOywMus0cotlX6NKIadEJwvOmBE-LbGE_wvoMi5-HzY,994
24
24
  indexify/executor/function_executor/server/function_executor_server.py,sha256=_DLivLDikupZusRk8gVWDk7fWPT9XjZ4un1yWSlOObs,883
25
- indexify/executor/function_executor/server/function_executor_server_factory.py,sha256=z13MGGCWU_G5SKUJpX4Qb7m5fYhIAH_-pqmTBh0WBQ8,1758
25
+ indexify/executor/function_executor/server/function_executor_server_factory.py,sha256=xWEuDoxFqF-oC4RoiEer4S0Tk2tNbbJfA2kANZVShpM,1873
26
26
  indexify/executor/function_executor/server/subprocess_function_executor_server.py,sha256=JekDOqF7oFD4J6zcN3xB0Dxd1cgpEXMOsb_rKZOeBlI,668
27
27
  indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py,sha256=g1AUbhOoPsdhp_50Ayahdyv1Ix5-nEBE8orOQfkATpM,4470
28
- indexify/executor/function_executor/single_task_runner.py,sha256=CWC2U1SilX6yVl1890quqSLYLexjRoBI-3LxNDEOISA,14335
28
+ indexify/executor/function_executor/single_task_runner.py,sha256=6Fb9icnZ21pJcCq3mddFRoonPdNpUdSjTSQYh9nJXS0,14977
29
29
  indexify/executor/function_executor/task_input.py,sha256=wSrHR4m0juiGClQyeVdhRC37QzDt6Rrjq-ZXJkfBi9k,584
30
30
  indexify/executor/function_executor/task_output.py,sha256=vi0W75T8qbxjtAtI-812HksCv986lH_ijEqc3q0CVww,3424
31
31
  indexify/executor/grpc/channel_manager.py,sha256=ihDkLoiGBLfSmoA2szbntjCfL3E_NDf5LABRXE7YRec,6330
32
- indexify/executor/grpc/function_executor_controller.py,sha256=UWjchfRVpmT1uGIJraP25lGDhrKZh1CBtVrnrRK2dXE,16265
32
+ indexify/executor/grpc/function_executor_controller.py,sha256=3esPU9ILzYQ4gjfkRZ3IKHpqXWUQEmNit8XzQYKXoGs,17125
33
33
  indexify/executor/grpc/metrics/channel_manager.py,sha256=k-WArgklmP5WhjcmFmrgRblB7yc3XlaOXO8owRyV-mw,649
34
34
  indexify/executor/grpc/metrics/state_reconciler.py,sha256=0aI2IM4XztKxFa7NCxYSLafw_iiej3p07yEiKyewXIM,585
35
35
  indexify/executor/grpc/metrics/state_reporter.py,sha256=GggBEjMzQUYIG95LtTS4fUg1u9jYowkaXoUXppAXucs,543
36
36
  indexify/executor/grpc/metrics/task_controller.py,sha256=9Nm86nGxL2rZ3rAORB0_CBdO--Fe4MBrewVW4CqGyOU,222
37
37
  indexify/executor/grpc/state_reconciler.py,sha256=VYfKiy43XOA4OxbQUGVffIeqUaQQ37P4_aD9b_80Ls0,19264
38
- indexify/executor/grpc/state_reporter.py,sha256=uIouQMbk47pi90ceixKs7QEo_rjltQc0AfajryqbGzM,10494
38
+ indexify/executor/grpc/state_reporter.py,sha256=xXLX1se8iTVbccXlKNGNupQXER1M3Yd0VfpqIQJQJZg,11361
39
39
  indexify/executor/grpc/task_controller.py,sha256=JrRUkVa8pGoYWBg_RVfn1ThvDhYCgJSMACFPUww7Lys,20851
40
+ indexify/executor/host_resources/host_resources.py,sha256=V8g6R4ovgV-4mWKRO9qsS_SRCJScs4bcFdvxhk2Az_8,1560
41
+ indexify/executor/host_resources/nvidia_gpu.py,sha256=EZotTSMJtIQl51CpcbYoZpJcrQq1B6f8AbnxE2VI9to,2595
42
+ indexify/executor/host_resources/nvidia_gpu_allocator.py,sha256=oULSjL0AVo_nqR_pquq17079UalHQkhMwMqf72gbPHo,1872
40
43
  indexify/executor/metrics/downloader.py,sha256=lctPh8xjkXeLEFJnl1hNrD1yEhLhIl5sggsR4Yoe_Zc,2746
41
44
  indexify/executor/metrics/executor.py,sha256=ua-Vv_k1CB4juJdF7tEBQbBMksqWAA3iXKKMKXZUCLk,2369
42
45
  indexify/executor/metrics/task_fetcher.py,sha256=iJEwCLzYr2cuz7hRvNiqaa2nvQP4OrA0hm0iJY0YKG0,736
@@ -55,11 +58,11 @@ indexify/executor/runtime_probes.py,sha256=bo6Dq6AGZpJH099j0DHtVSDEH80tv3j9MXf3V
55
58
  indexify/executor/task_fetcher.py,sha256=p3iEsWyGi0ZMPAv0183smzOUD1KycQ_dXsyd9mpB9IU,3529
56
59
  indexify/executor/task_reporter.py,sha256=7X-IdLdwNBIfFbazG_4rtfR1A0ZFt03JGYpVJQUTKpE,16704
57
60
  indexify/executor/task_runner.py,sha256=UupZbGxU9BN4i1t6M8tH-5k3s4eUPEhMhar1YI0Aztk,7219
58
- indexify/proto/executor_api.proto,sha256=K1lwFmk042GA1tp8s633FZJVg6Fi8f8LtAuFj8Gz7XU,9930
59
- indexify/proto/executor_api_pb2.py,sha256=5y570_FIgc6WFhHVAKWFieMuUhyKBA7rPJJ4DJ5hcCM,14054
60
- indexify/proto/executor_api_pb2.pyi,sha256=5eJJJjPNdTMSttNUOtzGwADbASsCh7138de_Y3l8uq4,18612
61
+ indexify/proto/executor_api.proto,sha256=0l0kDcSNwAU-uTW7GTlg35DjYFs1iCiCVuaGZR1KOoY,10505
62
+ indexify/proto/executor_api_pb2.py,sha256=O-yQZt0jMTVXTTXCDs8qZeIi_fAF7Xi-youCXjjzNsA,15037
63
+ indexify/proto/executor_api_pb2.pyi,sha256=carOjGvH65dc7M50g_PTe6PXIMVpRLM_qWRaY28way4,19629
61
64
  indexify/proto/executor_api_pb2_grpc.py,sha256=GGiDtyQlA2382E_ZyKUBYcWNEJHH_RlulieStKfkJXI,9514
62
- indexify-0.3.22.dist-info/METADATA,sha256=cLQOCV8XooTEuzQDpuDKS-sJ6j78nM_7L889e_YUPCw,1198
63
- indexify-0.3.22.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
64
- indexify-0.3.22.dist-info/entry_points.txt,sha256=GU9wmsgvN7nQw3N2X0PMYn1RSvF6CrhH9RuC2D8d3Gk,53
65
- indexify-0.3.22.dist-info/RECORD,,
65
+ indexify-0.3.24.dist-info/METADATA,sha256=qUoUwLmKXouQwXbukZCYo0naWCySii5uI9GPuNEgeSI,1198
66
+ indexify-0.3.24.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
67
+ indexify-0.3.24.dist-info/entry_points.txt,sha256=GU9wmsgvN7nQw3N2X0PMYn1RSvF6CrhH9RuC2D8d3Gk,53
68
+ indexify-0.3.24.dist-info/RECORD,,