indexify 0.3.13-py3-none-any.whl → 0.3.14-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/cli/cli.py +11 -7
- indexify/executor/downloader.py +99 -50
- indexify/executor/executor.py +149 -28
- indexify/executor/function_executor/function_executor_state.py +23 -4
- indexify/executor/function_executor/function_executor_states_container.py +28 -16
- indexify/executor/function_executor/health_checker.py +26 -11
- indexify/executor/function_executor/server/function_executor_server_factory.py +4 -1
- indexify/executor/function_executor/single_task_runner.py +28 -8
- indexify/executor/function_executor/task_output.py +27 -4
- indexify/executor/state_reconciler.py +288 -0
- indexify/executor/state_reporter.py +127 -0
- indexify/executor/task_reporter.py +6 -6
- indexify/executor/task_runner.py +20 -12
- indexify/task_scheduler/proto/task_scheduler.proto +147 -0
- indexify/task_scheduler/proto/task_scheduler_pb2.py +69 -0
- indexify/task_scheduler/proto/task_scheduler_pb2.pyi +286 -0
- indexify/task_scheduler/proto/task_scheduler_pb2_grpc.py +170 -0
- {indexify-0.3.13.dist-info → indexify-0.3.14.dist-info}/METADATA +1 -1
- {indexify-0.3.13.dist-info → indexify-0.3.14.dist-info}/RECORD +21 -15
- {indexify-0.3.13.dist-info → indexify-0.3.14.dist-info}/WHEEL +0 -0
- {indexify-0.3.13.dist-info → indexify-0.3.14.dist-info}/entry_points.txt +0 -0
indexify/executor/task_runner.py
CHANGED
@@ -4,7 +4,6 @@ from .api_objects import Task
 from .function_executor.function_executor_state import FunctionExecutorState
 from .function_executor.function_executor_states_container import (
     FunctionExecutorStatesContainer,
-    function_id_with_version,
 )
 from .function_executor.server.function_executor_server_factory import (
     FunctionExecutorServerFactory,
@@ -34,7 +33,6 @@ class TaskRunner:
         executor_id: str,
         function_executor_server_factory: FunctionExecutorServerFactory,
         base_url: str,
-        disable_automatic_function_executor_management: bool,
         function_executor_states: FunctionExecutorStatesContainer,
         config_path: Optional[str],
     ):
@@ -42,9 +40,6 @@ class TaskRunner:
         self._factory: FunctionExecutorServerFactory = function_executor_server_factory
         self._base_url: str = base_url
         self._config_path: Optional[str] = config_path
-        self._disable_automatic_function_executor_management: bool = (
-            disable_automatic_function_executor_management
-        )
         self._function_executor_states: FunctionExecutorStatesContainer = (
             function_executor_states
         )
@@ -76,7 +71,14 @@ class TaskRunner:
                 "failed running the task:",
                 exc_info=e,
             )
-            return TaskOutput.internal_error(
+            return TaskOutput.internal_error(
+                task_id=task_input.task.id,
+                namespace=task_input.task.namespace,
+                graph_name=task_input.task.compute_graph,
+                function_name=task_input.task.compute_fn,
+                graph_version=task_input.task.graph_version,
+                graph_invocation_id=task_input.task.invocation_id,
+            )
         finally:
             if state is not None:
                 state.lock.release()
@@ -90,7 +92,12 @@ class TaskRunner:
         """
         logger.info("task is blocked by policy")
         state = await self._function_executor_states.get_or_create_state(
-            task_input.task
+            id=_function_id_without_version(task_input.task),
+            namespace=task_input.task.namespace,
+            graph_name=task_input.task.compute_graph,
+            graph_version=task_input.task.graph_version,
+            function_name=task_input.task.compute_fn,
+            image_uri=task_input.task.image_uri,
         )
         await state.lock.acquire()
 
@@ -111,12 +118,9 @@ class TaskRunner:
         # - Each Function Executor rans at most 1 task concurrently.
         await state.wait_running_tasks_less(1)
 
-        if
-            return  # Disable Function Executor destroy in manual management mode.
-
-        if state.function_id_with_version != function_id_with_version(task):
+        if state.graph_version != task.graph_version:
             await state.destroy_function_executor()
-            state.
+            state.graph_version = task.graph_version
         # At this point the state belongs to the version of the function from the task
         # and there are no running tasks in the Function Executor.
 
@@ -137,3 +141,7 @@ class TaskRunner:
 
     async def shutdown(self) -> None:
         pass
+
+
+def _function_id_without_version(task: Task) -> str:
+    return f"not_versioned/{task.namespace}/{task.compute_graph}/{task.compute_fn}"
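The net effect of the task_runner.py changes: Function Executor states are now keyed without the graph version, so a state object survives version bumps, and a version mismatch triggers destroy-and-recreate instead of the removed manual-management escape hatch. A minimal sketch of that keying behavior (not the package's code; Task here is a hypothetical dataclass stand-in for indexify's api_objects.Task):

from dataclasses import dataclass


@dataclass
class Task:  # hypothetical stand-in for indexify's api_objects.Task
    namespace: str
    compute_graph: str
    compute_fn: str
    graph_version: str


def _function_id_without_version(task: Task) -> str:
    # Same key format as the helper added in the diff above.
    return f"not_versioned/{task.namespace}/{task.compute_graph}/{task.compute_fn}"


# Two versions of the same function map to one state key, so the executor
# reuses the state object and only recreates the Function Executor
# when graph_version changes.
v1 = Task("default", "summarize", "extract", graph_version="1")
v2 = Task("default", "summarize", "extract", graph_version="2")
assert _function_id_without_version(v1) == _function_id_without_version(v2)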
indexify/task_scheduler/proto/task_scheduler.proto
ADDED
@@ -0,0 +1,147 @@
+syntax = "proto3";
+
+package task_scheduler_service;
+
+// ===== ReportExecutorState RPC =====
+
+enum GPUModel {
+    GPU_MODEL_UNKNOWN = 0;
+    GPU_MODEL_NVIDIA_TESLA_T4_16GB = 10;
+    GPU_MODEL_NVIDIA_TESLA_V100_16GB = 20;
+    GPU_MODEL_NVIDIA_A10_24GB = 30;
+    GPU_MODEL_NVIDIA_A6000_48GB = 40;
+    // A100 GPUs
+    GPU_MODEL_NVIDIA_A100_SXM4_40GB = 50;
+    GPU_MODEL_NVIDIA_A100_SXM4_80GB = 51;
+    GPU_MODEL_NVIDIA_A100_PCI_40GB = 52;
+    // H100 GPUs
+    GPU_MODEL_NVIDIA_H100_SXM5_80GB = 60;
+    GPU_MODEL_NVIDIA_H100_PCI_80GB = 61;
+    GPU_MODEL_NVIDIA_RTX_6000_24GB = 62;
+}
+
+// Free GPUs available at the Executor.
+message GPUResources {
+    optional uint32 count = 1;
+    optional GPUModel model = 2;
+}
+
+// Free host resources available at the Executor.
+message HostResources {
+    optional uint32 cpu_count = 1;
+    optional uint64 memory_bytes = 2;
+    optional uint64 disk_bytes = 3;
+    optional GPUResources gpu = 4;
+}
+
+// Specification of a single function that is allowed to be run on the Executor.
+message AllowedFunction {
+    optional string namespace = 1;
+    optional string graph_name = 2;
+    optional string function_name = 3;
+    // If none then any version of the graph is allowed to run on the Executor.
+    optional string graph_version = 4;
+}
+
+enum FunctionExecutorStatus {
+    FUNCTION_EXECUTOR_STATUS_UNKNOWN = 0;
+    FUNCTION_EXECUTOR_STATUS_STOPPED = 1;
+    FUNCTION_EXECUTOR_STATUS_STARTING_UP = 2;
+    FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR = 3;
+    FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR = 4;
+    FUNCTION_EXECUTOR_STATUS_IDLE = 5;
+    FUNCTION_EXECUTOR_STATUS_RUNNING_TASK = 6;
+    FUNCTION_EXECUTOR_STATUS_UNHEALTHY = 7;
+    FUNCTION_EXECUTOR_STATUS_STOPPING = 8;
+}
+
+// Immutable information that identifies and describes a Function Executor.
+message FunctionExecutorDescription {
+    optional string id = 1;
+    optional string namespace = 2;
+    optional string graph_name = 3;
+    optional string graph_version = 4;
+    optional string function_name = 5;
+    optional string image_uri = 6;
+}
+
+message FunctionExecutorState {
+    optional FunctionExecutorDescription description = 1;
+    optional FunctionExecutorStatus status = 2;
+}
+
+enum ExecutorStatus {
+    EXECUTOR_STATUS_UNKNOWN = 0;
+    EXECUTOR_STATUS_STARTING = 1;
+    EXECUTOR_STATUS_RUNNING = 2;
+    EXECUTOR_STATUS_DRAINED = 3;
+    EXECUTOR_STATUS_SHUTTING_DOWN = 4;
+}
+
+message ExecutorState {
+    optional string executor_id = 1;
+    optional ExecutorStatus executor_status = 2;
+    optional HostResources host_resources = 3;
+    // Empty allowed_functions list means that any function can run on the Executor.
+    repeated AllowedFunction allowed_functions = 4;
+    repeated FunctionExecutorState function_executor_states = 5;
+}
+
+// A message sent by Executor to report its up to date state to Server.
+message ReportExecutorStateRequest {
+    optional ExecutorState executor_state = 1;
+}
+
+// A message sent by Server to Executor to acknowledge the receipt of Executor state.
+message ReportExecutorStateResponse {
+}
+
+// ===== GetDesiredExecutorStates RPC =====
+message Task {
+    optional string id = 1;
+    optional string namespace = 2;
+    optional string graph_name = 3;
+    optional string graph_version = 4;
+    optional string function_name = 5;
+    optional string graph_invocation_id = 6;
+    optional string input_key = 8;
+    optional string reducer_output_key = 9;
+}
+
+message TaskAllocation {
+    optional string function_executor_id = 1;
+    optional Task task = 2;
+}
+
+// A message sent by Executor to Server to open the stream of desired Executor States for the Executor.
+message GetDesiredExecutorStatesRequest {
+    optional string executor_id = 1;
+}
+
+// A message sent from Server to Executor that describes the desired state of the Executor at the moment.
+// Executor compares this state with its current state and make necessary changes to match the desired state.
+message DesiredExecutorState {
+    repeated FunctionExecutorDescription function_executors = 1;
+    repeated TaskAllocation task_allocations = 2;
+    // Server supplied clock value used to deduplicate messages. Executor records max clock value
+    // it observed and ignores all the messages with clock value <= the max observed value.
+    optional uint64 clock = 3;
+}
+
+// Internal API for scheduling and running tasks on Executors. Executors are acting as clients of this API.
+// Server is responsible for scheduling tasks on Executors and Executors are responsible for running the tasks.
+service TaskSchedulerService {
+    // Called by Executor every 5 seconds to report that it's still alive and provide its current state.
+    //
+    // Missing 3 reports will result in the Executor being deregistered by Server.
+    rpc report_executor_state(ReportExecutorStateRequest) returns (ReportExecutorStateResponse) {}
+
+    // Called by Executor to open a stream of its desired states. When Server wants Executor to change something
+    // it puts a message on the stream with the new desired state of the Executor.
+    //
+    // Depricated HTTP API is used to download the serialized graph and task inputs.
+    rpc get_desired_executor_states(GetDesiredExecutorStatesRequest) returns (stream DesiredExecutorState) {}
+
+    // Task outcome is currently reported via depricated HTTP API. We're going to migrate task output reporting to gRPC
+    // when we move S3 downloads and uploads to Executor.
+}
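The clock field above is the protocol's deduplication mechanism: the Executor tracks the largest clock it has seen on the get_desired_executor_states stream and drops anything at or below it. A sketch of that filter as a hypothetical helper (not code from the package), using the generated message class:

from typing import Iterator, Optional

from indexify.task_scheduler.proto.task_scheduler_pb2 import DesiredExecutorState


def deduplicate_desired_states(
    stream: Iterator[DesiredExecutorState],
) -> Iterator[DesiredExecutorState]:
    # Record the max clock observed and ignore messages with clock <= that
    # value, as described in the DesiredExecutorState comment above.
    max_clock: Optional[int] = None
    for state in stream:
        if max_clock is not None and state.clock <= max_clock:
            continue  # duplicate or stale desired state
        max_clock = state.clock
        yield state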
indexify/task_scheduler/proto/task_scheduler_pb2.py
ADDED
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# NO CHECKED-IN PROTOBUF GENCODE
+# source: indexify/task_scheduler/proto/task_scheduler.proto
+# Protobuf Python Version: 5.29.0
+"""Generated protocol buffer code."""
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import runtime_version as _runtime_version
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import builder as _builder
+
+_runtime_version.ValidateProtobufRuntimeVersion(
+    _runtime_version.Domain.PUBLIC,
+    5,
+    29,
+    0,
+    "",
+    "indexify/task_scheduler/proto/task_scheduler.proto",
+)
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
+    b'\n2indexify/task_scheduler/proto/task_scheduler.proto\x12\x16task_scheduler_service"l\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x34\n\x05model\x18\x02 \x01(\x0e\x32 .task_scheduler_service.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_model"\xc9\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12\x36\n\x03gpu\x18\x04 \x01(\x0b\x32$.task_scheduler_service.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\x85\x02\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uri"\xc6\x01\n\x15\x46unctionExecutorState\x12M\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32\x33.task_scheduler_service.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12\x43\n\x06status\x18\x02 \x01(\x0e\x32..task_scheduler_service.FunctionExecutorStatusH\x01\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_status"\xff\x02\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x44\n\x0f\x65xecutor_status\x18\x02 \x01(\x0e\x32&.task_scheduler_service.ExecutorStatusH\x01\x88\x01\x01\x12\x42\n\x0ehost_resources\x18\x03 \x01(\x0b\x32%.task_scheduler_service.HostResourcesH\x02\x88\x01\x01\x12\x42\n\x11\x61llowed_functions\x18\x04 \x03(\x0b\x32\'.task_scheduler_service.AllowedFunction\x12O\n\x18\x66unction_executor_states\x18\x05 \x03(\x0b\x32-.task_scheduler_service.FunctionExecutorStateB\x0e\n\x0c_executor_idB\x12\n\x10_executor_statusB\x11\n\x0f_host_resources"s\n\x1aReportExecutorStateRequest\x12\x42\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32%.task_scheduler_service.ExecutorStateH\x00\x88\x01\x01\x42\x11\n\x0f_executor_state"\x1d\n\x1bReportExecutorStateResponse"\xe0\x02\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x16\n\tinput_key\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x1f\n\x12reducer_output_key\x18\t \x01(\tH\x07\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\x0c\n\n_input_keyB\x15\n\x13_reducer_output_key"\x86\x01\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12/\n\x04task\x18\x02 \x01(\x0b\x32\x1c.task_scheduler_service.TaskH\x01\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_task"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xc7\x01\n\x14\x44\x65siredExecutorState\x12O\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32\x33.task_scheduler_service.FunctionExecutorDescription\x12@\n\x10task_allocations\x18\x02 \x03(\x0b\x32&.task_scheduler_service.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock*\x86\x03\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12"\n\x1eGPU_MODEL_NVIDIA_TESLA_T4_16GB\x10\n\x12$\n GPU_MODEL_NVIDIA_TESLA_V100_16GB\x10\x14\x12\x1d\n\x19GPU_MODEL_NVIDIA_A10_24GB\x10\x1e\x12\x1f\n\x1bGPU_MODEL_NVIDIA_A6000_48GB\x10(\x12#\n\x1fGPU_MODEL_NVIDIA_A100_SXM4_40GB\x10\x32\x12#\n\x1fGPU_MODEL_NVIDIA_A100_SXM4_80GB\x10\x33\x12"\n\x1eGPU_MODEL_NVIDIA_A100_PCI_40GB\x10\x34\x12#\n\x1fGPU_MODEL_NVIDIA_H100_SXM5_80GB\x10<\x12"\n\x1eGPU_MODEL_NVIDIA_H100_PCI_80GB\x10=\x12"\n\x1eGPU_MODEL_NVIDIA_RTX_6000_24GB\x10>*\xa3\x03\n\x16\x46unctionExecutorStatus\x12$\n FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12$\n FUNCTION_EXECUTOR_STATUS_STOPPED\x10\x01\x12(\n$FUNCTION_EXECUTOR_STATUS_STARTING_UP\x10\x02\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR\x10\x03\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR\x10\x04\x12!\n\x1d\x46UNCTION_EXECUTOR_STATUS_IDLE\x10\x05\x12)\n%FUNCTION_EXECUTOR_STATUS_RUNNING_TASK\x10\x06\x12&\n"FUNCTION_EXECUTOR_STATUS_UNHEALTHY\x10\x07\x12%\n!FUNCTION_EXECUTOR_STATUS_STOPPING\x10\x08*\xa8\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1c\n\x18\x45XECUTOR_STATUS_STARTING\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12!\n\x1d\x45XECUTOR_STATUS_SHUTTING_DOWN\x10\x04\x32\xa6\x02\n\x14TaskSchedulerService\x12\x82\x01\n\x15report_executor_state\x12\x32.task_scheduler_service.ReportExecutorStateRequest\x1a\x33.task_scheduler_service.ReportExecutorStateResponse"\x00\x12\x88\x01\n\x1bget_desired_executor_states\x12\x37.task_scheduler_service.GetDesiredExecutorStatesRequest\x1a,.task_scheduler_service.DesiredExecutorState"\x00\x30\x01\x62\x06proto3'
+)
+
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(
+    DESCRIPTOR, "indexify.task_scheduler.proto.task_scheduler_pb2", _globals
+)
+if not _descriptor._USE_C_DESCRIPTORS:
+    DESCRIPTOR._loaded_options = None
+    _globals["_GPUMODEL"]._serialized_start = 2353
+    _globals["_GPUMODEL"]._serialized_end = 2743
+    _globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 2746
+    _globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 3165
+    _globals["_EXECUTORSTATUS"]._serialized_start = 3168
+    _globals["_EXECUTORSTATUS"]._serialized_end = 3336
+    _globals["_GPURESOURCES"]._serialized_start = 78
+    _globals["_GPURESOURCES"]._serialized_end = 186
+    _globals["_HOSTRESOURCES"]._serialized_start = 189
+    _globals["_HOSTRESOURCES"]._serialized_end = 390
+    _globals["_ALLOWEDFUNCTION"]._serialized_start = 393
+    _globals["_ALLOWEDFUNCTION"]._serialized_end = 580
+    _globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_start = 583
+    _globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_end = 844
+    _globals["_FUNCTIONEXECUTORSTATE"]._serialized_start = 847
+    _globals["_FUNCTIONEXECUTORSTATE"]._serialized_end = 1045
+    _globals["_EXECUTORSTATE"]._serialized_start = 1048
+    _globals["_EXECUTORSTATE"]._serialized_end = 1431
+    _globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_start = 1433
+    _globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_end = 1548
+    _globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_start = 1550
+    _globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_end = 1579
+    _globals["_TASK"]._serialized_start = 1582
+    _globals["_TASK"]._serialized_end = 1934
+    _globals["_TASKALLOCATION"]._serialized_start = 1937
+    _globals["_TASKALLOCATION"]._serialized_end = 2071
+    _globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_start = 2073
+    _globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_end = 2148
+    _globals["_DESIREDEXECUTORSTATE"]._serialized_start = 2151
+    _globals["_DESIREDEXECUTORSTATE"]._serialized_end = 2350
+    _globals["_TASKSCHEDULERSERVICE"]._serialized_start = 3339
+    _globals["_TASKSCHEDULERSERVICE"]._serialized_end = 3633
+# @@protoc_insertion_point(module_scope)
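For orientation, this generated module is imported like any other; building the 5-second heartbeat request from the messages defined in the .proto might look like this (executor id and resource numbers are illustrative, not taken from the diff):

from indexify.task_scheduler.proto import task_scheduler_pb2 as pb2

# Enum values such as EXECUTOR_STATUS_RUNNING are module-level constants
# in protoc-generated Python code.
state = pb2.ExecutorState(
    executor_id="executor-1",
    executor_status=pb2.EXECUTOR_STATUS_RUNNING,
    host_resources=pb2.HostResources(cpu_count=8, memory_bytes=16 * 1024**3),
)
request = pb2.ReportExecutorStateRequest(executor_state=state)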
indexify/task_scheduler/proto/task_scheduler_pb2.pyi
ADDED
@@ -0,0 +1,286 @@
+from typing import ClassVar as _ClassVar
+from typing import Iterable as _Iterable
+from typing import Mapping as _Mapping
+from typing import Optional as _Optional
+from typing import Union as _Union
+
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf.internal import containers as _containers
+from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
+
+DESCRIPTOR: _descriptor.FileDescriptor
+
+class GPUModel(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+    __slots__ = ()
+    GPU_MODEL_UNKNOWN: _ClassVar[GPUModel]
+    GPU_MODEL_NVIDIA_TESLA_T4_16GB: _ClassVar[GPUModel]
+    GPU_MODEL_NVIDIA_TESLA_V100_16GB: _ClassVar[GPUModel]
+    GPU_MODEL_NVIDIA_A10_24GB: _ClassVar[GPUModel]
+    GPU_MODEL_NVIDIA_A6000_48GB: _ClassVar[GPUModel]
+    GPU_MODEL_NVIDIA_A100_SXM4_40GB: _ClassVar[GPUModel]
+    GPU_MODEL_NVIDIA_A100_SXM4_80GB: _ClassVar[GPUModel]
+    GPU_MODEL_NVIDIA_A100_PCI_40GB: _ClassVar[GPUModel]
+    GPU_MODEL_NVIDIA_H100_SXM5_80GB: _ClassVar[GPUModel]
+    GPU_MODEL_NVIDIA_H100_PCI_80GB: _ClassVar[GPUModel]
+    GPU_MODEL_NVIDIA_RTX_6000_24GB: _ClassVar[GPUModel]
+
+class FunctionExecutorStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+    __slots__ = ()
+    FUNCTION_EXECUTOR_STATUS_UNKNOWN: _ClassVar[FunctionExecutorStatus]
+    FUNCTION_EXECUTOR_STATUS_STOPPED: _ClassVar[FunctionExecutorStatus]
+    FUNCTION_EXECUTOR_STATUS_STARTING_UP: _ClassVar[FunctionExecutorStatus]
+    FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR: _ClassVar[
+        FunctionExecutorStatus
+    ]
+    FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR: _ClassVar[
+        FunctionExecutorStatus
+    ]
+    FUNCTION_EXECUTOR_STATUS_IDLE: _ClassVar[FunctionExecutorStatus]
+    FUNCTION_EXECUTOR_STATUS_RUNNING_TASK: _ClassVar[FunctionExecutorStatus]
+    FUNCTION_EXECUTOR_STATUS_UNHEALTHY: _ClassVar[FunctionExecutorStatus]
+    FUNCTION_EXECUTOR_STATUS_STOPPING: _ClassVar[FunctionExecutorStatus]
+
+class ExecutorStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+    __slots__ = ()
+    EXECUTOR_STATUS_UNKNOWN: _ClassVar[ExecutorStatus]
+    EXECUTOR_STATUS_STARTING: _ClassVar[ExecutorStatus]
+    EXECUTOR_STATUS_RUNNING: _ClassVar[ExecutorStatus]
+    EXECUTOR_STATUS_DRAINED: _ClassVar[ExecutorStatus]
+    EXECUTOR_STATUS_SHUTTING_DOWN: _ClassVar[ExecutorStatus]
+
+GPU_MODEL_UNKNOWN: GPUModel
+GPU_MODEL_NVIDIA_TESLA_T4_16GB: GPUModel
+GPU_MODEL_NVIDIA_TESLA_V100_16GB: GPUModel
+GPU_MODEL_NVIDIA_A10_24GB: GPUModel
+GPU_MODEL_NVIDIA_A6000_48GB: GPUModel
+GPU_MODEL_NVIDIA_A100_SXM4_40GB: GPUModel
+GPU_MODEL_NVIDIA_A100_SXM4_80GB: GPUModel
+GPU_MODEL_NVIDIA_A100_PCI_40GB: GPUModel
+GPU_MODEL_NVIDIA_H100_SXM5_80GB: GPUModel
+GPU_MODEL_NVIDIA_H100_PCI_80GB: GPUModel
+GPU_MODEL_NVIDIA_RTX_6000_24GB: GPUModel
+FUNCTION_EXECUTOR_STATUS_UNKNOWN: FunctionExecutorStatus
+FUNCTION_EXECUTOR_STATUS_STOPPED: FunctionExecutorStatus
+FUNCTION_EXECUTOR_STATUS_STARTING_UP: FunctionExecutorStatus
+FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR: FunctionExecutorStatus
+FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR: FunctionExecutorStatus
+FUNCTION_EXECUTOR_STATUS_IDLE: FunctionExecutorStatus
+FUNCTION_EXECUTOR_STATUS_RUNNING_TASK: FunctionExecutorStatus
+FUNCTION_EXECUTOR_STATUS_UNHEALTHY: FunctionExecutorStatus
+FUNCTION_EXECUTOR_STATUS_STOPPING: FunctionExecutorStatus
+EXECUTOR_STATUS_UNKNOWN: ExecutorStatus
+EXECUTOR_STATUS_STARTING: ExecutorStatus
+EXECUTOR_STATUS_RUNNING: ExecutorStatus
+EXECUTOR_STATUS_DRAINED: ExecutorStatus
+EXECUTOR_STATUS_SHUTTING_DOWN: ExecutorStatus
+
+class GPUResources(_message.Message):
+    __slots__ = ("count", "model")
+    COUNT_FIELD_NUMBER: _ClassVar[int]
+    MODEL_FIELD_NUMBER: _ClassVar[int]
+    count: int
+    model: GPUModel
+    def __init__(
+        self, count: _Optional[int] = ..., model: _Optional[_Union[GPUModel, str]] = ...
+    ) -> None: ...
+
+class HostResources(_message.Message):
+    __slots__ = ("cpu_count", "memory_bytes", "disk_bytes", "gpu")
+    CPU_COUNT_FIELD_NUMBER: _ClassVar[int]
+    MEMORY_BYTES_FIELD_NUMBER: _ClassVar[int]
+    DISK_BYTES_FIELD_NUMBER: _ClassVar[int]
+    GPU_FIELD_NUMBER: _ClassVar[int]
+    cpu_count: int
+    memory_bytes: int
+    disk_bytes: int
+    gpu: GPUResources
+    def __init__(
+        self,
+        cpu_count: _Optional[int] = ...,
+        memory_bytes: _Optional[int] = ...,
+        disk_bytes: _Optional[int] = ...,
+        gpu: _Optional[_Union[GPUResources, _Mapping]] = ...,
+    ) -> None: ...
+
+class AllowedFunction(_message.Message):
+    __slots__ = ("namespace", "graph_name", "function_name", "graph_version")
+    NAMESPACE_FIELD_NUMBER: _ClassVar[int]
+    GRAPH_NAME_FIELD_NUMBER: _ClassVar[int]
+    FUNCTION_NAME_FIELD_NUMBER: _ClassVar[int]
+    GRAPH_VERSION_FIELD_NUMBER: _ClassVar[int]
+    namespace: str
+    graph_name: str
+    function_name: str
+    graph_version: str
+    def __init__(
+        self,
+        namespace: _Optional[str] = ...,
+        graph_name: _Optional[str] = ...,
+        function_name: _Optional[str] = ...,
+        graph_version: _Optional[str] = ...,
+    ) -> None: ...
+
+class FunctionExecutorDescription(_message.Message):
+    __slots__ = (
+        "id",
+        "namespace",
+        "graph_name",
+        "graph_version",
+        "function_name",
+        "image_uri",
+    )
+    ID_FIELD_NUMBER: _ClassVar[int]
+    NAMESPACE_FIELD_NUMBER: _ClassVar[int]
+    GRAPH_NAME_FIELD_NUMBER: _ClassVar[int]
+    GRAPH_VERSION_FIELD_NUMBER: _ClassVar[int]
+    FUNCTION_NAME_FIELD_NUMBER: _ClassVar[int]
+    IMAGE_URI_FIELD_NUMBER: _ClassVar[int]
+    id: str
+    namespace: str
+    graph_name: str
+    graph_version: str
+    function_name: str
+    image_uri: str
+    def __init__(
+        self,
+        id: _Optional[str] = ...,
+        namespace: _Optional[str] = ...,
+        graph_name: _Optional[str] = ...,
+        graph_version: _Optional[str] = ...,
+        function_name: _Optional[str] = ...,
+        image_uri: _Optional[str] = ...,
+    ) -> None: ...
+
+class FunctionExecutorState(_message.Message):
+    __slots__ = ("description", "status")
+    DESCRIPTION_FIELD_NUMBER: _ClassVar[int]
+    STATUS_FIELD_NUMBER: _ClassVar[int]
+    description: FunctionExecutorDescription
+    status: FunctionExecutorStatus
+    def __init__(
+        self,
+        description: _Optional[_Union[FunctionExecutorDescription, _Mapping]] = ...,
+        status: _Optional[_Union[FunctionExecutorStatus, str]] = ...,
+    ) -> None: ...
+
+class ExecutorState(_message.Message):
+    __slots__ = (
+        "executor_id",
+        "executor_status",
+        "host_resources",
+        "allowed_functions",
+        "function_executor_states",
+    )
+    EXECUTOR_ID_FIELD_NUMBER: _ClassVar[int]
+    EXECUTOR_STATUS_FIELD_NUMBER: _ClassVar[int]
+    HOST_RESOURCES_FIELD_NUMBER: _ClassVar[int]
+    ALLOWED_FUNCTIONS_FIELD_NUMBER: _ClassVar[int]
+    FUNCTION_EXECUTOR_STATES_FIELD_NUMBER: _ClassVar[int]
+    executor_id: str
+    executor_status: ExecutorStatus
+    host_resources: HostResources
+    allowed_functions: _containers.RepeatedCompositeFieldContainer[AllowedFunction]
+    function_executor_states: _containers.RepeatedCompositeFieldContainer[
+        FunctionExecutorState
+    ]
+    def __init__(
+        self,
+        executor_id: _Optional[str] = ...,
+        executor_status: _Optional[_Union[ExecutorStatus, str]] = ...,
+        host_resources: _Optional[_Union[HostResources, _Mapping]] = ...,
+        allowed_functions: _Optional[
+            _Iterable[_Union[AllowedFunction, _Mapping]]
+        ] = ...,
+        function_executor_states: _Optional[
+            _Iterable[_Union[FunctionExecutorState, _Mapping]]
+        ] = ...,
+    ) -> None: ...
+
+class ReportExecutorStateRequest(_message.Message):
+    __slots__ = ("executor_state",)
+    EXECUTOR_STATE_FIELD_NUMBER: _ClassVar[int]
+    executor_state: ExecutorState
+    def __init__(
+        self, executor_state: _Optional[_Union[ExecutorState, _Mapping]] = ...
+    ) -> None: ...
+
+class ReportExecutorStateResponse(_message.Message):
+    __slots__ = ()
+    def __init__(self) -> None: ...
+
+class Task(_message.Message):
+    __slots__ = (
+        "id",
+        "namespace",
+        "graph_name",
+        "graph_version",
+        "function_name",
+        "graph_invocation_id",
+        "input_key",
+        "reducer_output_key",
+    )
+    ID_FIELD_NUMBER: _ClassVar[int]
+    NAMESPACE_FIELD_NUMBER: _ClassVar[int]
+    GRAPH_NAME_FIELD_NUMBER: _ClassVar[int]
+    GRAPH_VERSION_FIELD_NUMBER: _ClassVar[int]
+    FUNCTION_NAME_FIELD_NUMBER: _ClassVar[int]
+    GRAPH_INVOCATION_ID_FIELD_NUMBER: _ClassVar[int]
+    INPUT_KEY_FIELD_NUMBER: _ClassVar[int]
+    REDUCER_OUTPUT_KEY_FIELD_NUMBER: _ClassVar[int]
+    id: str
+    namespace: str
+    graph_name: str
+    graph_version: str
+    function_name: str
+    graph_invocation_id: str
+    input_key: str
+    reducer_output_key: str
+    def __init__(
+        self,
+        id: _Optional[str] = ...,
+        namespace: _Optional[str] = ...,
+        graph_name: _Optional[str] = ...,
+        graph_version: _Optional[str] = ...,
+        function_name: _Optional[str] = ...,
+        graph_invocation_id: _Optional[str] = ...,
+        input_key: _Optional[str] = ...,
+        reducer_output_key: _Optional[str] = ...,
+    ) -> None: ...
+
+class TaskAllocation(_message.Message):
+    __slots__ = ("function_executor_id", "task")
+    FUNCTION_EXECUTOR_ID_FIELD_NUMBER: _ClassVar[int]
+    TASK_FIELD_NUMBER: _ClassVar[int]
+    function_executor_id: str
+    task: Task
+    def __init__(
+        self,
+        function_executor_id: _Optional[str] = ...,
+        task: _Optional[_Union[Task, _Mapping]] = ...,
+    ) -> None: ...
+
+class GetDesiredExecutorStatesRequest(_message.Message):
+    __slots__ = ("executor_id",)
+    EXECUTOR_ID_FIELD_NUMBER: _ClassVar[int]
+    executor_id: str
+    def __init__(self, executor_id: _Optional[str] = ...) -> None: ...
+
+class DesiredExecutorState(_message.Message):
+    __slots__ = ("function_executors", "task_allocations", "clock")
+    FUNCTION_EXECUTORS_FIELD_NUMBER: _ClassVar[int]
+    TASK_ALLOCATIONS_FIELD_NUMBER: _ClassVar[int]
+    CLOCK_FIELD_NUMBER: _ClassVar[int]
+    function_executors: _containers.RepeatedCompositeFieldContainer[
+        FunctionExecutorDescription
+    ]
+    task_allocations: _containers.RepeatedCompositeFieldContainer[TaskAllocation]
+    clock: int
+    def __init__(
+        self,
+        function_executors: _Optional[
+            _Iterable[_Union[FunctionExecutorDescription, _Mapping]]
+        ] = ...,
+        task_allocations: _Optional[_Iterable[_Union[TaskAllocation, _Mapping]]] = ...,
+        clock: _Optional[int] = ...,
+    ) -> None: ...
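The fourth new file, task_scheduler_pb2_grpc.py, is listed in the summary above but not expanded here; it carries the client stub for TaskSchedulerService. Assuming the conventional grpcio codegen (stub named TaskSchedulerServiceStub, methods named after the rpc declarations) and an illustrative server address, an Executor-side client sketch would look roughly like:

import grpc

from indexify.task_scheduler.proto import task_scheduler_pb2 as pb2
from indexify.task_scheduler.proto import task_scheduler_pb2_grpc as pb2_grpc

with grpc.insecure_channel("localhost:8901") as channel:
    stub = pb2_grpc.TaskSchedulerServiceStub(channel)
    # Heartbeat: report the current Executor state (sent every 5 seconds
    # per the service comment in the .proto).
    stub.report_executor_state(
        pb2.ReportExecutorStateRequest(
            executor_state=pb2.ExecutorState(executor_id="executor-1")
        )
    )
    # Server-streaming RPC: consume desired states as the Server emits them.
    for desired in stub.get_desired_executor_states(
        pb2.GetDesiredExecutorStatesRequest(executor_id="executor-1")
    ):
        print(desired.clock, len(desired.task_allocations))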