indexify 0.3.14__py3-none-any.whl → 0.3.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. indexify/cli/cli.py +20 -91
  2. indexify/executor/api_objects.py +2 -0
  3. indexify/executor/executor.py +77 -86
  4. indexify/executor/function_executor/function_executor_state.py +43 -43
  5. indexify/executor/function_executor/function_executor_states_container.py +10 -4
  6. indexify/executor/function_executor/function_executor_status.py +91 -0
  7. indexify/executor/function_executor/metrics/function_executor.py +1 -1
  8. indexify/executor/function_executor/metrics/function_executor_state.py +36 -0
  9. indexify/executor/function_executor/server/function_executor_server_factory.py +8 -8
  10. indexify/executor/function_executor/single_task_runner.py +100 -37
  11. indexify/executor/grpc/channel_creator.py +53 -0
  12. indexify/executor/grpc/metrics/channel_creator.py +18 -0
  13. indexify/executor/grpc/metrics/state_reporter.py +17 -0
  14. indexify/executor/{state_reconciler.py → grpc/state_reconciler.py} +60 -31
  15. indexify/executor/grpc/state_reporter.py +199 -0
  16. indexify/executor/monitoring/health_checker/generic_health_checker.py +27 -12
  17. indexify/executor/task_runner.py +30 -6
  18. indexify/{task_scheduler/proto → proto}/task_scheduler.proto +23 -17
  19. indexify/proto/task_scheduler_pb2.py +64 -0
  20. indexify/{task_scheduler/proto → proto}/task_scheduler_pb2.pyi +28 -10
  21. indexify/{task_scheduler/proto → proto}/task_scheduler_pb2_grpc.py +16 -16
  22. {indexify-0.3.14.dist-info → indexify-0.3.16.dist-info}/METADATA +1 -1
  23. {indexify-0.3.14.dist-info → indexify-0.3.16.dist-info}/RECORD +25 -21
  24. indexify/executor/state_reporter.py +0 -127
  25. indexify/task_scheduler/proto/task_scheduler_pb2.py +0 -69
  26. {indexify-0.3.14.dist-info → indexify-0.3.16.dist-info}/WHEEL +0 -0
  27. {indexify-0.3.14.dist-info → indexify-0.3.16.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,199 @@
1
+ import asyncio
2
+ from typing import Any, Dict, List, Optional
3
+
4
+ import grpc
5
+
6
+ from indexify.proto.task_scheduler_pb2 import (
7
+ AllowedFunction,
8
+ ExecutorState,
9
+ ExecutorStatus,
10
+ FunctionExecutorDescription,
11
+ )
12
+ from indexify.proto.task_scheduler_pb2 import (
13
+ FunctionExecutorState as FunctionExecutorStateProto,
14
+ )
15
+ from indexify.proto.task_scheduler_pb2 import (
16
+ FunctionExecutorStatus as FunctionExecutorStatusProto,
17
+ )
18
+ from indexify.proto.task_scheduler_pb2 import (
19
+ GPUModel,
20
+ GPUResources,
21
+ HostResources,
22
+ ReportExecutorStateRequest,
23
+ )
24
+ from indexify.proto.task_scheduler_pb2_grpc import (
25
+ TaskSchedulerServiceStub,
26
+ )
27
+
28
+ from ..api_objects import FunctionURI
29
+ from ..function_executor.function_executor_state import FunctionExecutorState
30
+ from ..function_executor.function_executor_states_container import (
31
+ FunctionExecutorStatesContainer,
32
+ )
33
+ from ..function_executor.function_executor_status import FunctionExecutorStatus
34
+ from .channel_creator import ChannelCreator
35
+ from .metrics.state_reporter import (
36
+ metric_state_report_errors,
37
+ metric_state_report_latency,
38
+ metric_state_report_rpcs,
39
+ )
40
+
41
# Timing knobs (all in seconds) for the state reporting loop.
_REPORTING_INTERVAL_SEC = 5  # delay between consecutive successful reports
_REPORT_RPC_TIMEOUT_SEC = 5  # deadline for a single report_executor_state RPC
_REPORT_BACKOFF_ON_ERROR_SEC = 5  # wait before reconnecting after a failed report
44
+
45
+
46
class ExecutorStateReporter:
    """Periodically pushes the Executor's full state to the Server over gRPC.

    Runs a reconnect-on-failure loop: a fresh channel is created whenever a
    report RPC fails, and reporting continues until shutdown() is called.
    """

    def __init__(
        self,
        executor_id: str,
        development_mode: bool,
        function_allowlist: Optional[List[FunctionURI]],
        function_executor_states: FunctionExecutorStatesContainer,
        channel_creator: ChannelCreator,
        logger: Any,
    ):
        self._executor_id: str = executor_id
        self._development_mode: bool = development_mode
        self._channel_creator = channel_creator
        self._logger: Any = logger.bind(module=__name__)
        self._function_executor_states: FunctionExecutorStatesContainer = (
            function_executor_states
        )
        # Status starts as UNKNOWN until the Executor lifecycle updates it.
        self._executor_status: ExecutorStatus = ExecutorStatus.EXECUTOR_STATUS_UNKNOWN
        self._is_shutdown: bool = False
        # The allowlist never changes, so convert it to protos once up front.
        self._allowed_functions: List[AllowedFunction] = _to_grpc_allowed_functions(
            function_allowlist
        )

    def update_executor_status(self, value: ExecutorStatus):
        """Records the Executor status to include in subsequent reports."""
        self._executor_status = value

    async def run(self):
        """Runs the state reporter.

        Never raises any exceptions. Recreates the server channel after any
        reporting failure and keeps going until shutdown() is called.
        """
        while not self._is_shutdown:
            channel: grpc.aio.Channel
            async with await self._channel_creator.create() as channel:
                await self._report_until_failure(TaskSchedulerServiceStub(channel))

        self._logger.info("State reporter shutdown")

    async def _report_until_failure(self, stub: TaskSchedulerServiceStub):
        """Reports state on a fixed interval until shutdown or the first error.

        Returning (instead of raising) lets run() rebuild the channel.
        """
        while not self._is_shutdown:
            try:
                await self._report_state(stub)
                await asyncio.sleep(_REPORTING_INTERVAL_SEC)
            except Exception as e:
                self._logger.error(
                    f"Failed to report state to the server, reconnecting in {_REPORT_BACKOFF_ON_ERROR_SEC} sec.",
                    exc_info=e,
                )
                await asyncio.sleep(_REPORT_BACKOFF_ON_ERROR_SEC)
                return

    async def _report_state(self, stub: TaskSchedulerServiceStub):
        """Sends one ReportExecutorState RPC, recording metrics around it."""
        with (
            metric_state_report_errors.count_exceptions(),
            metric_state_report_latency.time(),
        ):
            metric_state_report_rpcs.inc()
            current_state = ExecutorState(
                executor_id=self._executor_id,
                development_mode=self._development_mode,
                executor_status=self._executor_status,
                free_resources=await self._fetch_free_host_resources(),
                allowed_functions=self._allowed_functions,
                function_executor_states=await self._fetch_function_executor_states(),
            )

            await stub.report_executor_state(
                ReportExecutorStateRequest(executor_state=current_state),
                timeout=_REPORT_RPC_TIMEOUT_SEC,
            )

    async def _fetch_free_host_resources(self) -> HostResources:
        """Returns the free host resources to report.

        TODO: Implement host resource metrics reporting; zeros for now.
        """
        return HostResources(
            cpu_count=0,
            memory_bytes=0,
            disk_bytes=0,
            gpu=GPUResources(
                count=0,
                model=GPUModel.GPU_MODEL_UNKNOWN,
            ),
        )

    async def _fetch_function_executor_states(self) -> List[FunctionExecutorStateProto]:
        """Snapshots all known Function Executor states as proto messages."""
        return [
            FunctionExecutorStateProto(
                description=FunctionExecutorDescription(
                    id=fe_state.id,
                    namespace=fe_state.namespace,
                    graph_name=fe_state.graph_name,
                    graph_version=fe_state.graph_version,
                    function_name=fe_state.function_name,
                ),
                status=_to_grpc_function_executor_status(
                    fe_state.status, self._logger
                ),
            )
            async for fe_state in self._function_executor_states
        ]

    async def shutdown(self):
        """Shuts down the state reporter.

        Never raises any exceptions. The running loop observes the flag on its
        next wakeup, so shutdown is not instantaneous.
        """
        self._is_shutdown = True
155
+
156
+
157
def _to_grpc_allowed_functions(function_allowlist: Optional[List[FunctionURI]]):
    """Converts the Executor's function allowlist into AllowedFunction protos.

    A None allowlist (meaning "any function may run here") maps to an empty
    proto list, which the Server interprets the same way.
    """
    if function_allowlist is None:
        return []

    result: List[AllowedFunction] = []
    for uri in function_allowlist:
        uri: FunctionURI
        entry = AllowedFunction(
            namespace=uri.namespace,
            graph_name=uri.compute_graph,
            function_name=uri.compute_fn,
        )
        # graph_version is optional in the proto; only set it when the URI pins one.
        if uri.version is not None:
            entry.graph_version = uri.version
        result.append(entry)

    return result
174
+
175
+
176
# Maps internal Function Executor statuses onto their proto wire values.
# Note: DESTROYING maps to STOPPING, and both DESTROYED and SHUTDOWN collapse
# into STOPPED on the wire.
_STATUS_MAPPING: Dict[FunctionExecutorStatus, Any] = {
    FunctionExecutorStatus.STARTING_UP: FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_STARTING_UP,
    FunctionExecutorStatus.STARTUP_FAILED_CUSTOMER_ERROR: FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR,
    FunctionExecutorStatus.STARTUP_FAILED_PLATFORM_ERROR: FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR,
    FunctionExecutorStatus.IDLE: FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_IDLE,
    FunctionExecutorStatus.RUNNING_TASK: FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_RUNNING_TASK,
    FunctionExecutorStatus.UNHEALTHY: FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_UNHEALTHY,
    FunctionExecutorStatus.DESTROYING: FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_STOPPING,
    FunctionExecutorStatus.DESTROYED: FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_STOPPED,
    FunctionExecutorStatus.SHUTDOWN: FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_STOPPED,
}


def _to_grpc_function_executor_status(
    status: FunctionExecutorStatus, logger: Any
) -> FunctionExecutorStatusProto:
    """Converts an internal status into its proto enum value.

    Unmapped statuses are reported as UNKNOWN and logged as errors so a
    missing _STATUS_MAPPING entry is noticed quickly.
    """
    try:
        # No mapping entry produces UNKNOWN, so a hit is always a valid value.
        return _STATUS_MAPPING[status]
    except KeyError:
        logger.error("Unexpected Function Executor status", status=status)
        return FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_UNKNOWN
@@ -3,6 +3,7 @@ from typing import Optional
3
3
  from ...function_executor.function_executor_states_container import (
4
4
  FunctionExecutorStatesContainer,
5
5
  )
6
+ from ...function_executor.function_executor_status import FunctionExecutorStatus
6
7
  from .health_checker import HealthChecker, HealthCheckResult
7
8
 
8
9
  HEALTH_CHECKER_NAME = "GenericHealthChecker"
@@ -16,6 +17,7 @@ class GenericHealthChecker(HealthChecker):
16
17
 
17
18
  def __init__(self):
18
19
  self._function_executor_states: Optional[FunctionExecutorStatesContainer] = None
20
+ self._function_executor_health_check_ever_failed = False
19
21
 
20
22
  def set_function_executor_states_container(
21
23
  self, states: FunctionExecutorStatesContainer
@@ -42,17 +44,30 @@ class GenericHealthChecker(HealthChecker):
42
44
  # * So we fail whole Executor health check if a Function Executor health check ever failed to hint the users
43
45
  # that we probably need to recreate the Executor machine/VM/container (unless there's a bug in Function
44
46
  # code that user can investigate themself).
47
+ await self._check_function_executors()
48
+ if self._function_executor_health_check_ever_failed:
49
+ return HealthCheckResult(
50
+ is_success=False,
51
+ status_message="A Function Executor health check failed",
52
+ checker_name=HEALTH_CHECKER_NAME,
53
+ )
54
+ else:
55
+ return HealthCheckResult(
56
+ is_success=True,
57
+ status_message="All Function Executors pass health checks",
58
+ checker_name=HEALTH_CHECKER_NAME,
59
+ )
60
+
61
+ async def _check_function_executors(self):
62
+ if self._function_executor_health_check_ever_failed:
63
+ return
64
+
45
65
  async for state in self._function_executor_states:
46
66
  # No need to async lock the state to read a single value.
47
- if state.health_check_failed:
48
- return HealthCheckResult(
49
- is_success=False,
50
- status_message="A Function Executor health check failed",
51
- checker_name=HEALTH_CHECKER_NAME,
52
- )
53
-
54
- return HealthCheckResult(
55
- is_success=True,
56
- status_message="All Function Executors pass health checks",
57
- checker_name=HEALTH_CHECKER_NAME,
58
- )
67
+ if state.status in [
68
+ FunctionExecutorStatus.UNHEALTHY,
69
+ FunctionExecutorStatus.STARTUP_FAILED_CUSTOMER_ERROR,
70
+ FunctionExecutorStatus.STARTUP_FAILED_PLATFORM_ERROR,
71
+ ]:
72
+ self._function_executor_health_check_ever_failed = True
73
+ return
@@ -1,7 +1,10 @@
1
1
  from typing import Any, Optional
2
2
 
3
3
  from .api_objects import Task
4
- from .function_executor.function_executor_state import FunctionExecutorState
4
+ from .function_executor.function_executor_state import (
5
+ FunctionExecutorState,
6
+ FunctionExecutorStatus,
7
+ )
5
8
  from .function_executor.function_executor_states_container import (
6
9
  FunctionExecutorStatesContainer,
7
10
  )
@@ -109,6 +112,10 @@ class TaskRunner:
109
112
  raise
110
113
 
111
114
  async def _run_task_policy(self, state: FunctionExecutorState, task: Task) -> None:
115
+ """Runs the task policy until the task can run on the Function Executor.
116
+
117
+ On successful return the Function Executor status is either IDLE or DESTROYED.
118
+ """
112
119
  # Current policy for running tasks:
113
120
  # - There can only be a single Function Executor per function regardless of function versions.
114
121
  # -- If a Function Executor already exists for a different function version then wait until
@@ -116,13 +123,30 @@ class TaskRunner:
116
123
  # -- This prevents failed tasks for different versions of the same function continiously
117
124
  # destroying each other's Function Executors.
118
125
  # - Each Function Executor rans at most 1 task concurrently.
119
- await state.wait_running_tasks_less(1)
126
+ await state.wait_status(
127
+ [
128
+ FunctionExecutorStatus.DESTROYED,
129
+ FunctionExecutorStatus.IDLE,
130
+ FunctionExecutorStatus.UNHEALTHY,
131
+ FunctionExecutorStatus.SHUTDOWN,
132
+ ]
133
+ )
134
+ # We only shutdown the Function Executor on full Executor shutdown so it's fine to raise error here.
135
+ if state.status == FunctionExecutorStatus.SHUTDOWN:
136
+ raise Exception("Function Executor state is shutting down")
120
137
 
121
- if state.graph_version != task.graph_version:
138
+ if state.status == FunctionExecutorStatus.UNHEALTHY:
122
139
  await state.destroy_function_executor()
123
- state.graph_version = task.graph_version
124
- # At this point the state belongs to the version of the function from the task
125
- # and there are no running tasks in the Function Executor.
140
+
141
+ if state.graph_version == task.graph_version:
142
+ return # All good, we can run on this Function Executor.
143
+
144
+ if state.status in [FunctionExecutorStatus.IDLE]:
145
+ await state.destroy_function_executor()
146
+
147
+ state.graph_version = task.graph_version
148
+ # At this point the state belongs to the version of the function from the task
149
+ # and there are no running tasks in the Function Executor.
126
150
 
127
151
  async def _run_task(
128
152
  self, state: FunctionExecutorState, task_input: TaskInput, logger: Any
@@ -26,7 +26,7 @@ message GPUResources {
26
26
  optional GPUModel model = 2;
27
27
  }
28
28
 
29
- // Free host resources available at the Executor.
29
+ // Resources that we're currently tracking and limiting on Executor.
30
30
  message HostResources {
31
31
  optional uint32 cpu_count = 1;
32
32
  optional uint64 memory_bytes = 2;
@@ -45,14 +45,14 @@ message AllowedFunction {
45
45
 
46
46
  enum FunctionExecutorStatus {
47
47
  FUNCTION_EXECUTOR_STATUS_UNKNOWN = 0;
48
- FUNCTION_EXECUTOR_STATUS_STOPPED = 1;
49
- FUNCTION_EXECUTOR_STATUS_STARTING_UP = 2;
50
- FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR = 3;
51
- FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR = 4;
52
- FUNCTION_EXECUTOR_STATUS_IDLE = 5;
53
- FUNCTION_EXECUTOR_STATUS_RUNNING_TASK = 6;
54
- FUNCTION_EXECUTOR_STATUS_UNHEALTHY = 7;
55
- FUNCTION_EXECUTOR_STATUS_STOPPING = 8;
48
+ FUNCTION_EXECUTOR_STATUS_STARTING_UP = 1;
49
+ FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR = 2;
50
+ FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR = 3;
51
+ FUNCTION_EXECUTOR_STATUS_IDLE = 4;
52
+ FUNCTION_EXECUTOR_STATUS_RUNNING_TASK = 5;
53
+ FUNCTION_EXECUTOR_STATUS_UNHEALTHY = 6;
54
+ FUNCTION_EXECUTOR_STATUS_STOPPING = 7;
55
+ FUNCTION_EXECUTOR_STATUS_STOPPED = 8;
56
56
  }
57
57
 
58
58
  // Immutable information that identifies and describes a Function Executor.
@@ -63,6 +63,8 @@ message FunctionExecutorDescription {
63
63
  optional string graph_version = 4;
64
64
  optional string function_name = 5;
65
65
  optional string image_uri = 6;
66
+ repeated string secret_names = 7;
67
+ optional HostResources resource_limits = 8;
66
68
  }
67
69
 
68
70
  message FunctionExecutorState {
@@ -72,19 +74,22 @@ message FunctionExecutorState {
72
74
 
73
75
  enum ExecutorStatus {
74
76
  EXECUTOR_STATUS_UNKNOWN = 0;
75
- EXECUTOR_STATUS_STARTING = 1;
77
+ EXECUTOR_STATUS_STARTING_UP = 1;
76
78
  EXECUTOR_STATUS_RUNNING = 2;
77
79
  EXECUTOR_STATUS_DRAINED = 3;
78
- EXECUTOR_STATUS_SHUTTING_DOWN = 4;
80
+ EXECUTOR_STATUS_STOPPING = 4;
81
+ EXECUTOR_STATUS_STOPPED = 5;
79
82
  }
80
83
 
81
84
  message ExecutorState {
82
85
  optional string executor_id = 1;
83
- optional ExecutorStatus executor_status = 2;
84
- optional HostResources host_resources = 3;
86
+ optional bool development_mode = 2;
87
+ optional ExecutorStatus executor_status = 3;
88
+ // Free resources available at the Executor.
89
+ optional HostResources free_resources = 4;
85
90
  // Empty allowed_functions list means that any function can run on the Executor.
86
- repeated AllowedFunction allowed_functions = 4;
87
- repeated FunctionExecutorState function_executor_states = 5;
91
+ repeated AllowedFunction allowed_functions = 5;
92
+ repeated FunctionExecutorState function_executor_states = 6;
88
93
  }
89
94
 
90
95
  // A message sent by Executor to report its up to date state to Server.
@@ -106,6 +111,7 @@ message Task {
106
111
  optional string graph_invocation_id = 6;
107
112
  optional string input_key = 8;
108
113
  optional string reducer_output_key = 9;
114
+ optional string timeout_ms = 10;
109
115
  }
110
116
 
111
117
  message TaskAllocation {
@@ -139,9 +145,9 @@ service TaskSchedulerService {
139
145
  // Called by Executor to open a stream of its desired states. When Server wants Executor to change something
140
146
  // it puts a message on the stream with the new desired state of the Executor.
141
147
  //
142
- // Depricated HTTP API is used to download the serialized graph and task inputs.
148
+ // Deprecated HTTP API is used to download the serialized graph and task inputs.
143
149
  rpc get_desired_executor_states(GetDesiredExecutorStatesRequest) returns (stream DesiredExecutorState) {}
144
150
 
145
- // Task outcome is currently reported via depricated HTTP API. We're going to migrate task output reporting to gRPC
151
+ // Task outcome is currently reported via deprecated HTTP API. We're going to migrate task output reporting to gRPC
146
152
  // when we move S3 downloads and uploads to Executor.
147
153
  }
@@ -0,0 +1,64 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
3
+ # NO CHECKED-IN PROTOBUF GENCODE
4
+ # source: indexify/proto/task_scheduler.proto
5
+ # Protobuf Python Version: 5.29.0
6
+ """Generated protocol buffer code."""
7
+ from google.protobuf import descriptor as _descriptor
8
+ from google.protobuf import descriptor_pool as _descriptor_pool
9
+ from google.protobuf import runtime_version as _runtime_version
10
+ from google.protobuf import symbol_database as _symbol_database
11
+ from google.protobuf.internal import builder as _builder
12
+
13
+ _runtime_version.ValidateProtobufRuntimeVersion(
14
+ _runtime_version.Domain.PUBLIC, 5, 29, 0, "", "indexify/proto/task_scheduler.proto"
15
+ )
16
+ # @@protoc_insertion_point(imports)
17
+
18
+ _sym_db = _symbol_database.Default()
19
+
20
+
21
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
22
+ b'\n#indexify/proto/task_scheduler.proto\x12\x16task_scheduler_service"l\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x34\n\x05model\x18\x02 \x01(\x0e\x32 .task_scheduler_service.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_model"\xc9\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12\x36\n\x03gpu\x18\x04 \x01(\x0b\x32$.task_scheduler_service.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xf4\x02\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12\x43\n\x0fresource_limits\x18\x08 \x01(\x0b\x32%.task_scheduler_service.HostResourcesH\x06\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x12\n\x10_resource_limits"\xc6\x01\n\x15\x46unctionExecutorState\x12M\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32\x33.task_scheduler_service.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12\x43\n\x06status\x18\x02 
\x01(\x0e\x32..task_scheduler_service.FunctionExecutorStatusH\x01\x88\x01\x01\x42\x0e\n\x0c_descriptionB\t\n\x07_status"\xb3\x03\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1d\n\x10\x64\x65velopment_mode\x18\x02 \x01(\x08H\x01\x88\x01\x01\x12\x44\n\x0f\x65xecutor_status\x18\x03 \x01(\x0e\x32&.task_scheduler_service.ExecutorStatusH\x02\x88\x01\x01\x12\x42\n\x0e\x66ree_resources\x18\x04 \x01(\x0b\x32%.task_scheduler_service.HostResourcesH\x03\x88\x01\x01\x12\x42\n\x11\x61llowed_functions\x18\x05 \x03(\x0b\x32\'.task_scheduler_service.AllowedFunction\x12O\n\x18\x66unction_executor_states\x18\x06 \x03(\x0b\x32-.task_scheduler_service.FunctionExecutorStateB\x0e\n\x0c_executor_idB\x13\n\x11_development_modeB\x12\n\x10_executor_statusB\x11\n\x0f_free_resources"s\n\x1aReportExecutorStateRequest\x12\x42\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32%.task_scheduler_service.ExecutorStateH\x00\x88\x01\x01\x42\x11\n\x0f_executor_state"\x1d\n\x1bReportExecutorStateResponse"\x88\x03\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x16\n\tinput_key\x18\x08 \x01(\tH\x06\x88\x01\x01\x12\x1f\n\x12reducer_output_key\x18\t \x01(\tH\x07\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\tH\x08\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\x0c\n\n_input_keyB\x15\n\x13_reducer_output_keyB\r\n\x0b_timeout_ms"\x86\x01\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12/\n\x04task\x18\x02 
\x01(\x0b\x32\x1c.task_scheduler_service.TaskH\x01\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_task"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xc7\x01\n\x14\x44\x65siredExecutorState\x12O\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32\x33.task_scheduler_service.FunctionExecutorDescription\x12@\n\x10task_allocations\x18\x02 \x03(\x0b\x32&.task_scheduler_service.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock*\x86\x03\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12"\n\x1eGPU_MODEL_NVIDIA_TESLA_T4_16GB\x10\n\x12$\n GPU_MODEL_NVIDIA_TESLA_V100_16GB\x10\x14\x12\x1d\n\x19GPU_MODEL_NVIDIA_A10_24GB\x10\x1e\x12\x1f\n\x1bGPU_MODEL_NVIDIA_A6000_48GB\x10(\x12#\n\x1fGPU_MODEL_NVIDIA_A100_SXM4_40GB\x10\x32\x12#\n\x1fGPU_MODEL_NVIDIA_A100_SXM4_80GB\x10\x33\x12"\n\x1eGPU_MODEL_NVIDIA_A100_PCI_40GB\x10\x34\x12#\n\x1fGPU_MODEL_NVIDIA_H100_SXM5_80GB\x10<\x12"\n\x1eGPU_MODEL_NVIDIA_H100_PCI_80GB\x10=\x12"\n\x1eGPU_MODEL_NVIDIA_RTX_6000_24GB\x10>*\xa3\x03\n\x16\x46unctionExecutorStatus\x12$\n FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12(\n$FUNCTION_EXECUTOR_STATUS_STARTING_UP\x10\x01\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR\x10\x02\x12:\n6FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR\x10\x03\x12!\n\x1d\x46UNCTION_EXECUTOR_STATUS_IDLE\x10\x04\x12)\n%FUNCTION_EXECUTOR_STATUS_RUNNING_TASK\x10\x05\x12&\n"FUNCTION_EXECUTOR_STATUS_UNHEALTHY\x10\x06\x12%\n!FUNCTION_EXECUTOR_STATUS_STOPPING\x10\x07\x12$\n 
FUNCTION_EXECUTOR_STATUS_STOPPED\x10\x08*\xc3\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1c\n\x18\x45XECUTOR_STATUS_STOPPING\x10\x04\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x05\x32\xa6\x02\n\x14TaskSchedulerService\x12\x82\x01\n\x15report_executor_state\x12\x32.task_scheduler_service.ReportExecutorStateRequest\x1a\x33.task_scheduler_service.ReportExecutorStateResponse"\x00\x12\x88\x01\n\x1bget_desired_executor_states\x12\x37.task_scheduler_service.GetDesiredExecutorStatesRequest\x1a,.task_scheduler_service.DesiredExecutorState"\x00\x30\x01\x62\x06proto3'
23
+ )
24
+
25
+ _globals = globals()
26
+ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
27
+ _builder.BuildTopDescriptorsAndMessages(
28
+ DESCRIPTOR, "indexify.proto.task_scheduler_pb2", _globals
29
+ )
30
+ if not _descriptor._USE_C_DESCRIPTORS:
31
+ DESCRIPTOR._loaded_options = None
32
+ _globals["_GPUMODEL"]._serialized_start = 2541
33
+ _globals["_GPUMODEL"]._serialized_end = 2931
34
+ _globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 2934
35
+ _globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 3353
36
+ _globals["_EXECUTORSTATUS"]._serialized_start = 3356
37
+ _globals["_EXECUTORSTATUS"]._serialized_end = 3551
38
+ _globals["_GPURESOURCES"]._serialized_start = 63
39
+ _globals["_GPURESOURCES"]._serialized_end = 171
40
+ _globals["_HOSTRESOURCES"]._serialized_start = 174
41
+ _globals["_HOSTRESOURCES"]._serialized_end = 375
42
+ _globals["_ALLOWEDFUNCTION"]._serialized_start = 378
43
+ _globals["_ALLOWEDFUNCTION"]._serialized_end = 565
44
+ _globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_start = 568
45
+ _globals["_FUNCTIONEXECUTORDESCRIPTION"]._serialized_end = 940
46
+ _globals["_FUNCTIONEXECUTORSTATE"]._serialized_start = 943
47
+ _globals["_FUNCTIONEXECUTORSTATE"]._serialized_end = 1141
48
+ _globals["_EXECUTORSTATE"]._serialized_start = 1144
49
+ _globals["_EXECUTORSTATE"]._serialized_end = 1579
50
+ _globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_start = 1581
51
+ _globals["_REPORTEXECUTORSTATEREQUEST"]._serialized_end = 1696
52
+ _globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_start = 1698
53
+ _globals["_REPORTEXECUTORSTATERESPONSE"]._serialized_end = 1727
54
+ _globals["_TASK"]._serialized_start = 1730
55
+ _globals["_TASK"]._serialized_end = 2122
56
+ _globals["_TASKALLOCATION"]._serialized_start = 2125
57
+ _globals["_TASKALLOCATION"]._serialized_end = 2259
58
+ _globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_start = 2261
59
+ _globals["_GETDESIREDEXECUTORSTATESREQUEST"]._serialized_end = 2336
60
+ _globals["_DESIREDEXECUTORSTATE"]._serialized_start = 2339
61
+ _globals["_DESIREDEXECUTORSTATE"]._serialized_end = 2538
62
+ _globals["_TASKSCHEDULERSERVICE"]._serialized_start = 3554
63
+ _globals["_TASKSCHEDULERSERVICE"]._serialized_end = 3848
64
+ # @@protoc_insertion_point(module_scope)
@@ -28,7 +28,6 @@ class GPUModel(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
28
28
  class FunctionExecutorStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
29
29
  __slots__ = ()
30
30
  FUNCTION_EXECUTOR_STATUS_UNKNOWN: _ClassVar[FunctionExecutorStatus]
31
- FUNCTION_EXECUTOR_STATUS_STOPPED: _ClassVar[FunctionExecutorStatus]
32
31
  FUNCTION_EXECUTOR_STATUS_STARTING_UP: _ClassVar[FunctionExecutorStatus]
33
32
  FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR: _ClassVar[
34
33
  FunctionExecutorStatus
@@ -40,14 +39,16 @@ class FunctionExecutorStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
40
39
  FUNCTION_EXECUTOR_STATUS_RUNNING_TASK: _ClassVar[FunctionExecutorStatus]
41
40
  FUNCTION_EXECUTOR_STATUS_UNHEALTHY: _ClassVar[FunctionExecutorStatus]
42
41
  FUNCTION_EXECUTOR_STATUS_STOPPING: _ClassVar[FunctionExecutorStatus]
42
+ FUNCTION_EXECUTOR_STATUS_STOPPED: _ClassVar[FunctionExecutorStatus]
43
43
 
44
44
  class ExecutorStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
45
45
  __slots__ = ()
46
46
  EXECUTOR_STATUS_UNKNOWN: _ClassVar[ExecutorStatus]
47
- EXECUTOR_STATUS_STARTING: _ClassVar[ExecutorStatus]
47
+ EXECUTOR_STATUS_STARTING_UP: _ClassVar[ExecutorStatus]
48
48
  EXECUTOR_STATUS_RUNNING: _ClassVar[ExecutorStatus]
49
49
  EXECUTOR_STATUS_DRAINED: _ClassVar[ExecutorStatus]
50
- EXECUTOR_STATUS_SHUTTING_DOWN: _ClassVar[ExecutorStatus]
50
+ EXECUTOR_STATUS_STOPPING: _ClassVar[ExecutorStatus]
51
+ EXECUTOR_STATUS_STOPPED: _ClassVar[ExecutorStatus]
51
52
 
52
53
  GPU_MODEL_UNKNOWN: GPUModel
53
54
  GPU_MODEL_NVIDIA_TESLA_T4_16GB: GPUModel
@@ -61,7 +62,6 @@ GPU_MODEL_NVIDIA_H100_SXM5_80GB: GPUModel
61
62
  GPU_MODEL_NVIDIA_H100_PCI_80GB: GPUModel
62
63
  GPU_MODEL_NVIDIA_RTX_6000_24GB: GPUModel
63
64
  FUNCTION_EXECUTOR_STATUS_UNKNOWN: FunctionExecutorStatus
64
- FUNCTION_EXECUTOR_STATUS_STOPPED: FunctionExecutorStatus
65
65
  FUNCTION_EXECUTOR_STATUS_STARTING_UP: FunctionExecutorStatus
66
66
  FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR: FunctionExecutorStatus
67
67
  FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR: FunctionExecutorStatus
@@ -69,11 +69,13 @@ FUNCTION_EXECUTOR_STATUS_IDLE: FunctionExecutorStatus
69
69
  FUNCTION_EXECUTOR_STATUS_RUNNING_TASK: FunctionExecutorStatus
70
70
  FUNCTION_EXECUTOR_STATUS_UNHEALTHY: FunctionExecutorStatus
71
71
  FUNCTION_EXECUTOR_STATUS_STOPPING: FunctionExecutorStatus
72
+ FUNCTION_EXECUTOR_STATUS_STOPPED: FunctionExecutorStatus
72
73
  EXECUTOR_STATUS_UNKNOWN: ExecutorStatus
73
- EXECUTOR_STATUS_STARTING: ExecutorStatus
74
+ EXECUTOR_STATUS_STARTING_UP: ExecutorStatus
74
75
  EXECUTOR_STATUS_RUNNING: ExecutorStatus
75
76
  EXECUTOR_STATUS_DRAINED: ExecutorStatus
76
- EXECUTOR_STATUS_SHUTTING_DOWN: ExecutorStatus
77
+ EXECUTOR_STATUS_STOPPING: ExecutorStatus
78
+ EXECUTOR_STATUS_STOPPED: ExecutorStatus
77
79
 
78
80
  class GPUResources(_message.Message):
79
81
  __slots__ = ("count", "model")
@@ -129,6 +131,8 @@ class FunctionExecutorDescription(_message.Message):
129
131
  "graph_version",
130
132
  "function_name",
131
133
  "image_uri",
134
+ "secret_names",
135
+ "resource_limits",
132
136
  )
133
137
  ID_FIELD_NUMBER: _ClassVar[int]
134
138
  NAMESPACE_FIELD_NUMBER: _ClassVar[int]
@@ -136,12 +140,16 @@ class FunctionExecutorDescription(_message.Message):
136
140
  GRAPH_VERSION_FIELD_NUMBER: _ClassVar[int]
137
141
  FUNCTION_NAME_FIELD_NUMBER: _ClassVar[int]
138
142
  IMAGE_URI_FIELD_NUMBER: _ClassVar[int]
143
+ SECRET_NAMES_FIELD_NUMBER: _ClassVar[int]
144
+ RESOURCE_LIMITS_FIELD_NUMBER: _ClassVar[int]
139
145
  id: str
140
146
  namespace: str
141
147
  graph_name: str
142
148
  graph_version: str
143
149
  function_name: str
144
150
  image_uri: str
151
+ secret_names: _containers.RepeatedScalarFieldContainer[str]
152
+ resource_limits: HostResources
145
153
  def __init__(
146
154
  self,
147
155
  id: _Optional[str] = ...,
@@ -150,6 +158,8 @@ class FunctionExecutorDescription(_message.Message):
150
158
  graph_version: _Optional[str] = ...,
151
159
  function_name: _Optional[str] = ...,
152
160
  image_uri: _Optional[str] = ...,
161
+ secret_names: _Optional[_Iterable[str]] = ...,
162
+ resource_limits: _Optional[_Union[HostResources, _Mapping]] = ...,
153
163
  ) -> None: ...
154
164
 
155
165
  class FunctionExecutorState(_message.Message):
@@ -167,19 +177,22 @@ class FunctionExecutorState(_message.Message):
167
177
  class ExecutorState(_message.Message):
168
178
  __slots__ = (
169
179
  "executor_id",
180
+ "development_mode",
170
181
  "executor_status",
171
- "host_resources",
182
+ "free_resources",
172
183
  "allowed_functions",
173
184
  "function_executor_states",
174
185
  )
175
186
  EXECUTOR_ID_FIELD_NUMBER: _ClassVar[int]
187
+ DEVELOPMENT_MODE_FIELD_NUMBER: _ClassVar[int]
176
188
  EXECUTOR_STATUS_FIELD_NUMBER: _ClassVar[int]
177
- HOST_RESOURCES_FIELD_NUMBER: _ClassVar[int]
189
+ FREE_RESOURCES_FIELD_NUMBER: _ClassVar[int]
178
190
  ALLOWED_FUNCTIONS_FIELD_NUMBER: _ClassVar[int]
179
191
  FUNCTION_EXECUTOR_STATES_FIELD_NUMBER: _ClassVar[int]
180
192
  executor_id: str
193
+ development_mode: bool
181
194
  executor_status: ExecutorStatus
182
- host_resources: HostResources
195
+ free_resources: HostResources
183
196
  allowed_functions: _containers.RepeatedCompositeFieldContainer[AllowedFunction]
184
197
  function_executor_states: _containers.RepeatedCompositeFieldContainer[
185
198
  FunctionExecutorState
@@ -187,8 +200,9 @@ class ExecutorState(_message.Message):
187
200
  def __init__(
188
201
  self,
189
202
  executor_id: _Optional[str] = ...,
203
+ development_mode: bool = ...,
190
204
  executor_status: _Optional[_Union[ExecutorStatus, str]] = ...,
191
- host_resources: _Optional[_Union[HostResources, _Mapping]] = ...,
205
+ free_resources: _Optional[_Union[HostResources, _Mapping]] = ...,
192
206
  allowed_functions: _Optional[
193
207
  _Iterable[_Union[AllowedFunction, _Mapping]]
194
208
  ] = ...,
@@ -219,6 +233,7 @@ class Task(_message.Message):
219
233
  "graph_invocation_id",
220
234
  "input_key",
221
235
  "reducer_output_key",
236
+ "timeout_ms",
222
237
  )
223
238
  ID_FIELD_NUMBER: _ClassVar[int]
224
239
  NAMESPACE_FIELD_NUMBER: _ClassVar[int]
@@ -228,6 +243,7 @@ class Task(_message.Message):
228
243
  GRAPH_INVOCATION_ID_FIELD_NUMBER: _ClassVar[int]
229
244
  INPUT_KEY_FIELD_NUMBER: _ClassVar[int]
230
245
  REDUCER_OUTPUT_KEY_FIELD_NUMBER: _ClassVar[int]
246
+ TIMEOUT_MS_FIELD_NUMBER: _ClassVar[int]
231
247
  id: str
232
248
  namespace: str
233
249
  graph_name: str
@@ -236,6 +252,7 @@ class Task(_message.Message):
236
252
  graph_invocation_id: str
237
253
  input_key: str
238
254
  reducer_output_key: str
255
+ timeout_ms: str
239
256
  def __init__(
240
257
  self,
241
258
  id: _Optional[str] = ...,
@@ -246,6 +263,7 @@ class Task(_message.Message):
246
263
  graph_invocation_id: _Optional[str] = ...,
247
264
  input_key: _Optional[str] = ...,
248
265
  reducer_output_key: _Optional[str] = ...,
266
+ timeout_ms: _Optional[str] = ...,
249
267
  ) -> None: ...
250
268
 
251
269
  class TaskAllocation(_message.Message):