indexify 0.3.30__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/cli/__init__.py +18 -0
- indexify/cli/build_image.py +51 -0
- indexify/cli/deploy.py +57 -0
- indexify/cli/executor.py +205 -0
- indexify/executor/{grpc/channel_manager.py → channel_manager.py} +17 -11
- indexify/executor/executor.py +57 -311
- indexify/executor/function_allowlist.py +59 -0
- indexify/executor/function_executor/function_executor.py +12 -6
- indexify/executor/function_executor/invocation_state_client.py +25 -3
- indexify/executor/function_executor/server/function_executor_server_factory.py +3 -3
- indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +22 -11
- indexify/executor/function_executor_controller/__init__.py +13 -0
- indexify/executor/function_executor_controller/completed_task_metrics.py +82 -0
- indexify/executor/function_executor_controller/create_function_executor.py +154 -0
- indexify/executor/function_executor_controller/debug_event_loop.py +37 -0
- indexify/executor/function_executor_controller/destroy_function_executor.py +28 -0
- indexify/executor/function_executor_controller/downloads.py +199 -0
- indexify/executor/function_executor_controller/events.py +172 -0
- indexify/executor/function_executor_controller/function_executor_controller.py +759 -0
- indexify/executor/function_executor_controller/loggers.py +57 -0
- indexify/executor/function_executor_controller/message_validators.py +65 -0
- indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +68 -0
- indexify/executor/{metrics/downloader.py → function_executor_controller/metrics/downloads.py} +1 -3
- indexify/executor/function_executor_controller/metrics/function_executor_controller.py +60 -0
- indexify/executor/{function_executor/metrics/single_task_runner.py → function_executor_controller/metrics/run_task.py} +9 -3
- indexify/executor/function_executor_controller/metrics/upload_task_output.py +39 -0
- indexify/executor/function_executor_controller/prepare_task.py +38 -0
- indexify/executor/function_executor_controller/run_task.py +201 -0
- indexify/executor/function_executor_controller/task_info.py +33 -0
- indexify/executor/function_executor_controller/task_output.py +122 -0
- indexify/executor/function_executor_controller/upload_task_output.py +234 -0
- indexify/executor/host_resources/host_resources.py +20 -25
- indexify/executor/{grpc/metrics → metrics}/channel_manager.py +1 -1
- indexify/executor/metrics/executor.py +0 -47
- indexify/executor/{grpc/metrics → metrics}/state_reconciler.py +1 -1
- indexify/executor/{grpc/metrics → metrics}/state_reporter.py +1 -1
- indexify/executor/monitoring/health_checker/generic_health_checker.py +6 -59
- indexify/executor/monitoring/health_checker/health_checker.py +0 -11
- indexify/executor/{grpc/state_reconciler.py → state_reconciler.py} +139 -141
- indexify/executor/state_reporter.py +364 -0
- indexify/proto/executor_api.proto +67 -59
- indexify/proto/executor_api_pb2.py +52 -52
- indexify/proto/executor_api_pb2.pyi +125 -104
- indexify/proto/executor_api_pb2_grpc.py +0 -47
- {indexify-0.3.30.dist-info → indexify-0.4.2.dist-info}/METADATA +1 -3
- indexify-0.4.2.dist-info/RECORD +68 -0
- indexify-0.4.2.dist-info/entry_points.txt +3 -0
- indexify/cli/cli.py +0 -267
- indexify/executor/api_objects.py +0 -92
- indexify/executor/downloader.py +0 -417
- indexify/executor/executor_flavor.py +0 -7
- indexify/executor/function_executor/function_executor_state.py +0 -107
- indexify/executor/function_executor/function_executor_states_container.py +0 -93
- indexify/executor/function_executor/function_executor_status.py +0 -95
- indexify/executor/function_executor/metrics/function_executor_state.py +0 -46
- indexify/executor/function_executor/metrics/function_executor_state_container.py +0 -10
- indexify/executor/function_executor/single_task_runner.py +0 -345
- indexify/executor/function_executor/task_input.py +0 -21
- indexify/executor/function_executor/task_output.py +0 -105
- indexify/executor/grpc/function_executor_controller.py +0 -418
- indexify/executor/grpc/metrics/task_controller.py +0 -8
- indexify/executor/grpc/state_reporter.py +0 -314
- indexify/executor/grpc/task_controller.py +0 -508
- indexify/executor/metrics/task_fetcher.py +0 -21
- indexify/executor/metrics/task_reporter.py +0 -53
- indexify/executor/metrics/task_runner.py +0 -52
- indexify/executor/monitoring/function_allowlist.py +0 -25
- indexify/executor/runtime_probes.py +0 -68
- indexify/executor/task_fetcher.py +0 -96
- indexify/executor/task_reporter.py +0 -459
- indexify/executor/task_runner.py +0 -177
- indexify-0.3.30.dist-info/RECORD +0 -68
- indexify-0.3.30.dist-info/entry_points.txt +0 -3
- {indexify-0.3.30.dist-info → indexify-0.4.2.dist-info}/WHEEL +0 -0
@@ -1,4 +1,5 @@
|
|
1
1
|
import asyncio
|
2
|
+
from pathlib import Path
|
2
3
|
from typing import Any, AsyncGenerator, Dict, Iterable, List, Optional, Set
|
3
4
|
|
4
5
|
from tensorlake.function_executor.proto.message_validator import MessageValidator
|
@@ -6,26 +7,23 @@ from tensorlake.function_executor.proto.message_validator import MessageValidato
|
|
6
7
|
from indexify.proto.executor_api_pb2 import (
|
7
8
|
DesiredExecutorState,
|
8
9
|
FunctionExecutorDescription,
|
10
|
+
FunctionExecutorTerminationReason,
|
9
11
|
GetDesiredExecutorStatesRequest,
|
10
12
|
TaskAllocation,
|
11
13
|
)
|
12
14
|
from indexify.proto.executor_api_pb2_grpc import ExecutorAPIStub
|
13
15
|
|
14
|
-
from
|
15
|
-
from
|
16
|
-
from
|
17
|
-
FunctionExecutorStatesContainer,
|
18
|
-
)
|
19
|
-
from ..function_executor.function_executor_status import FunctionExecutorStatus
|
20
|
-
from ..function_executor.server.function_executor_server_factory import (
|
16
|
+
from .blob_store.blob_store import BLOBStore
|
17
|
+
from .channel_manager import ChannelManager
|
18
|
+
from .function_executor.server.function_executor_server_factory import (
|
21
19
|
FunctionExecutorServerFactory,
|
22
20
|
)
|
23
|
-
from ..task_reporter import TaskReporter
|
24
|
-
from .channel_manager import ChannelManager
|
25
21
|
from .function_executor_controller import (
|
26
22
|
FunctionExecutorController,
|
27
23
|
function_executor_logger,
|
24
|
+
task_logger,
|
28
25
|
validate_function_executor_description,
|
26
|
+
validate_task,
|
29
27
|
)
|
30
28
|
from .metrics.state_reconciler import (
|
31
29
|
metric_state_reconciliation_errors,
|
@@ -33,7 +31,6 @@ from .metrics.state_reconciler import (
|
|
33
31
|
metric_state_reconciliations,
|
34
32
|
)
|
35
33
|
from .state_reporter import ExecutorStateReporter
|
36
|
-
from .task_controller import TaskController, task_logger, validate_task
|
37
34
|
|
38
35
|
_RECONCILE_STREAM_BACKOFF_INTERVAL_SEC = 5
|
39
36
|
_RECONCILIATION_RETRIES = 3
|
@@ -45,10 +42,9 @@ class ExecutorStateReconciler:
|
|
45
42
|
executor_id: str,
|
46
43
|
function_executor_server_factory: FunctionExecutorServerFactory,
|
47
44
|
base_url: str,
|
48
|
-
function_executor_states: FunctionExecutorStatesContainer,
|
49
45
|
config_path: Optional[str],
|
50
|
-
|
51
|
-
|
46
|
+
cache_path: Path,
|
47
|
+
blob_store: BLOBStore,
|
52
48
|
channel_manager: ChannelManager,
|
53
49
|
state_reporter: ExecutorStateReporter,
|
54
50
|
logger: Any,
|
@@ -60,21 +56,17 @@ class ExecutorStateReconciler:
|
|
60
56
|
)
|
61
57
|
self._base_url: str = base_url
|
62
58
|
self._config_path: Optional[str] = config_path
|
63
|
-
self.
|
64
|
-
self.
|
59
|
+
self._cache_path: Path = cache_path
|
60
|
+
self._blob_store: BLOBStore = blob_store
|
65
61
|
self._channel_manager: ChannelManager = channel_manager
|
66
62
|
self._state_reporter: ExecutorStateReporter = state_reporter
|
67
|
-
self._reconciliation_loop_task: Optional[asyncio.Task] = None
|
68
63
|
self._logger: Any = logger.bind(module=__name__)
|
69
64
|
self._server_backoff_interval_sec: int = server_backoff_interval_sec
|
70
65
|
|
71
66
|
# Mutable state. Doesn't need lock because we access from async tasks running in the same thread.
|
72
|
-
self.
|
73
|
-
self.
|
74
|
-
function_executor_states
|
75
|
-
)
|
67
|
+
self._desired_states_reader_task: Optional[asyncio.Task] = None
|
68
|
+
self._reconciliation_loop_task: Optional[asyncio.Task] = None
|
76
69
|
self._function_executor_controllers: Dict[str, FunctionExecutorController] = {}
|
77
|
-
self._task_controllers: Dict[str, TaskController] = {}
|
78
70
|
self._last_server_clock: Optional[int] = None
|
79
71
|
|
80
72
|
self._last_desired_state_lock = asyncio.Lock()
|
@@ -83,23 +75,79 @@ class ExecutorStateReconciler:
|
|
83
75
|
)
|
84
76
|
self._last_desired_state: Optional[DesiredExecutorState] = None
|
85
77
|
|
86
|
-
|
78
|
+
def run(self):
|
87
79
|
"""Runs the state reconciler.
|
88
80
|
|
89
|
-
Never raises any exceptions.
|
81
|
+
Never raises any exceptions. Doesn't block.
|
90
82
|
"""
|
83
|
+
if self._reconciliation_loop_task is not None:
|
84
|
+
self._logger.error(
|
85
|
+
"reconciliation loop task is already running, skipping run call"
|
86
|
+
)
|
87
|
+
return
|
88
|
+
|
91
89
|
self._reconciliation_loop_task = asyncio.create_task(
|
92
90
|
self._reconciliation_loop(),
|
93
91
|
name="state reconciler reconciliation loop",
|
94
92
|
)
|
93
|
+
self._desired_states_reader_task = asyncio.create_task(
|
94
|
+
self._desired_states_reader_loop(),
|
95
|
+
name="state reconciler desired states stream reader",
|
96
|
+
)
|
97
|
+
|
98
|
+
async def shutdown(self):
|
99
|
+
"""Shuts down the state reconciler.
|
100
|
+
|
101
|
+
Never raises any exceptions.
|
102
|
+
"""
|
103
|
+
if self._reconciliation_loop_task is not None:
|
104
|
+
self._reconciliation_loop_task.cancel()
|
105
|
+
try:
|
106
|
+
await self._reconciliation_loop_task
|
107
|
+
except asyncio.CancelledError:
|
108
|
+
# Expected cancellation, nothing to do.
|
109
|
+
pass
|
110
|
+
self._logger.info("reconciliation loop is shutdown")
|
111
|
+
|
112
|
+
if self._desired_states_reader_task is not None:
|
113
|
+
self._desired_states_reader_task.cancel()
|
114
|
+
try:
|
115
|
+
await self._desired_states_reader_task
|
116
|
+
except asyncio.CancelledError:
|
117
|
+
# Expected cancellation, nothing to do.
|
118
|
+
pass
|
119
|
+
self._logger.info("desired states stream reader loop is shutdown")
|
120
|
+
|
121
|
+
# Now all the aio tasks exited so nothing will intervene with our actions from this point.
|
122
|
+
fe_shutdown_tasks: List[asyncio.Task] = []
|
123
|
+
for fe_controller in self._function_executor_controllers.values():
|
124
|
+
fe_shutdown_tasks.append(
|
125
|
+
asyncio.create_task(
|
126
|
+
fe_controller.shutdown(
|
127
|
+
termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_EXECUTOR_SHUTDOWN
|
128
|
+
),
|
129
|
+
name=f"Shutdown Function Executor {fe_controller.function_executor_id()}",
|
130
|
+
)
|
131
|
+
)
|
132
|
+
|
133
|
+
# Run all the shutdown tasks concurrently and wait for them to complete.
|
134
|
+
for task in fe_shutdown_tasks:
|
135
|
+
await task
|
136
|
+
|
137
|
+
self._function_executor_controllers.clear()
|
138
|
+
self._logger.info("state reconciler is shutdown")
|
139
|
+
|
140
|
+
async def _desired_states_reader_loop(self):
|
141
|
+
"""Reads the desired states stream from Server and processes it.
|
95
142
|
|
96
|
-
|
97
|
-
|
143
|
+
Never raises any exceptions. Get cancelled via aio task cancellation.
|
144
|
+
"""
|
145
|
+
while True:
|
98
146
|
try:
|
99
147
|
stub = ExecutorAPIStub(await self._channel_manager.get_channel())
|
100
148
|
# Report state once before starting the stream so Server
|
101
149
|
# doesn't use stale state it knew about this Executor in the past.
|
102
|
-
await self._state_reporter.
|
150
|
+
await self._state_reporter.report_state_and_wait_for_completion()
|
103
151
|
|
104
152
|
desired_states_stream: AsyncGenerator[DesiredExecutorState, None] = (
|
105
153
|
stub.get_desired_executor_states(
|
@@ -123,9 +171,6 @@ class ExecutorStateReconciler:
|
|
123
171
|
self, desired_states: AsyncGenerator[DesiredExecutorState, None]
|
124
172
|
):
|
125
173
|
async for new_state in desired_states:
|
126
|
-
if self._is_shutdown:
|
127
|
-
return
|
128
|
-
|
129
174
|
new_state: DesiredExecutorState
|
130
175
|
validator: MessageValidator = MessageValidator(new_state)
|
131
176
|
try:
|
@@ -155,28 +200,12 @@ class ExecutorStateReconciler:
|
|
155
200
|
self._last_desired_state = new_state
|
156
201
|
self._last_desired_state_change_notifier.notify_all()
|
157
202
|
|
158
|
-
async def shutdown(self):
|
159
|
-
"""Shuts down the state reconciler.
|
160
|
-
|
161
|
-
Never raises any exceptions.
|
162
|
-
"""
|
163
|
-
self._is_shutdown = True
|
164
|
-
if self._reconciliation_loop_task is not None:
|
165
|
-
self._reconciliation_loop_task.cancel()
|
166
|
-
self._logger.info("reconciliation loop shutdown")
|
167
|
-
|
168
|
-
for controller in self._task_controllers.values():
|
169
|
-
await controller.destroy()
|
170
|
-
# FEs are destroyed in executor.py right now.
|
171
|
-
# TODO: Once HTTP loop is removed add all FE state and controllers
|
172
|
-
# shutdown logic here. This should allow us to get rid of hacky
|
173
|
-
# "cancel all tasks loop" in executor.py shutdown and make the shutdown
|
174
|
-
# much more controllable and clean. E.g. we would be able to remove logs
|
175
|
-
# suppression from shutdown logic. Also need to shutdown self._function_executor_controllers.
|
176
|
-
|
177
203
|
async def _reconciliation_loop(self):
|
204
|
+
"""Continuously reconciles the desired state with the current state.
|
205
|
+
|
206
|
+
Never raises any exceptions. Get cancelled via aio task cancellation."""
|
178
207
|
last_reconciled_state: Optional[DesiredExecutorState] = None
|
179
|
-
while
|
208
|
+
while True:
|
180
209
|
async with self._last_desired_state_lock:
|
181
210
|
# Comparing object identities (references) is enough here to not reconcile
|
182
211
|
# the same state twice.
|
@@ -200,10 +229,8 @@ class ExecutorStateReconciler:
|
|
200
229
|
for attempt in range(_RECONCILIATION_RETRIES):
|
201
230
|
try:
|
202
231
|
# Reconcile FEs first because Tasks depend on them.
|
203
|
-
|
204
|
-
|
205
|
-
)
|
206
|
-
await self._reconcile_tasks(desired_state.task_allocations)
|
232
|
+
self._reconcile_function_executors(desired_state.function_executors)
|
233
|
+
self._reconcile_tasks(desired_state.task_allocations)
|
207
234
|
return
|
208
235
|
except Exception as e:
|
209
236
|
self._logger.error(
|
@@ -219,30 +246,20 @@ class ExecutorStateReconciler:
|
|
219
246
|
f"failed to reconcile desired state after {_RECONCILIATION_RETRIES} attempts",
|
220
247
|
)
|
221
248
|
|
222
|
-
|
249
|
+
def _reconcile_function_executors(
|
223
250
|
self, function_executor_descriptions: Iterable[FunctionExecutorDescription]
|
224
251
|
):
|
225
252
|
valid_fe_descriptions: List[FunctionExecutorDescription] = (
|
226
253
|
self._valid_function_executor_descriptions(function_executor_descriptions)
|
227
254
|
)
|
228
255
|
for fe_description in valid_fe_descriptions:
|
229
|
-
|
256
|
+
self._reconcile_function_executor(fe_description)
|
230
257
|
|
231
258
|
seen_fe_ids: Set[str] = set(map(lambda fe: fe.id, valid_fe_descriptions))
|
232
259
|
fe_ids_to_remove = set(self._function_executor_controllers.keys()) - seen_fe_ids
|
233
|
-
for
|
234
|
-
#
|
235
|
-
|
236
|
-
await self._function_executor_controllers.pop(
|
237
|
-
function_executor_id
|
238
|
-
).shutdown()
|
239
|
-
# Schedule removal of the FE state after shutdown. This is required for Server
|
240
|
-
# to known when exactly FE resources are freed so it can put a replacement FE if needed.
|
241
|
-
# Running in a separate asyncio task because this will block until the shutdown is complete.
|
242
|
-
asyncio.create_task(
|
243
|
-
self._remove_function_executor_after_shutdown(function_executor_id),
|
244
|
-
name="Remove Function Executor after shutdown",
|
245
|
-
)
|
260
|
+
for fe_id in fe_ids_to_remove:
|
261
|
+
# Server forgot this FE, so its safe to forget about it now too.
|
262
|
+
self._remove_function_executor_controller(fe_id)
|
246
263
|
|
247
264
|
def _valid_function_executor_descriptions(
|
248
265
|
self, function_executor_descriptions: Iterable[FunctionExecutorDescription]
|
@@ -267,7 +284,7 @@ class ExecutorStateReconciler:
|
|
267
284
|
|
268
285
|
return valid_function_executor_descriptions
|
269
286
|
|
270
|
-
|
287
|
+
def _reconcile_function_executor(
|
271
288
|
self, function_executor_description: FunctionExecutorDescription
|
272
289
|
):
|
273
290
|
"""Reconciles a single Function Executor with the desired state.
|
@@ -275,114 +292,94 @@ class ExecutorStateReconciler:
|
|
275
292
|
Doesn't block on any long running operations. Doesn't raise any exceptions.
|
276
293
|
"""
|
277
294
|
|
278
|
-
if not self.
|
279
|
-
|
295
|
+
if function_executor_description.id not in self._function_executor_controllers:
|
296
|
+
self._add_function_executor_controller(function_executor_description)
|
280
297
|
|
281
|
-
|
298
|
+
def _add_function_executor_controller(
|
282
299
|
self, function_executor_description: FunctionExecutorDescription
|
283
300
|
) -> None:
|
284
|
-
"""Creates Function Executor for the supplied description.
|
301
|
+
"""Creates Function Executor for the supplied description and adds it to internal data structures.
|
285
302
|
|
286
303
|
Doesn't block on any long running operations. Doesn't raise any exceptions.
|
287
304
|
"""
|
288
305
|
logger = function_executor_logger(function_executor_description, self._logger)
|
289
306
|
try:
|
290
|
-
# TODO: Store FE description in FE state object once we migrate to gRPC State Reconciler.
|
291
|
-
# Then most of these parameters will be removed. Also remove the container and use a simple
|
292
|
-
# Dict once FE shutdown logic is moved into reconciler.
|
293
|
-
function_executor_state: FunctionExecutorState = (
|
294
|
-
await self._function_executor_states.get_or_create_state(
|
295
|
-
id=function_executor_description.id,
|
296
|
-
namespace=function_executor_description.namespace,
|
297
|
-
graph_name=function_executor_description.graph_name,
|
298
|
-
graph_version=function_executor_description.graph_version,
|
299
|
-
function_name=function_executor_description.function_name,
|
300
|
-
image_uri=(
|
301
|
-
function_executor_description.image_uri
|
302
|
-
if function_executor_description.HasField("image_uri")
|
303
|
-
else None
|
304
|
-
),
|
305
|
-
secret_names=list(function_executor_description.secret_names),
|
306
|
-
)
|
307
|
-
)
|
308
307
|
controller: FunctionExecutorController = FunctionExecutorController(
|
309
308
|
executor_id=self._executor_id,
|
310
|
-
function_executor_state=function_executor_state,
|
311
309
|
function_executor_description=function_executor_description,
|
312
310
|
function_executor_server_factory=self._function_executor_server_factory,
|
313
|
-
|
311
|
+
state_reporter=self._state_reporter,
|
312
|
+
blob_store=self._blob_store,
|
314
313
|
base_url=self._base_url,
|
315
314
|
config_path=self._config_path,
|
315
|
+
cache_path=self._cache_path,
|
316
316
|
logger=self._logger,
|
317
317
|
)
|
318
318
|
self._function_executor_controllers[function_executor_description.id] = (
|
319
319
|
controller
|
320
320
|
)
|
321
|
-
|
322
|
-
# IDLE and start running tasks on it. Server currently doesn't explicitly manage the desired FE status.
|
323
|
-
await controller.startup()
|
321
|
+
controller.startup()
|
324
322
|
except Exception as e:
|
325
323
|
logger.error("failed adding Function Executor", exc_info=e)
|
326
324
|
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
325
|
+
def _remove_function_executor_controller(self, function_executor_id: str) -> None:
|
326
|
+
fe_controller: FunctionExecutorController = (
|
327
|
+
self._function_executor_controllers.pop(function_executor_id)
|
328
|
+
)
|
329
|
+
asyncio.create_task(
|
330
|
+
fe_controller.shutdown(
|
331
|
+
termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE
|
332
|
+
),
|
333
|
+
name=f"Shutdown Function Executor {function_executor_id}",
|
332
334
|
)
|
333
|
-
async with fe_state.lock:
|
334
|
-
await fe_state.wait_status(allowlist=[FunctionExecutorStatus.SHUTDOWN])
|
335
|
-
# The whole reconciler could shutdown while we were waiting for the FE to shutdown.
|
336
|
-
if not self._is_shutdown:
|
337
|
-
await self._function_executor_states.pop(function_executor_id)
|
338
335
|
|
339
|
-
|
336
|
+
def _reconcile_tasks(self, task_allocations: Iterable[TaskAllocation]):
|
340
337
|
valid_task_allocations: List[TaskAllocation] = self._valid_task_allocations(
|
341
338
|
task_allocations
|
342
339
|
)
|
343
340
|
for task_allocation in valid_task_allocations:
|
344
|
-
|
341
|
+
self._reconcile_task(task_allocation)
|
345
342
|
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
343
|
+
# Cancel tasks that are no longer in the desired state.
|
344
|
+
# FE ID => [Task ID]
|
345
|
+
desired_task_ids_per_fe: Dict[str, List[str]] = {}
|
346
|
+
for task_allocation in valid_task_allocations:
|
347
|
+
if task_allocation.function_executor_id not in desired_task_ids_per_fe:
|
348
|
+
desired_task_ids_per_fe[task_allocation.function_executor_id] = []
|
349
|
+
desired_task_ids_per_fe[task_allocation.function_executor_id].append(
|
350
|
+
task_allocation.task.id
|
351
|
+
)
|
352
352
|
|
353
|
-
|
353
|
+
for fe_controller in self._function_executor_controllers.values():
|
354
|
+
fe_controller: FunctionExecutorController
|
355
|
+
if fe_controller.function_executor_id() in desired_task_ids_per_fe:
|
356
|
+
desired_fe_task_ids: Set[str] = set(
|
357
|
+
desired_task_ids_per_fe[fe_controller.function_executor_id()]
|
358
|
+
)
|
359
|
+
else:
|
360
|
+
# No tasks desired for this FE, so cancel all its tasks.
|
361
|
+
desired_fe_task_ids: Set[str] = set()
|
362
|
+
actual_fe_task_ids: Set[str] = set(fe_controller.task_ids())
|
363
|
+
task_ids_to_remove: Set[str] = actual_fe_task_ids - desired_fe_task_ids
|
364
|
+
for task_id in task_ids_to_remove:
|
365
|
+
fe_controller.remove_task(task_id)
|
366
|
+
|
367
|
+
def _reconcile_task(self, task_allocation: TaskAllocation):
|
354
368
|
"""Reconciles a single TaskAllocation with the desired state.
|
355
369
|
|
356
370
|
Doesn't raise any exceptions.
|
357
371
|
"""
|
358
|
-
|
359
|
-
|
372
|
+
function_executor_controller: FunctionExecutorController = (
|
373
|
+
self._function_executor_controllers[task_allocation.function_executor_id]
|
374
|
+
)
|
375
|
+
if function_executor_controller.has_task(task_allocation.task.id):
|
376
|
+
# Nothing to do, task already exists and it's immutable.
|
360
377
|
return
|
361
378
|
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
task_allocation.function_executor_id
|
367
|
-
)
|
368
|
-
)
|
369
|
-
self._task_controllers[task_allocation.task.id] = TaskController(
|
370
|
-
task=task_allocation.task,
|
371
|
-
downloader=self._downloader,
|
372
|
-
task_reporter=self._task_reporter,
|
373
|
-
function_executor_id=task_allocation.function_executor_id,
|
374
|
-
function_executor_state=function_executor_state,
|
375
|
-
logger=self._logger,
|
376
|
-
)
|
377
|
-
except Exception as e:
|
378
|
-
logger.error("failed adding TaskController", exc_info=e)
|
379
|
-
|
380
|
-
async def _remove_task(self, task_id: str) -> None:
|
381
|
-
"""Schedules removal of an existing task.
|
382
|
-
|
383
|
-
Doesn't block on any long running operations. Doesn't raise any exceptions.
|
384
|
-
"""
|
385
|
-
await self._task_controllers.pop(task_id).destroy()
|
379
|
+
function_executor_controller.add_task(
|
380
|
+
task=task_allocation.task,
|
381
|
+
allocation_id=task_allocation.allocation_id,
|
382
|
+
)
|
386
383
|
|
387
384
|
def _valid_task_allocations(self, task_allocations: Iterable[TaskAllocation]):
|
388
385
|
valid_task_allocations: List[TaskAllocation] = []
|
@@ -403,6 +400,7 @@ class ExecutorStateReconciler:
|
|
403
400
|
validator = MessageValidator(task_allocation)
|
404
401
|
try:
|
405
402
|
validator.required_field("function_executor_id")
|
403
|
+
validator.required_field("allocation_id")
|
406
404
|
except ValueError as e:
|
407
405
|
# There's no way to report this error to Server so just log it.
|
408
406
|
logger.error(
|