indexify 0.4.28__py3-none-any.whl → 0.4.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. indexify/executor/executor.py +11 -7
  2. indexify/executor/function_executor_controller/__init__.py +2 -2
  3. indexify/executor/function_executor_controller/completed_task_allocation_metrics.py +87 -0
  4. indexify/executor/function_executor_controller/events.py +29 -33
  5. indexify/executor/function_executor_controller/{finalize_task.py → finalize_task_allocation.py} +45 -37
  6. indexify/executor/function_executor_controller/function_executor_controller.py +194 -180
  7. indexify/executor/function_executor_controller/loggers.py +15 -17
  8. indexify/executor/function_executor_controller/message_validators.py +4 -12
  9. indexify/executor/function_executor_controller/metrics/completed_task_allocation_metrics.py +70 -0
  10. indexify/executor/function_executor_controller/metrics/finalize_task_allocation.py +26 -0
  11. indexify/executor/function_executor_controller/metrics/function_executor_controller.py +12 -11
  12. indexify/executor/function_executor_controller/metrics/prepare_task_allocation.py +27 -0
  13. indexify/executor/function_executor_controller/{prepare_task.py → prepare_task_allocation.py} +33 -29
  14. indexify/executor/function_executor_controller/{run_task.py → run_task_allocation.py} +54 -51
  15. indexify/executor/function_executor_controller/{task_info.py → task_allocation_info.py} +6 -6
  16. indexify/executor/function_executor_controller/{task_input.py → task_allocation_input.py} +2 -2
  17. indexify/executor/function_executor_controller/{task_output.py → task_allocation_output.py} +24 -24
  18. indexify/executor/monitoring/desired_state_handler.py +24 -0
  19. indexify/executor/monitoring/reported_state_handler.py +22 -0
  20. indexify/executor/monitoring/server.py +4 -0
  21. indexify/executor/state_reconciler.py +26 -19
  22. indexify/executor/state_reporter.py +9 -4
  23. {indexify-0.4.28.dist-info → indexify-0.4.30.dist-info}/METADATA +2 -2
  24. {indexify-0.4.28.dist-info → indexify-0.4.30.dist-info}/RECORD +27 -25
  25. indexify/executor/function_executor_controller/completed_task_metrics.py +0 -83
  26. indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +0 -68
  27. indexify/executor/function_executor_controller/metrics/finalize_task.py +0 -20
  28. indexify/executor/function_executor_controller/metrics/prepare_task.py +0 -18
  29. /indexify/executor/function_executor_controller/metrics/{run_task.py → run_task_allocation.py} +0 -0
  30. {indexify-0.4.28.dist-info → indexify-0.4.30.dist-info}/WHEEL +0 -0
  31. {indexify-0.4.28.dist-info → indexify-0.4.30.dist-info}/entry_points.txt +0 -0
@@ -29,7 +29,7 @@ from indexify.proto.executor_api_pb2 import (
29
29
  TaskResult,
30
30
  )
31
31
 
32
- from .completed_task_metrics import emit_completed_task_metrics
32
+ from .completed_task_allocation_metrics import emit_completed_task_allocation_metrics
33
33
  from .create_function_executor import create_function_executor
34
34
  from .debug_event_loop import (
35
35
  debug_print_adding_event,
@@ -41,13 +41,13 @@ from .events import (
41
41
  EventType,
42
42
  FunctionExecutorCreated,
43
43
  FunctionExecutorTerminated,
44
- ScheduleTaskExecution,
44
+ ScheduleTaskAllocationExecution,
45
45
  ShutdownInitiated,
46
- TaskExecutionFinished,
47
- TaskFinalizationFinished,
48
- TaskPreparationFinished,
46
+ TaskAllocationExecutionFinished,
47
+ TaskAllocationFinalizationFinished,
48
+ TaskAllocationPreparationFinished,
49
49
  )
50
- from .finalize_task import finalize_task
50
+ from .finalize_task_allocation import finalize_task_allocation
51
51
  from .loggers import function_executor_logger, task_allocation_logger
52
52
  from .metrics.function_executor_controller import (
53
53
  METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_NOT_STARTED,
@@ -58,16 +58,16 @@ from .metrics.function_executor_controller import (
58
58
  METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_UNKNOWN,
59
59
  metric_control_loop_handle_event_latency,
60
60
  metric_function_executors_with_state,
61
- metric_runnable_tasks,
62
- metric_runnable_tasks_per_function_name,
63
- metric_schedule_task_latency,
64
- metric_tasks_fetched,
61
+ metric_runnable_task_allocations,
62
+ metric_runnable_task_allocations_per_function_name,
63
+ metric_schedule_task_allocation_latency,
64
+ metric_task_allocations_fetched,
65
65
  )
66
- from .prepare_task import prepare_task
67
- from .run_task import run_task_on_function_executor
68
- from .task_info import TaskInfo
69
- from .task_input import TaskInput
70
- from .task_output import TaskOutput
66
+ from .prepare_task_allocation import prepare_task_allocation
67
+ from .run_task_allocation import run_task_allocation_on_function_executor
68
+ from .task_allocation_info import TaskAllocationInfo
69
+ from .task_allocation_input import TaskAllocationInput
70
+ from .task_allocation_output import TaskAllocationOutput
71
71
  from .terminate_function_executor import terminate_function_executor
72
72
 
73
73
 
@@ -133,84 +133,87 @@ class FunctionExecutorController:
133
133
  self._control_loop_aio_task: Optional[asyncio.Task] = None
134
134
  # aio tasks spawned by the control loop.
135
135
  self._running_aio_tasks: List[asyncio.Task] = []
136
- # Info for all known tasks, Task ID -> TaskInfo.
137
- self._tasks: Dict[str, TaskInfo] = {}
138
- # Tracking of task execution on Function Executor.
139
- self._runnable_tasks: List[TaskInfo] = []
140
- self._running_tasks: List[TaskInfo] = []
136
+ # All task allocations assigned to FE, Allocation ID -> TaskAllocationInfo.
137
+ self._task_allocations: Dict[str, TaskAllocationInfo] = {}
138
+ # Task allocations prepared for execution on FE.
139
+ self._runnable_task_allocations: List[TaskAllocationInfo] = []
140
+ # Task allocations currently running on the FE.
141
+ self._running_task_allocations: List[TaskAllocationInfo] = []
141
142
 
142
143
  def function_executor_id(self) -> str:
143
144
  return self._fe_description.id
144
145
 
145
146
  def add_task_allocation(self, task_allocation: TaskAllocation) -> None:
146
- """Adds a task to the Function Executor.
147
+ """Adds a task allocation to the Function Executor.
147
148
 
148
149
  Not blocking. Never raises exceptions.
149
150
  """
150
151
  logger = task_allocation_logger(task_allocation, self._logger)
151
- if self.has_task(task_allocation.task.id):
152
+ if self.has_task_allocation(task_allocation.allocation_id):
152
153
  logger.warning(
153
- "attempted to add already added task to Function Executor",
154
+ "attempted to add already added task allocation to Function Executor",
154
155
  )
155
156
  return
156
157
 
157
- metric_tasks_fetched.inc()
158
- task_info: TaskInfo = TaskInfo(
158
+ metric_task_allocations_fetched.inc()
159
+ alloc_info: TaskAllocationInfo = TaskAllocationInfo(
159
160
  allocation=task_allocation, start_time=time.monotonic()
160
161
  )
161
- self._tasks[task_allocation.task.id] = task_info
162
- next_aio = prepare_task(
163
- task_info=task_info,
162
+ self._task_allocations[task_allocation.allocation_id] = alloc_info
163
+ next_aio = prepare_task_allocation(
164
+ alloc_info=alloc_info,
164
165
  blob_store=self._blob_store,
165
166
  logger=logger,
166
167
  )
167
- self._spawn_aio_for_task(
168
+ self._spawn_aio_for_task_alloc(
168
169
  aio=next_aio,
169
- task_info=task_info,
170
- on_exception=TaskPreparationFinished(task_info=task_info, is_success=False),
170
+ alloc_info=alloc_info,
171
+ on_exception=TaskAllocationPreparationFinished(
172
+ alloc_info=alloc_info, is_success=False
173
+ ),
171
174
  )
172
175
 
173
- def has_task(self, task_id: str) -> bool:
174
- """Checks if the Function Executor has a task with the given ID.
176
+ def has_task_allocation(self, task_allocation_id: str) -> bool:
177
+ """Checks if the Function Executor has a task allocation with the given ID.
175
178
 
176
179
  Not blocking. Never raises exceptions.
177
180
  """
178
- return task_id in self._tasks
181
+ return task_allocation_id in self._task_allocations
179
182
 
180
- def task_ids(self) -> List[str]:
181
- """Returns the list of task IDs known to the Function Executor.
183
+ def task_allocation_ids(self) -> List[str]:
184
+ """Returns the list of task allocation IDs known to the Function Executor.
182
185
 
183
186
  Not blocking. Never raises exceptions.
184
187
  """
185
- return list(self._tasks.keys())
188
+ return list(self._task_allocations.keys())
186
189
 
187
- def remove_task(self, task_id: str) -> None:
188
- """Removes the task from the Function Executor.
190
+ def remove_task_allocation(self, task_allocation_id: str) -> None:
191
+ """Removes the task allocation from the Function Executor.
189
192
 
190
- Cancels the task if it's in progress. Just removes the task if it was already completed.
191
- The cancellation is asynchronous and might take a while to complete.
193
+ Cancels the task allocation if it's in progress. Just removes the task allocation if it was already
194
+ completed. The cancellation is asynchronous and might take a while to complete.
192
195
  Until the cancellation is complete, the task allocation won't be removed from the Function Executor.
193
196
  Not blocking. Never raises exceptions.
194
197
  """
195
- if not self.has_task(task_id):
198
+ if not self.has_task_allocation(task_allocation_id):
196
199
  self._logger.warning(
197
- "attempted to cancel a task that is not known to the Function Executor",
198
- task_id=task_id,
200
+ "attempted to cancel a task allocation that is not known to the Function Executor",
201
+ task_id=task_allocation_id,
199
202
  )
200
203
  return
201
204
 
202
- task_info: TaskInfo = self._tasks.pop(task_id)
203
- if task_info.is_completed:
205
+ alloc_info: TaskAllocationInfo = self._task_allocations.pop(task_allocation_id)
206
+ if alloc_info.is_completed:
204
207
  return # Server processed the completed task outputs, we can forget it now.
205
208
 
206
209
  # Task cancellation is required as the task is not completed yet.
207
- logger = task_allocation_logger(task_info.allocation, self._logger)
208
- task_info.is_cancelled = True
210
+ logger = task_allocation_logger(alloc_info.allocation, self._logger)
211
+ alloc_info.is_cancelled = True
209
212
  logger.info(
210
- "cancelling task",
213
+ "cancelling task allocation",
211
214
  )
212
- if task_info.aio_task is not None:
213
- task_info.aio_task.cancel()
215
+ if alloc_info.aio_task is not None:
216
+ alloc_info.aio_task.cancel()
214
217
 
215
218
  def startup(self) -> None:
216
219
  """Starts up the Function Executor and prepares it to run tasks.
@@ -353,14 +356,14 @@ class FunctionExecutorController:
353
356
  return self._handle_event_function_executor_created(event)
354
357
  elif event.event_type == EventType.FUNCTION_EXECUTOR_TERMINATED:
355
358
  return self._handle_event_function_executor_terminated(event)
356
- elif event.event_type == EventType.TASK_PREPARATION_FINISHED:
357
- return self._handle_event_task_preparation_finished(event)
358
- elif event.event_type == EventType.SCHEDULE_TASK_EXECUTION:
359
- return self._handle_event_schedule_task_execution(event)
360
- elif event.event_type == EventType.TASK_EXECUTION_FINISHED:
361
- return self._handle_event_task_execution_finished(event)
362
- elif event.event_type == EventType.TASK_OUTPUT_UPLOAD_FINISHED:
363
- return self._handle_event_task_finalization_finished(event)
359
+ elif event.event_type == EventType.TASK_ALLOCATION_PREPARATION_FINISHED:
360
+ return self._handle_event_task_allocation_preparation_finished(event)
361
+ elif event.event_type == EventType.SCHEDULE_TASK_ALLOCATION_EXECUTION:
362
+ return self._handle_event_schedule_task_allocation_execution(event)
363
+ elif event.event_type == EventType.TASK_ALLOCATION_EXECUTION_FINISHED:
364
+ return self._handle_event_task_allocation_execution_finished(event)
365
+ elif event.event_type == EventType.TASK_ALLOCATION_FINALIZATION_FINISHED:
366
+ return self._handle_event_task_allocation_finalization_finished(event)
364
367
 
365
368
  self._logger.warning(
366
369
  "unexpected event type received", event_type=event.event_type.name
@@ -374,17 +377,17 @@ class FunctionExecutorController:
374
377
  self._events.append(event)
375
378
  self._event_added.set()
376
379
 
377
- def _spawn_aio_for_task(
380
+ def _spawn_aio_for_task_alloc(
378
381
  self,
379
382
  aio: Coroutine[Any, Any, BaseEvent],
380
- task_info: TaskInfo,
383
+ alloc_info: TaskAllocationInfo,
381
384
  on_exception: BaseEvent,
382
385
  ) -> None:
383
386
  self._spawn_aio(
384
387
  aio=aio,
385
- task_info=task_info,
388
+ alloc_info=alloc_info,
386
389
  on_exception=on_exception,
387
- logger=task_allocation_logger(task_info.allocation, self._logger),
390
+ logger=task_allocation_logger(alloc_info.allocation, self._logger),
388
391
  )
389
392
 
390
393
  def _spawn_aio_for_fe(
@@ -392,7 +395,7 @@ class FunctionExecutorController:
392
395
  ) -> None:
393
396
  self._spawn_aio(
394
397
  aio=aio,
395
- task_info=None,
398
+ alloc_info=None,
396
399
  on_exception=on_exception,
397
400
  logger=self._logger,
398
401
  )
@@ -400,7 +403,7 @@ class FunctionExecutorController:
400
403
  def _spawn_aio(
401
404
  self,
402
405
  aio: Coroutine[Any, Any, BaseEvent],
403
- task_info: Optional[TaskInfo],
406
+ alloc_info: Optional[TaskAllocationInfo],
404
407
  on_exception: BaseEvent,
405
408
  logger: Any,
406
409
  ) -> None:
@@ -410,9 +413,9 @@ class FunctionExecutorController:
410
413
  The coroutine should not raise any exceptions including BaseException.
411
414
  on_exception event will be added to the FE controller events if the aio task raises an unexpected exception.
412
415
  on_exception is required to not silently stall the task processing due to an unexpected exception.
413
- If task_info is not None, the aio task will be associated with the task_info while the aio task is running.
416
+ If alloc_info is not None, the aio task will be associated with the alloc_info while the aio task is running.
414
417
  Doesn't raise any exceptions. Doesn't block.
415
- Use `_spawn_aio_for_task` and `_spawn_aio_for_fe` instead of directly calling this method.
418
+ Use `_spawn_aio_for_task_alloc` and `_spawn_aio_for_fe` instead of directly calling this method.
416
419
  """
417
420
 
418
421
  aio_task_name: str = str(aio)
@@ -438,8 +441,8 @@ class FunctionExecutorController:
438
441
  )
439
442
  self._add_event(on_exception, source=aio_task_name)
440
443
  finally:
441
- if task_info is not None:
442
- task_info.aio_task = None
444
+ if alloc_info is not None:
445
+ alloc_info.aio_task = None
443
446
  self._running_aio_tasks.remove(asyncio.current_task())
444
447
 
445
448
  aio_wrapper_task: asyncio.Task = asyncio.create_task(
@@ -447,8 +450,8 @@ class FunctionExecutorController:
447
450
  name=f"function executor controller aio task '{aio_task_name}'",
448
451
  )
449
452
  self._running_aio_tasks.append(aio_wrapper_task)
450
- if task_info is not None:
451
- task_info.aio_task = aio_wrapper_task
453
+ if alloc_info is not None:
454
+ alloc_info.aio_task = aio_wrapper_task
452
455
 
453
456
  # Event handlers for the events added to the control loop.
454
457
  # All the event handlers are synchronous and never block on any long running operations.
@@ -466,18 +469,22 @@ class FunctionExecutorController:
466
469
  # The allocations we marked here also need to not use the FE terminated failure reason in their outputs
467
470
  # because FE terminated means that the allocation wasn't the cause of the FE termination.
468
471
  allocation_ids_caused_termination: List[str] = []
469
- for task_info in self._tasks.values():
470
- task_logger = task_allocation_logger(task_info.allocation, self._logger)
471
- task_logger.info(
472
- "marking allocation failed on function executor startup failure"
472
+ for alloc_info in self._task_allocations.values():
473
+ task_alloc_logger = task_allocation_logger(
474
+ alloc_info.allocation, self._logger
475
+ )
476
+ task_alloc_logger.info(
477
+ "marking task allocation failed on function executor startup failure"
473
478
  )
474
479
  allocation_ids_caused_termination.append(
475
- task_info.allocation.allocation_id
480
+ alloc_info.allocation.allocation_id
476
481
  )
477
- task_info.output = TaskOutput.function_executor_startup_failed(
478
- allocation=task_info.allocation,
479
- fe_termination_reason=event.fe_termination_reason,
480
- logger=task_logger,
482
+ alloc_info.output = (
483
+ TaskAllocationOutput.function_executor_startup_failed(
484
+ allocation=alloc_info.allocation,
485
+ fe_termination_reason=event.fe_termination_reason,
486
+ logger=task_alloc_logger,
487
+ )
481
488
  )
482
489
  self._start_termination(
483
490
  fe_termination_reason=event.fe_termination_reason,
@@ -496,7 +503,7 @@ class FunctionExecutorController:
496
503
  # Health checker starts after FE creation and gets automatically stopped on FE destroy.
497
504
  self._fe.health_checker().start(self._health_check_failed_callback)
498
505
  self._add_event(
499
- ScheduleTaskExecution(),
506
+ ScheduleTaskAllocationExecution(),
500
507
  source="_handle_event_function_executor_created",
501
508
  )
502
509
 
@@ -526,7 +533,7 @@ class FunctionExecutorController:
526
533
 
527
534
  # Invoke the scheduler so it can fail runnable tasks with FE Terminated error.
528
535
  self._add_event(
529
- ScheduleTaskExecution(),
536
+ ScheduleTaskAllocationExecution(),
530
537
  source="_handle_event_function_executor_destroyed",
531
538
  )
532
539
 
@@ -539,55 +546,56 @@ class FunctionExecutorController:
539
546
  self._start_termination(
540
547
  fe_termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY,
541
548
  allocation_ids_caused_termination=[
542
- task.allocation.allocation_id for task in self._running_tasks
549
+ alloc_info.allocation.allocation_id
550
+ for alloc_info in self._running_task_allocations
543
551
  ],
544
552
  )
545
553
 
546
- def _handle_event_task_preparation_finished(
547
- self, event: TaskPreparationFinished
554
+ def _handle_event_task_allocation_preparation_finished(
555
+ self, event: TaskAllocationPreparationFinished
548
556
  ) -> None:
549
- """Handles the task preparation finished event.
557
+ """Handles the task allocation preparation finished event.
550
558
 
551
559
  Doesn't raise any exceptions. Doesn't block.
552
560
  """
553
- task_info: TaskInfo = event.task_info
561
+ alloc_info: TaskAllocationInfo = event.alloc_info
554
562
 
555
- if task_info.is_cancelled:
556
- task_info.output = TaskOutput.task_cancelled(
557
- allocation=task_info.allocation,
558
- # Task was prepared but never executed
563
+ if alloc_info.is_cancelled:
564
+ alloc_info.output = TaskAllocationOutput.task_allocation_cancelled(
565
+ allocation=alloc_info.allocation,
566
+ # Task alloc was never executed
559
567
  execution_start_time=None,
560
568
  execution_end_time=None,
561
569
  )
562
- self._start_task_finalization(task_info)
570
+ self._start_task_allocation_finalization(alloc_info)
563
571
  return
564
572
 
565
573
  if not event.is_success:
566
- # Failed to prepare the task inputs.
567
- task_info.output = TaskOutput.internal_error(
568
- allocation=task_info.allocation,
569
- # Task was prepared but never executed
574
+ # Failed to prepare the task alloc inputs.
575
+ alloc_info.output = TaskAllocationOutput.internal_error(
576
+ allocation=alloc_info.allocation,
577
+ # Task alloc was never executed
570
578
  execution_start_time=None,
571
579
  execution_end_time=None,
572
580
  )
573
- self._start_task_finalization(task_info)
581
+ self._start_task_allocation_finalization(alloc_info)
574
582
  return
575
583
 
576
- task_info.prepared_time = time.monotonic()
577
- metric_runnable_tasks.inc()
578
- metric_runnable_tasks_per_function_name.labels(
579
- task_info.allocation.task.function_name
584
+ alloc_info.prepared_time = time.monotonic()
585
+ metric_runnable_task_allocations.inc()
586
+ metric_runnable_task_allocations_per_function_name.labels(
587
+ alloc_info.allocation.task.function_name
580
588
  ).inc()
581
- self._runnable_tasks.append(task_info)
589
+ self._runnable_task_allocations.append(alloc_info)
582
590
  self._add_event(
583
- ScheduleTaskExecution(),
584
- source="_handle_event_task_preparation_finished",
591
+ ScheduleTaskAllocationExecution(),
592
+ source="_handle_event_task_allocation_preparation_finished",
585
593
  )
586
594
 
587
- def _handle_event_schedule_task_execution(
588
- self, event: ScheduleTaskExecution
595
+ def _handle_event_schedule_task_allocation_execution(
596
+ self, event: ScheduleTaskAllocationExecution
589
597
  ) -> None:
590
- if len(self._runnable_tasks) == 0:
598
+ if len(self._runnable_task_allocations) == 0:
591
599
  return
592
600
 
593
601
  if self._internal_state not in [
@@ -599,144 +607,150 @@ class FunctionExecutorController:
599
607
 
600
608
  if (
601
609
  self._internal_state == _FE_CONTROLLER_STATE.RUNNING
602
- and len(self._running_tasks) == self._fe_description.max_concurrency
610
+ and len(self._running_task_allocations)
611
+ == self._fe_description.max_concurrency
603
612
  ):
604
613
  return
605
614
 
606
- # Take the next task from head to get FIFO order and improve fairness.
607
- task_info: TaskInfo = self._pop_runnable_task()
615
+ # Take the next task alloc from head to get FIFO order and improve fairness.
616
+ alloc_info: TaskAllocationInfo = self._pop_runnable_task_allocation()
608
617
  # Re-invoke the scheduler later to process the next runnable task if this one can't run on FE.
609
618
  self._add_event(
610
- ScheduleTaskExecution(),
611
- source="_handle_event_schedule_task_execution",
619
+ ScheduleTaskAllocationExecution(),
620
+ source="_handle_event_schedule_task_allocation_execution",
612
621
  )
613
622
 
614
- if task_info.is_cancelled:
615
- task_info.output = TaskOutput.task_cancelled(
616
- allocation=task_info.allocation,
617
- # Task is runnable but it was never executed
623
+ if alloc_info.is_cancelled:
624
+ alloc_info.output = TaskAllocationOutput.task_allocation_cancelled(
625
+ allocation=alloc_info.allocation,
626
+ # Task alloc was never executed
618
627
  execution_start_time=None,
619
628
  execution_end_time=None,
620
629
  )
621
- self._start_task_finalization(task_info)
630
+ self._start_task_allocation_finalization(alloc_info)
622
631
  elif self._internal_state in [
623
632
  _FE_CONTROLLER_STATE.TERMINATING,
624
633
  _FE_CONTROLLER_STATE.TERMINATED,
625
634
  ]:
626
- if task_info.output is None:
627
- # The output could be set already by FE startup failure handler.
628
- task_info.output = TaskOutput.function_executor_terminated(
629
- task_info.allocation
635
+ # The output could be set already by FE startup failure handler.
636
+ if alloc_info.output is None:
637
+ alloc_info.output = TaskAllocationOutput.function_executor_terminated(
638
+ alloc_info.allocation
630
639
  )
631
- self._start_task_finalization(task_info)
640
+ self._start_task_allocation_finalization(alloc_info)
632
641
  elif self._internal_state == _FE_CONTROLLER_STATE.RUNNING:
633
- self._running_tasks.append(task_info)
634
- next_aio = run_task_on_function_executor(
635
- task_info=task_info,
642
+ self._running_task_allocations.append(alloc_info)
643
+ next_aio = run_task_allocation_on_function_executor(
644
+ alloc_info=alloc_info,
636
645
  function_executor=self._fe,
637
- logger=task_allocation_logger(task_info.allocation, self._logger),
646
+ logger=task_allocation_logger(alloc_info.allocation, self._logger),
638
647
  )
639
- self._spawn_aio_for_task(
648
+ self._spawn_aio_for_task_alloc(
640
649
  aio=next_aio,
641
- task_info=task_info,
642
- on_exception=TaskExecutionFinished(
643
- task_info=task_info,
650
+ alloc_info=alloc_info,
651
+ on_exception=TaskAllocationExecutionFinished(
652
+ alloc_info=alloc_info,
644
653
  function_executor_termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR,
645
654
  ),
646
655
  )
647
656
  else:
648
- task_allocation_logger(task_info.allocation, self._logger).error(
649
- "failed to schedule task execution, this should never happen"
657
+ task_allocation_logger(alloc_info.allocation, self._logger).error(
658
+ "failed to schedule task allocation execution, this should never happen"
650
659
  )
651
660
 
652
- def _pop_runnable_task(self) -> TaskInfo:
653
- task_info: TaskInfo = self._runnable_tasks.pop(0)
654
- metric_schedule_task_latency.observe(time.monotonic() - task_info.prepared_time)
655
- metric_runnable_tasks.dec()
656
- metric_runnable_tasks_per_function_name.labels(
657
- task_info.allocation.task.function_name
661
+ def _pop_runnable_task_allocation(self) -> TaskAllocationInfo:
662
+ alloc_info: TaskAllocationInfo = self._runnable_task_allocations.pop(0)
663
+ metric_schedule_task_allocation_latency.observe(
664
+ time.monotonic() - alloc_info.prepared_time
665
+ )
666
+ metric_runnable_task_allocations.dec()
667
+ metric_runnable_task_allocations_per_function_name.labels(
668
+ alloc_info.allocation.task.function_name
658
669
  ).dec()
659
- return task_info
670
+ return alloc_info
660
671
 
661
- def _handle_event_task_execution_finished(
662
- self, event: TaskExecutionFinished
672
+ def _handle_event_task_allocation_execution_finished(
673
+ self, event: TaskAllocationExecutionFinished
663
674
  ) -> None:
664
- """Handles the task execution finished event.
675
+ """Handles the task allocation execution finished event.
665
676
 
666
677
  Doesn't raise any exceptions. Doesn't block.
667
678
  """
668
- task_info: TaskInfo = event.task_info
669
- self._running_tasks.remove(task_info)
679
+ alloc_info: TaskAllocationInfo = event.alloc_info
680
+ self._running_task_allocations.remove(alloc_info)
670
681
 
671
682
  if event.function_executor_termination_reason is None:
672
683
  self._add_event(
673
- ScheduleTaskExecution(), source="_handle_event_task_execution_finished"
684
+ ScheduleTaskAllocationExecution(),
685
+ source="_handle_event_task_allocation_execution_finished",
674
686
  )
675
687
  else:
676
688
  self._start_termination(
677
689
  fe_termination_reason=event.function_executor_termination_reason,
678
- allocation_ids_caused_termination=[
679
- event.task_info.allocation.allocation_id
680
- ],
690
+ allocation_ids_caused_termination=[alloc_info.allocation.allocation_id],
681
691
  )
682
692
 
683
- if task_info.output is None:
693
+ if alloc_info.output is None:
684
694
  # `run_task_allocation_on_function_executor` guarantees that the output is set in
685
695
  # all cases including task cancellations. If this didn't happen then some
686
696
  # internal error occurred in our code.
687
- task_info.output = TaskOutput.internal_error(
688
- allocation=task_info.allocation,
697
+ alloc_info.output = TaskAllocationOutput.internal_error(
698
+ allocation=alloc_info.allocation,
689
699
  execution_start_time=None,
690
700
  execution_end_time=None,
691
701
  )
692
702
 
693
- self._start_task_finalization(task_info)
703
+ self._start_task_allocation_finalization(alloc_info)
694
704
 
695
- def _start_task_finalization(self, task_info: TaskInfo) -> None:
696
- """Starts finalization for the given task.
705
+ def _start_task_allocation_finalization(
706
+ self, alloc_info: TaskAllocationInfo
707
+ ) -> None:
708
+ """Starts finalization for the given task allocation.
697
709
 
698
710
  Doesn't raise any exceptions. Doesn't block.
699
- task_info.output should not be None.
711
+ alloc_info.output should not be None.
700
712
  """
701
- next_aio = finalize_task(
702
- task_info=task_info,
713
+ next_aio = finalize_task_allocation(
714
+ task_alloc=alloc_info,
703
715
  blob_store=self._blob_store,
704
- logger=task_allocation_logger(task_info.allocation, self._logger),
716
+ logger=task_allocation_logger(alloc_info.allocation, self._logger),
705
717
  )
706
- self._spawn_aio_for_task(
718
+ self._spawn_aio_for_task_alloc(
707
719
  aio=next_aio,
708
- task_info=task_info,
709
- on_exception=TaskFinalizationFinished(
710
- task_info=task_info, is_success=False
720
+ alloc_info=alloc_info,
721
+ on_exception=TaskAllocationFinalizationFinished(
722
+ alloc_info=alloc_info, is_success=False
711
723
  ),
712
724
  )
713
725
 
714
- def _handle_event_task_finalization_finished(
715
- self, event: TaskFinalizationFinished
726
+ def _handle_event_task_allocation_finalization_finished(
727
+ self, event: TaskAllocationFinalizationFinished
716
728
  ) -> None:
717
- """Handles the task finalization finished event.
729
+ """Handles the task allocation finalization finished event.
718
730
 
719
731
  Doesn't raise any exceptions. Doesn't block.
720
732
  """
721
- task_info: TaskInfo = event.task_info
733
+ alloc_info: TaskAllocationInfo = event.alloc_info
722
734
  if not event.is_success:
723
- original_task_output: TaskOutput = task_info.output # Never None here
724
- task_info.output = TaskOutput.internal_error(
725
- allocation=task_info.allocation,
735
+ original_task_output: TaskAllocationOutput = (
736
+ alloc_info.output
737
+ ) # Never None here
738
+ alloc_info.output = TaskAllocationOutput.internal_error(
739
+ allocation=alloc_info.allocation,
726
740
  execution_start_time=original_task_output.execution_start_time,
727
741
  execution_end_time=original_task_output.execution_end_time,
728
742
  )
729
743
 
730
- logger: Any = task_allocation_logger(task_info.allocation, self._logger)
744
+ logger: Any = task_allocation_logger(alloc_info.allocation, self._logger)
731
745
  # Ignore task cancellation as it's technically finished at this point.
732
- task_info.is_completed = True
733
- emit_completed_task_metrics(
734
- task_info=task_info,
746
+ alloc_info.is_completed = True
747
+ emit_completed_task_allocation_metrics(
748
+ alloc_info=alloc_info,
735
749
  logger=logger,
736
750
  )
737
- # Reconciler will call .remove_task() once Server signals that it processed this update.
751
+ # Reconciler will call .remove_task_allocation() once Server signals that it processed this update.
738
752
  self._state_reporter.add_completed_task_result(
739
- _to_task_result_proto(task_info, logger)
753
+ _to_task_result_proto(alloc_info, logger)
740
754
  )
741
755
  self._state_reporter.schedule_state_report()
742
756
 
@@ -781,7 +795,7 @@ class FunctionExecutorController:
781
795
  The control loop must exit immediately after this method returns.
782
796
  Doesn't raise any exceptions.
783
797
 
784
- Server needs to wait until all the tasks its interested in got their outcomes reported
798
+ Server needs to wait until all the task allocations it's interested in got their outcomes reported
785
799
  before calling the FE shutdown as we don't report anything on FE shutdown.
786
800
  """
787
801
  self._logger.info("function executor controller shutdown initiated")
@@ -860,12 +874,12 @@ def _termination_reason_to_short_name(value: FunctionExecutorTerminationReason)
860
874
  return _termination_reason_to_short_name_map.get(value, "UNEXPECTED")
861
875
 
862
876
 
863
- def _to_task_result_proto(task_info: TaskInfo, logger: Any) -> TaskResult:
864
- allocation: TaskAllocation = task_info.allocation
877
+ def _to_task_result_proto(alloc_info: TaskAllocationInfo, logger: Any) -> TaskResult:
878
+ allocation: TaskAllocation = alloc_info.allocation
865
879
  # Might be None if the task wasn't prepared successfully.
866
- input: Optional[TaskInput] = task_info.input
880
+ input: Optional[TaskAllocationInput] = alloc_info.input
867
881
  # Never None here as we're completing the task here.
868
- output: Optional[TaskOutput] = task_info.output
882
+ output: Optional[TaskAllocationOutput] = alloc_info.output
869
883
 
870
884
  execution_duration_ms: Optional[int] = None
871
885
  if (