indexify 0.4.22__py3-none-any.whl → 0.4.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. indexify/cli/executor.py +2 -9
  2. indexify/executor/blob_store/blob_store.py +110 -26
  3. indexify/executor/blob_store/local_fs_blob_store.py +41 -1
  4. indexify/executor/blob_store/metrics/blob_store.py +87 -15
  5. indexify/executor/blob_store/s3_blob_store.py +112 -1
  6. indexify/executor/function_executor/function_executor.py +32 -56
  7. indexify/executor/function_executor/invocation_state_client.py +10 -3
  8. indexify/executor/function_executor/server/function_executor_server_factory.py +0 -1
  9. indexify/executor/function_executor_controller/create_function_executor.py +129 -116
  10. indexify/executor/function_executor_controller/downloads.py +34 -86
  11. indexify/executor/function_executor_controller/events.py +13 -7
  12. indexify/executor/function_executor_controller/finalize_task.py +184 -0
  13. indexify/executor/function_executor_controller/function_executor_controller.py +121 -78
  14. indexify/executor/function_executor_controller/message_validators.py +10 -3
  15. indexify/executor/function_executor_controller/metrics/downloads.py +8 -52
  16. indexify/executor/function_executor_controller/metrics/finalize_task.py +20 -0
  17. indexify/executor/function_executor_controller/metrics/prepare_task.py +18 -0
  18. indexify/executor/function_executor_controller/prepare_task.py +232 -14
  19. indexify/executor/function_executor_controller/run_task.py +77 -61
  20. indexify/executor/function_executor_controller/task_info.py +4 -7
  21. indexify/executor/function_executor_controller/task_input.py +21 -0
  22. indexify/executor/function_executor_controller/task_output.py +26 -35
  23. indexify/executor/function_executor_controller/terminate_function_executor.py +6 -1
  24. indexify/executor/logging.py +69 -0
  25. indexify/executor/monitoring/metrics.py +22 -0
  26. indexify/proto/executor_api.proto +11 -3
  27. indexify/proto/executor_api_pb2.py +54 -54
  28. indexify/proto/executor_api_pb2.pyi +8 -1
  29. {indexify-0.4.22.dist-info → indexify-0.4.24.dist-info}/METADATA +6 -6
  30. {indexify-0.4.22.dist-info → indexify-0.4.24.dist-info}/RECORD +32 -30
  31. indexify/executor/function_executor_controller/function_executor_startup_output.py +0 -21
  32. indexify/executor/function_executor_controller/metrics/upload_task_output.py +0 -39
  33. indexify/executor/function_executor_controller/upload_task_output.py +0 -274
  34. {indexify-0.4.22.dist-info → indexify-0.4.24.dist-info}/WHEEL +0 -0
  35. {indexify-0.4.22.dist-info → indexify-0.4.24.dist-info}/entry_points.txt +0 -0
indexify/executor/function_executor_controller/finalize_task.py (+184 -0)
@@ -0,0 +1,184 @@
+import asyncio
+import time
+from typing import Any
+
+from indexify.executor.blob_store.blob_store import BLOBStore
+from indexify.proto.executor_api_pb2 import (
+    TaskFailureReason,
+    TaskOutcomeCode,
+)
+
+from .events import TaskFinalizationFinished
+from .metrics.finalize_task import (
+    metric_task_finalization_errors,
+    metric_task_finalization_latency,
+    metric_task_finalizations,
+    metric_tasks_finalizing,
+)
+from .task_info import TaskInfo
+from .task_input import TaskInput
+from .task_output import TaskOutput
+
+
+async def finalize_task(
+    task_info: TaskInfo, blob_store: BLOBStore, logger: Any
+) -> TaskFinalizationFinished:
+    """Prepares the task output for reporting to the Server.
+
+    The task output either comes from a failed task or from its finished execution on the Function Executor.
+    Doesn't raise any exceptions.
+    """
+    logger = logger.bind(module=__name__)
+    start_time = time.monotonic()
+
+    with (
+        metric_tasks_finalizing.track_inprogress(),
+        metric_task_finalization_latency.time(),
+        metric_task_finalization_errors.count_exceptions(),
+    ):
+        metric_task_finalizations.inc()
+        try:
+            await _finalize_task_output(
+                task_info=task_info,
+                blob_store=blob_store,
+                logger=logger,
+            )
+            logger.info(
+                "task finalized",
+                duration=time.monotonic() - start_time,
+            )
+            return TaskFinalizationFinished(task_info=task_info, is_success=True)
+        except asyncio.CancelledError:
+            return TaskFinalizationFinished(task_info=task_info, is_success=False)
+        except BaseException as e:
+            logger.error(
+                "failed to finalize task",
+                exc_info=e,
+                duration=time.monotonic() - start_time,
+            )
+            return TaskFinalizationFinished(task_info=task_info, is_success=False)
+
+
+class _TaskOutputSummary:
+    def __init__(self):
+        self.output_count: int = 0
+        self.output_bytes: int = 0
+        self.invocation_error_output_count: int = 0
+        self.invocation_error_output_bytes: int = 0
+        self.next_functions_count: int = 0
+
+
+async def _finalize_task_output(
+    task_info: TaskInfo, blob_store: BLOBStore, logger: Any
+) -> None:
+    """Finalizes the task output.
+
+    Raises an exception on error."""
+    if task_info.input is None:
+        raise Exception(
+            "task input is None, this should never happen",
+        )
+    if task_info.output is None:
+        raise Exception(
+            "task output is None, this should never happen",
+        )
+
+    input: TaskInput = task_info.input
+    output: TaskOutput = task_info.output
+
+    output_summary: _TaskOutputSummary = _task_output_summary(output)
+    logger.info(
+        "task output summary",
+        output_count=output_summary.output_count,
+        output_bytes=output_summary.output_bytes,
+        invocation_error_output_count=output_summary.invocation_error_output_count,
+        invocation_error_output_bytes=output_summary.invocation_error_output_bytes,
+        next_functions_count=output_summary.next_functions_count,
+    )
+
+    _log_function_metrics(output, logger)
+
+    if output.outcome_code == TaskOutcomeCode.TASK_OUTCOME_CODE_SUCCESS:
+        if len(output.uploaded_function_outputs_blob.chunks) == 0:
+            # No output from the function, which usually means it returned None.
+            await blob_store.abort_multipart_upload(
+                uri=input.function_outputs_blob_uri,
+                upload_id=input.function_outputs_blob_upload_id,
+                logger=logger,
+            )
+        else:
+            await blob_store.complete_multipart_upload(
+                uri=input.function_outputs_blob_uri,
+                upload_id=input.function_outputs_blob_upload_id,
+                parts_etags=[
+                    blob_chunk.etag
+                    for blob_chunk in output.uploaded_function_outputs_blob.chunks
+                ],
+                logger=logger,
+            )
+        await blob_store.abort_multipart_upload(
+            uri=input.invocation_error_blob_uri,
+            upload_id=input.invocation_error_blob_upload_id,
+            logger=logger,
+        )
+    elif output.outcome_code == TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE:
+        await blob_store.abort_multipart_upload(
+            uri=input.function_outputs_blob_uri,
+            upload_id=input.function_outputs_blob_upload_id,
+            logger=logger,
+        )
+        if (
+            output.failure_reason
+            == TaskFailureReason.TASK_FAILURE_REASON_INVOCATION_ERROR
+        ) and len(output.uploaded_invocation_error_blob.chunks) != 0:
+            await blob_store.complete_multipart_upload(
+                uri=input.invocation_error_blob_uri,
+                upload_id=input.invocation_error_blob_upload_id,
+                parts_etags=[
+                    blob_chunk.etag
+                    for blob_chunk in output.uploaded_invocation_error_blob.chunks
+                ],
+                logger=logger,
+            )
+        else:
+            await blob_store.abort_multipart_upload(
+                uri=input.invocation_error_blob_uri,
+                upload_id=input.invocation_error_blob_upload_id,
+                logger=logger,
+            )
+    else:
+        raise ValueError(
+            f"Unexpected outcome code: {TaskOutcomeCode.Name(output.outcome_code)}"
+        )
+
+
+def _task_output_summary(task_output: TaskOutput) -> _TaskOutputSummary:
+    summary: _TaskOutputSummary = _TaskOutputSummary()
+
+    for output in task_output.function_outputs:
+        summary.output_count += 1
+        summary.output_bytes += output.manifest.size
+
+    if task_output.invocation_error_output is not None:
+        summary.invocation_error_output_count = 1
+        summary.invocation_error_output_bytes = (
+            task_output.invocation_error_output.manifest.size
+        )
+
+    summary.next_functions_count = len(task_output.next_functions)
+
+    return summary
+
+
+# Temporary workaround: log customer metrics until we store them somewhere
+# for future retrieval and processing.
+def _log_function_metrics(output: TaskOutput, logger: Any):
+    if output.metrics is None:
+        return
+
+    for counter_name, counter_value in output.metrics.counters.items():
+        logger.info(
+            "function_metric", counter_name=counter_name, counter_value=counter_value
+        )
+    for timer_name, timer_value in output.metrics.timers.items():
+        logger.info("function_metric", timer_name=timer_name, timer_value=timer_value)
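The new finalize_task module drives an S3-style multipart-upload handshake: on success it completes the function-outputs upload (or aborts it when the function produced no output) and always aborts the unused invocation-error upload; on failure it aborts the outputs upload and completes the invocation-error upload only when one was actually written. Below is a hedged sketch of the BLOBStore surface these calls assume, mapped onto boto3; the package's real implementation lives in blob_store.py and s3_blob_store.py (files 2 and 5 above) and will differ in structure, retries, and error handling.

import asyncio
from typing import Any, List, Tuple

import boto3  # assumption: the S3 backend is built on boto3


class SketchS3BLOBStore:
    """Illustrative stand-in for the BLOBStore methods used by finalize_task."""

    def __init__(self) -> None:
        self._s3 = boto3.client("s3")

    async def complete_multipart_upload(
        self, uri: str, upload_id: str, parts_etags: List[str], logger: Any
    ) -> None:
        bucket, key = self._parse_s3_uri(uri)
        # Part numbers are 1-based and must match the order in which the
        # Function Executor uploaded the chunks.
        parts = [
            {"ETag": etag, "PartNumber": i + 1} for i, etag in enumerate(parts_etags)
        ]
        await asyncio.to_thread(
            self._s3.complete_multipart_upload,
            Bucket=bucket,
            Key=key,
            UploadId=upload_id,
            MultipartUpload={"Parts": parts},
        )

    async def abort_multipart_upload(
        self, uri: str, upload_id: str, logger: Any
    ) -> None:
        bucket, key = self._parse_s3_uri(uri)
        await asyncio.to_thread(
            self._s3.abort_multipart_upload, Bucket=bucket, Key=key, UploadId=upload_id
        )

    @staticmethod
    def _parse_s3_uri(uri: str) -> Tuple[str, str]:
        # Assumes "s3://bucket/key" URIs.
        bucket, _, key = uri[len("s3://"):].partition("/")
        return bucket, key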
indexify/executor/function_executor_controller/function_executor_controller.py (+121 -78)
@@ -6,6 +6,11 @@ from enum import Enum
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 
+from tensorlake.function_executor.proto.function_executor_pb2 import (
+    SerializedObjectEncoding,
+    SerializedObjectInsideBLOB,
+)
+
 from indexify.executor.blob_store.blob_store import BLOBStore
 from indexify.executor.function_executor.function_executor import FunctionExecutor
 from indexify.executor.function_executor.health_checker import HealthCheckResult
@@ -14,11 +19,12 @@ from indexify.executor.function_executor.server.function_executor_server_factory import (
 )
 from indexify.executor.state_reporter import ExecutorStateReporter
 from indexify.proto.executor_api_pb2 import (
+    DataPayload,
+    DataPayloadEncoding,
     FunctionExecutorDescription,
     FunctionExecutorState,
     FunctionExecutorStatus,
     FunctionExecutorTerminationReason,
-    FunctionExecutorUpdate,
     TaskAllocation,
     TaskResult,
 )
@@ -38,10 +44,10 @@ from .events import (
     ScheduleTaskExecution,
     ShutdownInitiated,
     TaskExecutionFinished,
-    TaskOutputUploadFinished,
+    TaskFinalizationFinished,
     TaskPreparationFinished,
 )
-from .function_executor_startup_output import FunctionExecutorStartupOutput
+from .finalize_task import finalize_task
 from .loggers import function_executor_logger, task_allocation_logger
 from .metrics.function_executor_controller import (
     METRIC_FUNCTION_EXECUTORS_WITH_STATE_LABEL_NOT_STARTED,
@@ -60,9 +66,9 @@ from .metrics.function_executor_controller import (
 from .prepare_task import prepare_task
 from .run_task import run_task_on_function_executor
 from .task_info import TaskInfo
+from .task_input import TaskInput
 from .task_output import TaskOutput
 from .terminate_function_executor import terminate_function_executor
-from .upload_task_output import upload_task_output
 
 
 # Actual FE controller states, they are a bit different from statuses reported to the Server.
@@ -242,10 +248,7 @@ class FunctionExecutorController:
             aio=next_aio,
             on_exception=FunctionExecutorCreated(
                 function_executor=None,
-                output=FunctionExecutorStartupOutput(
-                    function_executor_description=self._fe_description,
-                    termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR,
-                ),
+                fe_termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR,
             ),
         )
 
@@ -326,7 +329,7 @@ class FunctionExecutorController:
 
         try:
             if event.event_type == EventType.SHUTDOWN_INITIATED:
-                return await self._shutdown_no_exceptions(event)
+                return await self._shutdown(event)
 
             with metric_control_loop_handle_event_latency.time():
                 self._handle_event(event)
@@ -338,6 +341,8 @@ class FunctionExecutorController:
                 exc_info=e,
                 event_type=event.event_type.name,
             )
+            if event.event_type == EventType.SHUTDOWN_INITIATED:
+                return  # Unexpected exception during shutdown, should return anyway.
 
     def _handle_event(self, event: BaseEvent) -> None:
         """Handles the event.
@@ -355,7 +360,7 @@ class FunctionExecutorController:
         elif event.event_type == EventType.TASK_EXECUTION_FINISHED:
             return self._handle_event_task_execution_finished(event)
         elif event.event_type == EventType.TASK_OUTPUT_UPLOAD_FINISHED:
-            return self._handle_event_task_output_upload_finished(event)
+            return self._handle_event_task_finalization_finished(event)
 
         self._logger.warning(
             "unexpected event type received", event_type=event.event_type.name
@@ -402,7 +407,7 @@ class FunctionExecutorController:
         """Spawns an aio task for the supplied coroutine.
 
         The coroutine should return an event that will be added to the FE controller events.
-        The coroutine should not raise any exceptions.
+        The coroutine should not raise any exceptions, including BaseException and asyncio.CancelledError.
         on_exception event will be added to the FE controller events if the aio task raises an unexpected exception.
        on_exception is required to not silently stall the task processing due to an unexpected exception.
         If task_info is not None, the aio task will be associated with the task_info while the aio task is running.
@@ -417,8 +422,6 @@ class FunctionExecutorController:
         async def coroutine_wrapper() -> None:
             try:
                 self._add_event(await aio, source=aio_task_name)
-            except asyncio.CancelledError:
-                pass  # Expected exception on aio task cancellation.
             except BaseException as e:
                 logger.error(
                     "unexpected exception in aio task",
@@ -449,15 +452,6 @@ class FunctionExecutorController:
 
         Doesn't raise any exceptions. Doesn't block.
         """
-        self._state_reporter.add_function_executor_update(
-            FunctionExecutorUpdate(
-                description=self._fe_description,
-                startup_stdout=event.output.stdout,
-                startup_stderr=event.output.stderr,
-            )
-        )
-        self._state_reporter.schedule_state_report()
-
         if event.function_executor is None:
             # Server needs to increment attempts counter for all the tasks that were pending while FE was starting up.
             # This prevents infinite retries if FEs consistently fail to start up.
@@ -474,11 +468,11 @@ class FunctionExecutorController:
                 )
                 task_info.output = TaskOutput.function_executor_startup_failed(
                     allocation=task_info.allocation,
-                    fe_startup_output=event.output,
+                    fe_termination_reason=event.fe_termination_reason,
                     logger=task_logger,
                 )
             self._start_termination(
-                fe_termination_reason=event.output.termination_reason,
+                fe_termination_reason=event.fe_termination_reason,
                 allocation_ids_caused_termination=allocation_ids_caused_termination,
             )
             return
@@ -559,16 +553,18 @@ class FunctionExecutorController:
                 execution_start_time=None,
                 execution_end_time=None,
             )
-            self._start_task_output_upload(task_info)
+            self._start_task_finalization(task_info)
             return
+
         if not event.is_success:
+            # Failed to prepare the task inputs.
             task_info.output = TaskOutput.internal_error(
                 allocation=task_info.allocation,
                 # Task was prepared but never executed
                 execution_start_time=None,
                 execution_end_time=None,
             )
-            self._start_task_output_upload(task_info)
+            self._start_task_finalization(task_info)
             return
 
         task_info.prepared_time = time.monotonic()
@@ -616,7 +612,7 @@ class FunctionExecutorController:
                 execution_start_time=None,
                 execution_end_time=None,
             )
-            self._start_task_output_upload(task_info)
+            self._start_task_finalization(task_info)
         elif self._internal_state in [
             _FE_CONTROLLER_STATE.TERMINATING,
             _FE_CONTROLLER_STATE.TERMINATED,
@@ -626,7 +622,7 @@ class FunctionExecutorController:
             task_info.output = TaskOutput.function_executor_terminated(
                 task_info.allocation
             )
-            self._start_task_output_upload(task_info)
+            self._start_task_finalization(task_info)
         elif self._internal_state == _FE_CONTROLLER_STATE.RUNNING:
             self._running_task = task_info
             next_aio = run_task_on_function_executor(
@@ -677,17 +673,26 @@ class FunctionExecutorController:
             ],
         )
 
-        # Ignore is_cancelled because cancelling a task still involves uploading its output.
-        # We'll just upload a real output instead of "task cancelled" output.
-        # Adds TaskOutputUploadFinished event when done.
-        self._start_task_output_upload(event.task_info)
+        task_info: TaskInfo = event.task_info
+        if task_info.output is None:
+            # `run_task_on_function_executor` guarantees that the output is set in
+            # all cases including task cancellations. If this didn't happen then some
+            # internal error occurred in our code.
+            task_info.output = TaskOutput.internal_error(
+                allocation=task_info.allocation,
+                execution_start_time=None,
+                execution_end_time=None,
+            )
 
-    def _start_task_output_upload(self, task_info: TaskInfo) -> None:
-        """Starts the task output upload for the given task.
+        self._start_task_finalization(task_info)
+
+    def _start_task_finalization(self, task_info: TaskInfo) -> None:
+        """Starts finalization for the given task.
 
         Doesn't raise any exceptions. Doesn't block.
+        task_info.output should not be None.
         """
-        next_aio = upload_task_output(
+        next_aio = finalize_task(
             task_info=task_info,
             blob_store=self._blob_store,
             logger=task_allocation_logger(task_info.allocation, self._logger),
@@ -695,43 +700,37 @@ class FunctionExecutorController:
         self._spawn_aio_for_task(
             aio=next_aio,
             task_info=task_info,
-            on_exception=TaskOutputUploadFinished(
+            on_exception=TaskFinalizationFinished(
                 task_info=task_info, is_success=False
             ),
         )
 
-    def _handle_event_task_output_upload_finished(
-        self, event: TaskOutputUploadFinished
+    def _handle_event_task_finalization_finished(
+        self, event: TaskFinalizationFinished
     ) -> None:
-        """Handles the task output upload finished event.
+        """Handles the task finalization finished event.
 
         Doesn't raise any exceptions. Doesn't block.
         """
         task_info: TaskInfo = event.task_info
         if not event.is_success:
-            failed_to_upload_output: TaskOutput = task_info.output  # Never None here
+            original_task_output: TaskOutput = task_info.output  # Never None here
             task_info.output = TaskOutput.internal_error(
                 allocation=task_info.allocation,
-                execution_start_time=failed_to_upload_output.execution_start_time,
-                execution_end_time=failed_to_upload_output.execution_end_time,
+                execution_start_time=original_task_output.execution_start_time,
+                execution_end_time=original_task_output.execution_end_time,
             )
 
-        # Ignore task cancellation, we better report the real task output to the server because it's uploaded already.
-        self._complete_task(event.task_info)
-
-    def _complete_task(self, task_info: TaskInfo) -> None:
-        """Marks the task as completed and reports it to the Server.
-
-        Doesn't raise any exceptions. Doesn't block.
-        """
+        logger: Any = task_allocation_logger(task_info.allocation, self._logger)
+        # Ignore task cancellation as the task is technically finished at this point.
         task_info.is_completed = True
         emit_completed_task_metrics(
             task_info=task_info,
-            logger=task_allocation_logger(task_info.allocation, self._logger),
+            logger=logger,
        )
         # Reconciler will call .remove_task() once Server signals that it processed this update.
         self._state_reporter.add_completed_task_result(
-            _to_task_result_proto(task_info.output)
+            _to_task_result_proto(task_info, logger)
         )
         self._state_reporter.schedule_state_report()
 
@@ -769,16 +768,6 @@ class FunctionExecutorController:
             ),
         )
 
-    async def _shutdown_no_exceptions(self, event: ShutdownInitiated) -> None:
-        try:
-            await self._shutdown(event)
-        except BaseException as e:
-            # This would result in resource leaks.
-            self._logger.error(
-                "unexpected exception in function executor controller shutdown, this should never happen",
-                exc_info=e,
-            )
-
     async def _shutdown(self, event: ShutdownInitiated) -> None:
         """Shuts down the Function Executor and frees all its resources.
 
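One consequence of deleting the except asyncio.CancelledError branch in coroutine_wrapper (hunk above) is that cancellation becomes the spawned coroutine's responsibility: finalize_task, for example, catches CancelledError itself and still returns an event. A condensed sketch of that spawn contract, with names simplified and not taken verbatim from the controller code:

import asyncio
from typing import Any, Awaitable, Callable


def spawn_aio(
    aio: Awaitable[Any],
    add_event: Callable[[Any], None],
    on_exception: Any,
    logger: Any,
) -> "asyncio.Task[None]":
    async def coroutine_wrapper() -> None:
        try:
            # The coroutine is expected to return the next controller event.
            add_event(await aio)
        except BaseException as e:
            # After this change the handler also sees CancelledError, so a
            # coroutine that doesn't handle its own cancellation still yields
            # the fallback event instead of silently stalling the control loop.
            logger.error("unexpected exception in aio task", exc_info=e)
            add_event(on_exception)

    return asyncio.create_task(coroutine_wrapper())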
@@ -865,7 +854,13 @@ def _termination_reason_to_short_name(value: FunctionExecutorTerminationReason) -> str:
     return _termination_reason_to_short_name_map.get(value, "UNEXPECTED")
 
 
-def _to_task_result_proto(output: TaskOutput) -> TaskResult:
+def _to_task_result_proto(task_info: TaskInfo, logger: Any) -> TaskResult:
+    allocation: TaskAllocation = task_info.allocation
+    # Might be None if the task wasn't prepared successfully.
+    input: Optional[TaskInput] = task_info.input
+    # Never None here because we're completing the task.
+    output: Optional[TaskOutput] = task_info.output
+
     execution_duration_ms: Optional[int] = None
     if (
         output.execution_start_time is not None
@@ -876,24 +871,72 @@ def _to_task_result_proto(output: TaskOutput) -> TaskResult:
             (output.execution_end_time - output.execution_start_time) * 1000
         )
 
-    task_result = TaskResult(
-        task_id=output.allocation.task.id,
-        allocation_id=output.allocation.allocation_id,
-        namespace=output.allocation.task.namespace,
-        graph_name=output.allocation.task.graph_name,
-        graph_version=output.allocation.task.graph_version,
-        function_name=output.allocation.task.function_name,
-        graph_invocation_id=output.allocation.task.graph_invocation_id,
+    invocation_error_output: Optional[DataPayload] = None
+    if output.invocation_error_output is not None:
+        # input can't be None if invocation_error_output is set because the task ran already.
+        invocation_error_output = _to_data_payload_proto(
+            so=output.invocation_error_output,
+            blob_uri=input.invocation_error_blob_uri,
+            logger=logger,
+        )
+
+    function_outputs: List[DataPayload] = []
+    for function_output in output.function_outputs:
+        # input can't be None if function_outputs is set because the task ran already.
+        function_output: SerializedObjectInsideBLOB
+        function_outputs.append(
+            _to_data_payload_proto(
+                so=function_output,
+                blob_uri=input.function_outputs_blob_uri,
+                logger=logger,
+            )
+        )
+
+    return TaskResult(
+        task_id=allocation.task.id,
+        allocation_id=allocation.allocation_id,
+        namespace=allocation.task.namespace,
+        graph_name=allocation.task.graph_name,
+        graph_version=allocation.task.graph_version,
+        function_name=allocation.task.function_name,
+        graph_invocation_id=allocation.task.graph_invocation_id,
         outcome_code=output.outcome_code,
         failure_reason=output.failure_reason,
         next_functions=output.next_functions,
-        function_outputs=output.uploaded_data_payloads,
-        invocation_error_output=output.uploaded_invocation_error_output,
+        function_outputs=function_outputs,
+        invocation_error_output=invocation_error_output,
         execution_duration_ms=execution_duration_ms,
     )
-    if output.uploaded_stdout is not None:
-        task_result.stdout.CopyFrom(output.uploaded_stdout)
-    if output.uploaded_stderr is not None:
-        task_result.stderr.CopyFrom(output.uploaded_stderr)
 
-    return task_result
+
+def _to_data_payload_proto(
+    so: SerializedObjectInsideBLOB,
+    blob_uri: str,
+    logger: Any,
+) -> DataPayload:
+    """Converts a serialized object inside a BLOB into a DataPayload."""
+    return DataPayload(
+        size=so.manifest.size,
+        sha256_hash=so.manifest.sha256_hash,
+        uri=blob_uri,
+        encoding=_to_data_payload_encoding(so.manifest.encoding, logger),
+        encoding_version=so.manifest.encoding_version,
+        offset=so.offset,
+    )
+
+
+def _to_data_payload_encoding(
+    encoding: SerializedObjectEncoding, logger: Any
+) -> DataPayloadEncoding:
+    if encoding == SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_BINARY_PICKLE:
+        return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_BINARY_PICKLE
+    elif encoding == SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_UTF8_JSON:
+        return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_JSON
+    elif encoding == SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_UTF8_TEXT:
+        return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT
+    else:
+        logger.error(
+            "Unexpected encoding for SerializedObject",
+            encoding=SerializedObjectEncoding.Name(encoding),
+        )
+        return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UNKNOWN
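Since _to_data_payload_encoding is a pure mapping between the two proto enums, it can be spot-checked directly. The snippet below is an illustration, not code from the package; it assumes both proto modules are importable and that the private function is reachable at the module path implied by the file list.

from indexify.executor.function_executor_controller.function_executor_controller import (
    _to_data_payload_encoding,
)
from indexify.proto.executor_api_pb2 import DataPayloadEncoding
from tensorlake.function_executor.proto.function_executor_pb2 import (
    SerializedObjectEncoding,
)

# logger is only consulted on the fallback branch, so None is safe for known encodings.
assert (
    _to_data_payload_encoding(
        SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_UTF8_JSON, None
    )
    == DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_JSON
)
assert (
    _to_data_payload_encoding(
        SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_BINARY_PICKLE, None
    )
    == DataPayloadEncoding.DATA_PAYLOAD_ENCODING_BINARY_PICKLE
)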
indexify/executor/function_executor_controller/message_validators.py (+10 -3)
@@ -21,12 +21,10 @@ def validate_function_executor_description(
     validator.required_field("graph_name")
     validator.required_field("graph_version")
     validator.required_field("function_name")
-    # image_uri is optional.
     # secret_names can be empty.
     validator.required_field("customer_code_timeout_ms")
     validator.required_field("graph")
     validator.required_field("resources")
-    validator.required_field("output_payload_uri_prefix")
 
     _validate_data_payload(function_executor_description.graph)
 
@@ -81,4 +79,13 @@ def _validate_data_payload(data_payload: DataPayload) -> None:
 
     Raises ValueError if the DataPayload is not valid.
     """
-    (MessageValidator(data_payload).required_field("uri").required_field("encoding"))
+    (
+        MessageValidator(data_payload)
+        .required_field("size")
+        .required_field("sha256_hash")
+        .required_field("uri")
+        .required_field("encoding")
+        # Ignored by Server right now and not set.
+        # .required_field("encoding_version")
+        .required_field("offset")
+    )
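The chain above works because each required_field call returns the validator itself. A minimal sketch of such a fluent interface (the package's real MessageValidator may differ; this version assumes the checked proto fields are declared optional, so HasField() reports presence even for scalars):

from typing import Any


class SketchMessageValidator:
    """Illustrative fluent presence checker for a protobuf message."""

    def __init__(self, message: Any) -> None:
        self._message = message

    def required_field(self, field_name: str) -> "SketchMessageValidator":
        # HasField() raises ValueError for non-optional proto3 scalars, so this
        # sketch assumes the fields carry explicit presence information.
        if not self._message.HasField(field_name):
            raise ValueError(
                f"field '{field_name}' is required in {type(self._message).__name__}"
            )
        return self  # returning self is what enables the chaining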
indexify/executor/function_executor_controller/metrics/downloads.py (+8 -52)
@@ -4,64 +4,20 @@ from indexify.executor.monitoring.metrics import latency_metric_for_fast_operation
 
 # Graph download metrics
 metric_graph_downloads: prometheus_client.Counter = prometheus_client.Counter(
-    "task_graph_downloads",
-    "Number of task graph downloads, including downloads served from local cache",
+    "graph_downloads",
+    "Number of graph downloads, including downloads served from local cache",
 )
 metric_graph_download_errors: prometheus_client.Counter = prometheus_client.Counter(
-    "task_graph_download_errors",
-    "Number of task download errors, including downloads served from local cache",
+    "graph_download_errors",
+    "Number of download errors, including downloads served from local cache",
 )
 metric_graphs_from_cache: prometheus_client.Counter = prometheus_client.Counter(
-    "task_graph_downloads_from_cache",
-    "Number of task graph downloads served from local cache",
+    "graph_downloads_from_cache",
+    "Number of graph downloads served from local cache",
 )
 metric_graph_download_latency: prometheus_client.Histogram = (
     latency_metric_for_fast_operation(
-        "task_graph_download",
-        "task graph download, including downloads served from local cache",
-    )
-)
-metric_tasks_downloading_graphs: prometheus_client.Gauge = prometheus_client.Gauge(
-    "tasks_downloading_graphs",
-    "Number of tasks currently downloading their graphs, including local cache lookups",
-)
-
-# Task input download metrics
-metric_task_input_downloads: prometheus_client.Counter = prometheus_client.Counter(
-    "task_input_downloads", "Number of task input downloads"
-)
-metric_task_input_download_errors: prometheus_client.Counter = (
-    prometheus_client.Counter(
-        "task_input_download_errors", "Number of task input download errors"
-    )
-)
-metric_task_input_download_latency: prometheus_client.Histogram = (
-    latency_metric_for_fast_operation("task_input_download", "task input download")
-)
-metric_tasks_downloading_inputs: prometheus_client.Gauge = prometheus_client.Gauge(
-    "tasks_downloading_inputs", "Number of tasks currently downloading their inputs"
-)
-
-# Reducer init value download metrics
-metric_reducer_init_value_downloads: prometheus_client.Counter = (
-    prometheus_client.Counter(
-        "task_reducer_init_value_downloads", "Number of reducer init value downloads"
-    )
-)
-metric_reducer_init_value_download_errors: prometheus_client.Counter = (
-    prometheus_client.Counter(
-        "task_reducer_init_value_download_errors",
-        "Number of reducer init value download errors",
-    )
-)
-metric_reducer_init_value_download_latency: prometheus_client.Histogram = (
-    latency_metric_for_fast_operation(
-        "task_reducer_init_value_download", "Task reducer init value download"
-    )
-)
-metric_tasks_downloading_reducer_init_value: prometheus_client.Gauge = (
-    prometheus_client.Gauge(
-        "tasks_downloading_reducer_init_value",
-        "Number of tasks currently downloading their reducer init values",
+        "graph_download",
+        "Graph download, including downloads served from local cache",
     )
 )
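For reference, the surviving metrics follow the usual prometheus_client pattern: increment the attempt counter up front, wrap the work in count_exceptions() and time(), and bump the cache counter only on a hit. A self-contained sketch with illustrative metric names and stubbed download logic (none of this is the package's code):

import asyncio

import prometheus_client

downloads = prometheus_client.Counter("example_graph_downloads", "Graph downloads")
download_errors = prometheus_client.Counter(
    "example_graph_download_errors", "Graph download errors"
)
downloads_from_cache = prometheus_client.Counter(
    "example_graph_downloads_from_cache", "Graph downloads served from cache"
)
download_latency = prometheus_client.Histogram(
    "example_graph_download_seconds", "Graph download latency"
)

_local_cache: dict = {}


async def _fetch_remote(uri: str) -> bytes:
    # Stand-in for the real BLOB store read.
    await asyncio.sleep(0)
    return b"serialized-graph"


async def download_graph(uri: str) -> bytes:
    downloads.inc()  # counted even when the graph is served from cache
    with download_errors.count_exceptions(), download_latency.time():
        if uri in _local_cache:
            downloads_from_cache.inc()
            return _local_cache[uri]
        graph = await _fetch_remote(uri)
        _local_cache[uri] = graph
        return graph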