indexify-0.3.30-py3-none-any.whl → indexify-0.4.2-py3-none-any.whl

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
Files changed (74)
  1. indexify/cli/__init__.py +18 -0
  2. indexify/cli/build_image.py +51 -0
  3. indexify/cli/deploy.py +57 -0
  4. indexify/cli/executor.py +205 -0
  5. indexify/executor/{grpc/channel_manager.py → channel_manager.py} +17 -11
  6. indexify/executor/executor.py +57 -311
  7. indexify/executor/function_allowlist.py +59 -0
  8. indexify/executor/function_executor/function_executor.py +12 -6
  9. indexify/executor/function_executor/invocation_state_client.py +25 -3
  10. indexify/executor/function_executor/server/function_executor_server_factory.py +3 -3
  11. indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +22 -11
  12. indexify/executor/function_executor_controller/__init__.py +13 -0
  13. indexify/executor/function_executor_controller/completed_task_metrics.py +82 -0
  14. indexify/executor/function_executor_controller/create_function_executor.py +154 -0
  15. indexify/executor/function_executor_controller/debug_event_loop.py +37 -0
  16. indexify/executor/function_executor_controller/destroy_function_executor.py +28 -0
  17. indexify/executor/function_executor_controller/downloads.py +199 -0
  18. indexify/executor/function_executor_controller/events.py +172 -0
  19. indexify/executor/function_executor_controller/function_executor_controller.py +759 -0
  20. indexify/executor/function_executor_controller/loggers.py +57 -0
  21. indexify/executor/function_executor_controller/message_validators.py +65 -0
  22. indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +68 -0
  23. indexify/executor/{metrics/downloader.py → function_executor_controller/metrics/downloads.py} +1 -3
  24. indexify/executor/function_executor_controller/metrics/function_executor_controller.py +60 -0
  25. indexify/executor/{function_executor/metrics/single_task_runner.py → function_executor_controller/metrics/run_task.py} +9 -3
  26. indexify/executor/function_executor_controller/metrics/upload_task_output.py +39 -0
  27. indexify/executor/function_executor_controller/prepare_task.py +38 -0
  28. indexify/executor/function_executor_controller/run_task.py +201 -0
  29. indexify/executor/function_executor_controller/task_info.py +33 -0
  30. indexify/executor/function_executor_controller/task_output.py +122 -0
  31. indexify/executor/function_executor_controller/upload_task_output.py +234 -0
  32. indexify/executor/host_resources/host_resources.py +20 -25
  33. indexify/executor/{grpc/metrics → metrics}/channel_manager.py +1 -1
  34. indexify/executor/metrics/executor.py +0 -47
  35. indexify/executor/{grpc/metrics → metrics}/state_reconciler.py +1 -1
  36. indexify/executor/{grpc/metrics → metrics}/state_reporter.py +1 -1
  37. indexify/executor/monitoring/health_checker/generic_health_checker.py +6 -59
  38. indexify/executor/monitoring/health_checker/health_checker.py +0 -11
  39. indexify/executor/{grpc/state_reconciler.py → state_reconciler.py} +139 -141
  40. indexify/executor/state_reporter.py +364 -0
  41. indexify/proto/executor_api.proto +67 -59
  42. indexify/proto/executor_api_pb2.py +52 -52
  43. indexify/proto/executor_api_pb2.pyi +125 -104
  44. indexify/proto/executor_api_pb2_grpc.py +0 -47
  45. {indexify-0.3.30.dist-info → indexify-0.4.2.dist-info}/METADATA +1 -3
  46. indexify-0.4.2.dist-info/RECORD +68 -0
  47. indexify-0.4.2.dist-info/entry_points.txt +3 -0
  48. indexify/cli/cli.py +0 -267
  49. indexify/executor/api_objects.py +0 -92
  50. indexify/executor/downloader.py +0 -417
  51. indexify/executor/executor_flavor.py +0 -7
  52. indexify/executor/function_executor/function_executor_state.py +0 -107
  53. indexify/executor/function_executor/function_executor_states_container.py +0 -93
  54. indexify/executor/function_executor/function_executor_status.py +0 -95
  55. indexify/executor/function_executor/metrics/function_executor_state.py +0 -46
  56. indexify/executor/function_executor/metrics/function_executor_state_container.py +0 -10
  57. indexify/executor/function_executor/single_task_runner.py +0 -345
  58. indexify/executor/function_executor/task_input.py +0 -21
  59. indexify/executor/function_executor/task_output.py +0 -105
  60. indexify/executor/grpc/function_executor_controller.py +0 -418
  61. indexify/executor/grpc/metrics/task_controller.py +0 -8
  62. indexify/executor/grpc/state_reporter.py +0 -314
  63. indexify/executor/grpc/task_controller.py +0 -508
  64. indexify/executor/metrics/task_fetcher.py +0 -21
  65. indexify/executor/metrics/task_reporter.py +0 -53
  66. indexify/executor/metrics/task_runner.py +0 -52
  67. indexify/executor/monitoring/function_allowlist.py +0 -25
  68. indexify/executor/runtime_probes.py +0 -68
  69. indexify/executor/task_fetcher.py +0 -96
  70. indexify/executor/task_reporter.py +0 -459
  71. indexify/executor/task_runner.py +0 -177
  72. indexify-0.3.30.dist-info/RECORD +0 -68
  73. indexify-0.3.30.dist-info/entry_points.txt +0 -3
  74. {indexify-0.3.30.dist-info → indexify-0.4.2.dist-info}/WHEEL +0 -0
indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py
@@ -11,15 +11,11 @@ from .function_executor_server_factory import (
 from .subprocess_function_executor_server import SubprocessFunctionExecutorServer
 
 
-def get_free_tcp_port(iface_name="localhost") -> int:
-    tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    tcp.bind((iface_name, 0))
-    _, port = tcp.getsockname()
-    tcp.close()
-    return port
-
-
 class SubprocessFunctionExecutorServerFactory(FunctionExecutorServerFactory):
+    def __init__(self, verbose_logs: bool) -> None:
+        super().__init__()
+        self._verbose_logs = verbose_logs
+
     async def create(
         self, config: FunctionExecutorServerConfiguration, logger: Any
     ) -> SubprocessFunctionExecutorServer:
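
The factory is now constructed with a verbose_logs flag instead of being stateless. A minimal usage sketch (only the constructor signature is taken from this diff; the surrounding wiring is assumed):

    # Hypothetical wiring: the Executor presumably builds the factory once and
    # passes it to the code that creates Function Executors.
    factory = SubprocessFunctionExecutorServerFactory(verbose_logs=True)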
@@ -33,13 +29,15 @@ class SubprocessFunctionExecutorServerFactory(FunctionExecutorServerFactory):
         )
 
         try:
-            port = get_free_tcp_port()
-            logger.info("allocated function executor port", port=port)
+            port = _find_free_localhost_tcp_port()
             args = [
                 f"--executor-id={config.executor_id}",  # use = as executor_id can start with -
+                f"--function-executor-id={config.function_executor_id}",
                 "--address",
                 _server_address(port),
             ]
+            if self._verbose_logs:
+                args.append("--dev")
             # Run the process with our stdout, stderr. We want to see process logs and exceptions in our process output.
             # This is useful for debugging. Customer function stdout and stderr is captured and returned in the response
             # so we won't see it in our process outputs. This is the right behavior as customer function stdout and stderr
@@ -91,10 +89,23 @@ class SubprocessFunctionExecutorServerFactory(FunctionExecutorServerFactory):
     )
 
 
+# Function Executors only listen on localhost, so external connections to them are not possible.
+# This is a security measure. Executor <-> Function Executor communication is always local; we
+# don't support Function Executors running on a different host.
+_FUNCTION_EXECUTOR_SERVER_HOSTNAME = "localhost"
+
+
 def _server_address(port: int) -> str:
-    return f"localhost:{port}"
+    return f"{_FUNCTION_EXECUTOR_SERVER_HOSTNAME}:{port}"
 
 
 def _new_process_group() -> None:
     """Creates a new process group with ID equal to the current process PID. POSIX only."""
     os.setpgid(0, 0)
+
+
+def _find_free_localhost_tcp_port() -> int:
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+        sock.bind((_FUNCTION_EXECUTOR_SERVER_HOSTNAME, 0))
+        _, port = sock.getsockname()
+        return port
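
For reference, _find_free_localhost_tcp_port uses the standard bind-to-port-0 idiom: the OS picks a free ephemeral port. A standalone sketch of the same idiom, noting its inherent race:

    import socket

    # Binding to port 0 asks the OS for any free port. Closing the socket
    # releases the port, so another process could grab it before the
    # Function Executor subprocess binds it (an unavoidable TOCTOU window).
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.bind(("localhost", 0))
        _, port = sock.getsockname()
    print(port)  # e.g. 49213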
indexify/executor/function_executor_controller/__init__.py
@@ -0,0 +1,13 @@
+from .function_executor_controller import FunctionExecutorController
+from .loggers import function_executor_logger, task_logger
+from .message_validators import validate_function_executor_description, validate_task
+from .task_output import TaskOutput
+
+__all__ = [
+    "function_executor_logger",
+    "task_logger",
+    "validate_function_executor_description",
+    "validate_task",
+    "FunctionExecutorController",
+    "TaskOutput",
+]
indexify/executor/function_executor_controller/completed_task_metrics.py
@@ -0,0 +1,82 @@
+import time
+from typing import Any
+
+from indexify.proto.executor_api_pb2 import (
+    TaskFailureReason,
+    TaskOutcomeCode,
+)
+
+from .metrics.completed_task_metrics import (
+    METRIC_TASKS_COMPLETED_FAILURE_REASON_ALL,
+    METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_ERROR,
+    METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED,
+    METRIC_TASKS_COMPLETED_FAILURE_REASON_INTERNAL_ERROR,
+    METRIC_TASKS_COMPLETED_FAILURE_REASON_NONE,
+    METRIC_TASKS_COMPLETED_FAILURE_REASON_TASK_CANCELLED,
+    METRIC_TASKS_COMPLETED_FAILURE_REASON_UNKNOWN,
+    METRIC_TASKS_COMPLETED_OUTCOME_CODE_ALL,
+    METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
+    METRIC_TASKS_COMPLETED_OUTCOME_CODE_SUCCESS,
+    metric_task_completion_latency,
+    metric_tasks_completed,
+)
+from .task_info import TaskInfo
+
+
+def emit_completed_task_metrics(task_info: TaskInfo, logger: Any) -> None:
+    """Emits Prometheus metrics for a completed task.
+
+    Doesn't raise any exceptions.
+    """
+    logger = logger.bind(module=__name__)
+    metric_task_completion_latency.observe(time.monotonic() - task_info.start_time)
+
+    task_outcome_code: TaskOutcomeCode = task_info.output.outcome_code
+    task_failure_reason: TaskFailureReason = task_info.output.failure_reason
+    metric_tasks_completed.labels(
+        outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_ALL,
+        failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_ALL,
+    ).inc()
+    if task_outcome_code == TaskOutcomeCode.TASK_OUTCOME_CODE_SUCCESS:
+        metric_tasks_completed.labels(
+            outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_SUCCESS,
+            failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_NONE,
+        ).inc()
+    elif task_outcome_code == TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE:
+        if task_failure_reason == TaskFailureReason.TASK_FAILURE_REASON_INTERNAL_ERROR:
+            metric_tasks_completed.labels(
+                outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
+                failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_INTERNAL_ERROR,
+            ).inc()
+        elif (
+            task_failure_reason
+            == TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED
+        ):
+            metric_tasks_completed.labels(
+                outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
+                failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED,
+            ).inc()
+        elif (
+            task_failure_reason == TaskFailureReason.TASK_FAILURE_REASON_TASK_CANCELLED
+        ):
+            metric_tasks_completed.labels(
+                outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
+                failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_TASK_CANCELLED,
+            ).inc()
+        elif task_failure_reason in [
+            TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_ERROR,
+            TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_TIMEOUT,
+        ]:
+            metric_tasks_completed.labels(
+                outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
+                failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_ERROR,
+            ).inc()
+        else:
+            metric_tasks_completed.labels(
+                outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
+                failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_UNKNOWN,
+            ).inc()
+            logger.warning(
+                "unexpected task failure reason",
+                failure_reason=TaskFailureReason.Name(task_failure_reason),
+            )
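
The metric objects imported above live in metrics/completed_task_metrics.py, whose contents are not part of this diff. Assuming the usual prometheus_client API, their declarations would look roughly like this (only the names and label keys are taken from the code above; everything else is a guess):

    from prometheus_client import Counter, Histogram

    # Hypothetical declarations consistent with the .labels(...).inc() and
    # .observe() calls above; real names, help strings, and buckets may differ.
    metric_tasks_completed = Counter(
        "tasks_completed",
        "Tasks completed by this executor",
        ["outcome_code", "failure_reason"],
    )
    metric_task_completion_latency = Histogram(
        "task_completion_latency_seconds",
        "Wall-clock time from task start to completion",
    )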
indexify/executor/function_executor_controller/create_function_executor.py
@@ -0,0 +1,154 @@
+import asyncio
+from pathlib import Path
+from typing import Any, Optional
+
+from tensorlake.function_executor.proto.function_executor_pb2 import (
+    InitializeRequest,
+    SerializedObject,
+)
+
+from indexify.executor.blob_store.blob_store import BLOBStore
+from indexify.executor.function_executor.function_executor import (
+    FunctionError,
+    FunctionExecutor,
+    FunctionTimeoutError,
+)
+from indexify.executor.function_executor.server.function_executor_server_factory import (
+    FunctionExecutorServerConfiguration,
+    FunctionExecutorServerFactory,
+)
+from indexify.proto.executor_api_pb2 import (
+    FunctionExecutorDescription,
+    FunctionExecutorTerminationReason,
+)
+
+from .downloads import download_graph
+from .events import FunctionExecutorCreated
+
+
+async def create_function_executor(
+    function_executor_description: FunctionExecutorDescription,
+    function_executor_server_factory: FunctionExecutorServerFactory,
+    blob_store: BLOBStore,
+    executor_id: str,
+    base_url: str,
+    config_path: str,
+    cache_path: Path,
+    logger: Any,
+) -> FunctionExecutorCreated:
+    """Creates a function executor.
+
+    Doesn't raise any exceptions.
+    """
+    logger = logger.bind(module=__name__)
+    try:
+        function_executor: FunctionExecutor = await _create_function_executor(
+            function_executor_description=function_executor_description,
+            function_executor_server_factory=function_executor_server_factory,
+            blob_store=blob_store,
+            executor_id=executor_id,
+            base_url=base_url,
+            config_path=config_path,
+            cache_path=cache_path,
+            logger=logger,
+        )
+        return FunctionExecutorCreated(function_executor)
+    except FunctionTimeoutError as e:
+        return FunctionExecutorCreated(
+            function_executor=None,
+            function_error=e,
+            termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT,
+        )
+    except FunctionError as e:
+        return FunctionExecutorCreated(
+            function_executor=None,
+            function_error=e,
+            termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR,
+        )
+    except BaseException as e:
+        if isinstance(e, asyncio.CancelledError):
+            logger.info("function executor startup was cancelled")
+            return FunctionExecutorCreated(
+                function_executor=None,
+                termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE,
+            )
+        else:
+            logger.error(
+                "failed to create function executor due to platform error",
+                exc_info=e,
+            )
+            return FunctionExecutorCreated(
+                function_executor=None,
+                termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR,
+            )
+
+
+async def _create_function_executor(
+    function_executor_description: FunctionExecutorDescription,
+    function_executor_server_factory: FunctionExecutorServerFactory,
+    blob_store: BLOBStore,
+    executor_id: str,
+    base_url: str,
+    config_path: str,
+    cache_path: Path,
+    logger: Any,
+) -> FunctionExecutor:
+    """Creates a function executor.
+
+    Raises Exception on platform error.
+    Raises FunctionError if customer code failed during FE creation.
+    """
+    graph: SerializedObject = await download_graph(
+        function_executor_description=function_executor_description,
+        cache_path=cache_path,
+        blob_store=blob_store,
+        logger=logger,
+    )
+
+    config: FunctionExecutorServerConfiguration = FunctionExecutorServerConfiguration(
+        executor_id=executor_id,
+        function_executor_id=function_executor_description.id,
+        namespace=function_executor_description.namespace,
+        graph_name=function_executor_description.graph_name,
+        graph_version=function_executor_description.graph_version,
+        function_name=function_executor_description.function_name,
+        image_uri=None,
+        secret_names=list(function_executor_description.secret_names),
+        cpu_ms_per_sec=function_executor_description.resources.cpu_ms_per_sec,
+        memory_bytes=function_executor_description.resources.memory_bytes,
+        disk_bytes=function_executor_description.resources.disk_bytes,
+        gpu_count=function_executor_description.resources.gpu_count,
+    )
+    if function_executor_description.HasField("image_uri"):
+        config.image_uri = function_executor_description.image_uri
+
+    initialize_request: InitializeRequest = InitializeRequest(
+        namespace=function_executor_description.namespace,
+        graph_name=function_executor_description.graph_name,
+        graph_version=function_executor_description.graph_version,
+        function_name=function_executor_description.function_name,
+        graph=graph,
+    )
+    customer_code_timeout_sec: Optional[float] = None
+    if function_executor_description.HasField("customer_code_timeout_ms"):
+        customer_code_timeout_sec = (
+            function_executor_description.customer_code_timeout_ms / 1000.0
+        )
+
+    function_executor: FunctionExecutor = FunctionExecutor(
+        server_factory=function_executor_server_factory, logger=logger
+    )
+
+    try:
+        # Raises FunctionError if initialization failed in customer code or customer code timed out.
+        await function_executor.initialize(
+            config=config,
+            initialize_request=initialize_request,
+            base_url=base_url,
+            config_path=config_path,
+            customer_code_timeout_sec=customer_code_timeout_sec,
+        )
+        return function_executor
+    except BaseException:  # includes asyncio.CancelledError and anything else
+        await function_executor.destroy()
+        raise
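
create_function_executor reports its outcome through the FunctionExecutorCreated event defined in events.py (+172 in this diff, contents not shown here). Inferred from the constructor calls above, the event plausibly has this shape:

    from dataclasses import dataclass
    from typing import Optional

    # Hypothetical shape inferred from the call sites above; the real
    # definition in events.py may differ.
    @dataclass
    class FunctionExecutorCreated:
        function_executor: Optional["FunctionExecutor"]
        function_error: Optional[Exception] = None
        # A FunctionExecutorTerminationReason enum value, set on failure.
        termination_reason: Optional[int] = None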
indexify/executor/function_executor_controller/debug_event_loop.py
@@ -0,0 +1,37 @@
+import os
+from typing import Any, List
+
+from .events import BaseEvent
+
+_DEBUG_EVENT_LOOP: bool = (
+    os.getenv("INDEXIFY_FUNCTION_EXECUTOR_CONTROLLER_DEBUG_EVENT_LOOP", "0")
+) == "1"
+
+
+def debug_print_processing_event(event: BaseEvent, logger: Any) -> None:
+    if _DEBUG_EVENT_LOOP:
+        logger.debug(
+            "processing event in control loop",
+            fe_event=str(event),
+        )
+
+
+def debug_print_adding_event(event: BaseEvent, source: str, logger: Any) -> None:
+    if _DEBUG_EVENT_LOOP:
+        logger.debug(
+            "adding event to control loop",
+            source=source,
+            fe_event=str(event),
+        )
+
+
+def debug_print_events(events: List[BaseEvent], logger: Any) -> None:
+    if _DEBUG_EVENT_LOOP:
+        if len(events) == 0:
+            logger.debug("no events in control loop")
+        else:
+            logger.debug(
+                "events in control loop",
+                count=len(events),
+                fe_events=[str(event) for event in events],
+            )
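
Note that _DEBUG_EVENT_LOOP is evaluated once, at module import time, so the environment variable must be set before the controller package is imported:

    import os

    # Must run before importing indexify.executor.function_executor_controller;
    # setting the variable later has no effect because the flag is read at import.
    os.environ["INDEXIFY_FUNCTION_EXECUTOR_CONTROLLER_DEBUG_EVENT_LOOP"] = "1"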
indexify/executor/function_executor_controller/destroy_function_executor.py
@@ -0,0 +1,28 @@
+from typing import Any, Optional
+
+from indexify.executor.function_executor.function_executor import FunctionExecutor
+from indexify.proto.executor_api_pb2 import FunctionExecutorTerminationReason
+
+from .events import FunctionExecutorDestroyed
+
+
+async def destroy_function_executor(
+    function_executor: Optional[FunctionExecutor],
+    termination_reason: FunctionExecutorTerminationReason,
+    logger: Any,
+) -> FunctionExecutorDestroyed:
+    """Destroys a function executor.
+
+    Doesn't raise any exceptions.
+    """
+    logger = logger.bind(module=__name__)
+
+    if function_executor is not None:
+        logger.info(
+            "destroying function executor",
+        )
+        await function_executor.destroy()
+
+    return FunctionExecutorDestroyed(
+        is_success=True, termination_reason=termination_reason
+    )
indexify/executor/function_executor_controller/downloads.py
@@ -0,0 +1,199 @@
+import asyncio
+import os
+from pathlib import Path
+from typing import Any, Optional
+
+import nanoid
+from tensorlake.function_executor.proto.function_executor_pb2 import SerializedObject
+
+from indexify.executor.blob_store.blob_store import BLOBStore
+from indexify.proto.executor_api_pb2 import (
+    DataPayload,
+    DataPayloadEncoding,
+    FunctionExecutorDescription,
+)
+
+from .metrics.downloads import (
+    metric_graph_download_errors,
+    metric_graph_download_latency,
+    metric_graph_downloads,
+    metric_graphs_from_cache,
+    metric_reducer_init_value_download_errors,
+    metric_reducer_init_value_download_latency,
+    metric_reducer_init_value_downloads,
+    metric_task_input_download_errors,
+    metric_task_input_download_latency,
+    metric_task_input_downloads,
+    metric_tasks_downloading_graphs,
+    metric_tasks_downloading_inputs,
+    metric_tasks_downloading_reducer_init_value,
+)
+
+
+async def download_graph(
+    function_executor_description: FunctionExecutorDescription,
+    cache_path: Path,
+    blob_store: BLOBStore,
+    logger: Any,
+) -> SerializedObject:
+    logger = logger.bind(module=__name__)
+    with (
+        metric_graph_download_errors.count_exceptions(),
+        metric_tasks_downloading_graphs.track_inprogress(),
+        metric_graph_download_latency.time(),
+    ):
+        metric_graph_downloads.inc()
+        return await _download_graph(
+            function_executor_description=function_executor_description,
+            cache_path=cache_path,
+            blob_store=blob_store,
+            logger=logger,
+        )
+
+
+async def download_input(
+    data_payload: DataPayload,
+    blob_store: BLOBStore,
+    logger: Any,
+) -> SerializedObject:
+    logger = logger.bind(module=__name__)
+    with (
+        metric_task_input_download_errors.count_exceptions(),
+        metric_tasks_downloading_inputs.track_inprogress(),
+        metric_task_input_download_latency.time(),
+    ):
+        metric_task_input_downloads.inc()
+        return await _download_input(
+            data_payload=data_payload,
+            blob_store=blob_store,
+            logger=logger,
+        )
+
+
+async def download_init_value(
+    data_payload: DataPayload,
+    blob_store: BLOBStore,
+    logger: Any,
+) -> SerializedObject:
+    logger = logger.bind(module=__name__)
+    with (
+        metric_reducer_init_value_download_errors.count_exceptions(),
+        metric_tasks_downloading_reducer_init_value.track_inprogress(),
+        metric_reducer_init_value_download_latency.time(),
+    ):
+        metric_reducer_init_value_downloads.inc()
+        return await _download_input(
+            data_payload=data_payload,
+            blob_store=blob_store,
+            logger=logger,
+        )
+
+
+async def _download_input(
+    data_payload: DataPayload,
+    blob_store: BLOBStore,
+    logger: Any,
+) -> SerializedObject:
+    data: bytes = await blob_store.get(uri=data_payload.uri, logger=logger)
+    return _serialized_object_from_data_payload_proto(
+        data_payload=data_payload,
+        data=data,
+    )
+
+
+async def _download_graph(
+    function_executor_description: FunctionExecutorDescription,
+    cache_path: Path,
+    blob_store: BLOBStore,
+    logger: Any,
+) -> SerializedObject:
+    # Cache graph to reduce load on the server.
+    graph_path = os.path.join(
+        str(cache_path),
+        "graph_cache",
+        function_executor_description.namespace,
+        function_executor_description.graph_name,
+        function_executor_description.graph_version,
+    )
+    # Filesystem operations are synchronous.
+    # Run in a separate thread to not block the main event loop.
+    graph: Optional[SerializedObject] = await asyncio.to_thread(
+        _read_cached_graph, path=graph_path
+    )
+    if graph is not None:
+        metric_graphs_from_cache.inc()
+        return graph
+
+    data: bytes = await blob_store.get(
+        uri=function_executor_description.graph.uri, logger=logger
+    )
+    graph = _serialized_object_from_data_payload_proto(
+        data_payload=function_executor_description.graph,
+        data=data,
+    )
+
+    # Filesystem operations are synchronous.
+    # Run in a separate thread to not block the main event loop.
+    # We don't need to wait for the write completion so we use create_task.
+    asyncio.create_task(
+        asyncio.to_thread(
+            _write_cached_graph, path=graph_path, graph=graph, cache_path=cache_path
+        ),
+        name="graph cache write",
+    )
+
+    return graph
+
+
+def _read_cached_graph(path: str) -> Optional[SerializedObject]:
+    if not os.path.exists(path):
+        return None
+
+    with open(path, "rb") as f:
+        return SerializedObject.FromString(f.read())
+
+
+def _write_cached_graph(path: str, graph: SerializedObject, cache_path: Path) -> None:
+    if os.path.exists(path):
+        # Another task already cached the graph.
+        return None
+
+    tmp_path = os.path.join(str(cache_path), "task_graph_cache", nanoid.generate())
+    os.makedirs(os.path.dirname(tmp_path), exist_ok=True)
+    with open(tmp_path, "wb") as f:
+        f.write(graph.SerializeToString())
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    # Atomically rename the fully written file from its tmp path to its final path.
+    # This lets us avoid locking because file link/unlink are atomic
+    # operations at the filesystem level.
+    # This also allows multiple Executors to share the same cache.
+    os.replace(tmp_path, path)
+
+
+def _serialized_object_from_data_payload_proto(
+    data_payload: DataPayload, data: bytes
+) -> SerializedObject:
+    """Converts the given data payload and its data into a SerializedObject accepted by the Function Executor.
+
+    Raises ValueError if the supplied data payload can't be converted into a serialized object.
+    """
+    if data_payload.encoding == DataPayloadEncoding.DATA_PAYLOAD_ENCODING_BINARY_PICKLE:
+        return SerializedObject(
+            bytes=data,
+            content_type="application/octet-stream",
+        )
+    elif data_payload.encoding == DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT:
+        return SerializedObject(
+            content_type="text/plain",
+            string=data.decode("utf-8"),
+        )
+    elif data_payload.encoding == DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_JSON:
+        result = SerializedObject(
+            content_type="application/json",
+            string=data.decode("utf-8"),
+        )
+        return result
+
+    raise ValueError(
+        f"Can't convert data payload {data_payload} into serialized object"
+    )
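
_write_cached_graph relies on the write-then-rename idiom so concurrent readers never observe a partially written graph. The same idiom in isolation (a generic sketch, not indexify code):

    import os
    import tempfile

    def atomic_write(path: str, data: bytes) -> None:
        # Write to a unique temp file on the same filesystem, then rename it
        # into place. os.replace() is atomic on POSIX, so readers see either
        # the old content, no file, or the complete new content, never a
        # partial write.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(path))
        try:
            with os.fdopen(fd, "wb") as f:
                f.write(data)
            os.replace(tmp_path, path)
        except BaseException:
            os.unlink(tmp_path)
            raise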