indexify 0.4.21__tar.gz → 0.4.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. {indexify-0.4.21 → indexify-0.4.22}/PKG-INFO +3 -3
  2. {indexify-0.4.21 → indexify-0.4.22}/pyproject.toml +3 -3
  3. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/metrics/run_task.py +5 -4
  4. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/run_task.py +140 -48
  5. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/task_output.py +17 -0
  6. {indexify-0.4.21 → indexify-0.4.22}/README.md +0 -0
  7. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/cli/__init__.py +0 -0
  8. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/cli/build_image.py +0 -0
  9. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/cli/deploy.py +0 -0
  10. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/cli/executor.py +0 -0
  11. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/README.md +0 -0
  12. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/blob_store/blob_store.py +0 -0
  13. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/blob_store/local_fs_blob_store.py +0 -0
  14. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/blob_store/metrics/blob_store.py +0 -0
  15. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/blob_store/s3_blob_store.py +0 -0
  16. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/channel_manager.py +0 -0
  17. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/executor.py +0 -0
  18. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_allowlist.py +0 -0
  19. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor/function_executor.py +0 -0
  20. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor/health_checker.py +0 -0
  21. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor/invocation_state_client.py +0 -0
  22. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
  23. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
  24. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
  25. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
  26. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
  27. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -0
  28. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
  29. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +0 -0
  30. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/__init__.py +0 -0
  31. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/completed_task_metrics.py +0 -0
  32. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/create_function_executor.py +0 -0
  33. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/debug_event_loop.py +0 -0
  34. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/downloads.py +0 -0
  35. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/events.py +0 -0
  36. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/function_executor_controller.py +0 -0
  37. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/function_executor_startup_output.py +0 -0
  38. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/loggers.py +0 -0
  39. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/message_validators.py +0 -0
  40. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +0 -0
  41. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/metrics/downloads.py +0 -0
  42. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/metrics/function_executor_controller.py +0 -0
  43. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/metrics/upload_task_output.py +0 -0
  44. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/prepare_task.py +0 -0
  45. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/task_info.py +0 -0
  46. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/terminate_function_executor.py +0 -0
  47. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/upload_task_output.py +0 -0
  48. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/host_resources/host_resources.py +0 -0
  49. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/host_resources/nvidia_gpu.py +0 -0
  50. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py +0 -0
  51. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/metrics/channel_manager.py +0 -0
  52. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/metrics/executor.py +0 -0
  53. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/metrics/state_reconciler.py +0 -0
  54. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/metrics/state_reporter.py +0 -0
  55. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/monitoring/handler.py +0 -0
  56. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
  57. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -0
  58. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
  59. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/monitoring/health_checker/metrics/health_checker.py +0 -0
  60. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/monitoring/metrics.py +0 -0
  61. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
  62. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/monitoring/server.py +0 -0
  63. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
  64. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/state_reconciler.py +0 -0
  65. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/executor/state_reporter.py +0 -0
  66. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/proto/executor_api.proto +0 -0
  67. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/proto/executor_api_pb2.py +0 -0
  68. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/proto/executor_api_pb2.pyi +0 -0
  69. {indexify-0.4.21 → indexify-0.4.22}/src/indexify/proto/executor_api_pb2_grpc.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: indexify
3
- Version: 0.4.21
3
+ Version: 0.4.22
4
4
  Summary: Open Source Indexify components and helper tools
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
16
  Requires-Dist: aiohttp (>=3.12.14,<4.0.0)
17
- Requires-Dist: boto3 (>=1.39.14,<2.0.0)
17
+ Requires-Dist: boto3 (>=1.39.15,<2.0.0)
18
18
  Requires-Dist: docker (>=7.1.0,<8.0.0)
19
19
  Requires-Dist: httpx[http2] (==0.27.2)
20
20
  Requires-Dist: nanoid (>=2.0.0,<3.0.0)
@@ -22,7 +22,7 @@ Requires-Dist: prometheus-client (>=0.22.1,<0.23.0)
22
22
  Requires-Dist: psutil (>=7.0.0,<8.0.0)
23
23
  Requires-Dist: pydantic (>=2.11,<3.0)
24
24
  Requires-Dist: requests (>=2.32.4,<3.0.0)
25
- Requires-Dist: tensorlake (==0.2.33)
25
+ Requires-Dist: tensorlake (==0.2.37)
26
26
  Requires-Dist: urllib3 (>=2.5.0,<3.0.0)
27
27
  Project-URL: Repository, https://github.com/tensorlakeai/indexify
28
28
  Description-Content-Type: text/markdown
@@ -1,7 +1,7 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
3
  # Incremented if any of the components provided in this packages are updated.
4
- version = "0.4.21"
4
+ version = "0.4.22"
5
5
  description = "Open Source Indexify components and helper tools"
6
6
  authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
7
7
  license = "Apache 2.0"
@@ -26,10 +26,10 @@ httpx = { version = "0.27.2", extras = ["http2"] }
26
26
  pydantic = "^2.11"
27
27
  prometheus-client = "^0.22.1"
28
28
  psutil = "^7.0.0"
29
- boto3 = "^1.39.14"
29
+ boto3 = "^1.39.15"
30
30
  # Adds function-executor binary, utils lib, sdk used in indexify-cli commands.
31
31
  # We need to specify the tensorlake version exactly because pip install doesn't respect poetry.lock files.
32
- tensorlake = "0.2.33"
32
+ tensorlake = "0.2.37"
33
33
  # Uncomment the next line to use local tensorlake package (only for development!)
34
34
  # tensorlake = { path = "../tensorlake", develop = true }
35
35
  # grpcio is provided by tensorlake
@@ -6,23 +6,24 @@ from indexify.executor.monitoring.metrics import (
6
6
 
7
7
  metric_function_executor_run_task_rpcs: prometheus_client.Counter = (
8
8
  prometheus_client.Counter(
9
- "function_executor_run_task_rpcs", "Number of Function Executor run task RPCs"
9
+ "function_executor_run_task_rpcs",
10
+ "Number of Function Executor run task lifecycle RPC sequences",
10
11
  )
11
12
  )
12
13
  metric_function_executor_run_task_rpc_errors: prometheus_client.Counter = (
13
14
  prometheus_client.Counter(
14
15
  "function_executor_run_task_rpc_errors",
15
- "Number of Function Executor run task RPC errors",
16
+ "Number of Function Executor run task lifecycle RPC errors",
16
17
  )
17
18
  )
18
19
  metric_function_executor_run_task_rpc_latency: prometheus_client.Histogram = (
19
20
  latency_metric_for_customer_controlled_operation(
20
- "function_executor_run_task_rpc", "Function Executor run task RPC"
21
+ "function_executor_run_task_rpc", "Function Executor run task lifecycle RPC"
21
22
  )
22
23
  )
23
24
  metric_function_executor_run_task_rpcs_in_progress: prometheus_client.Gauge = (
24
25
  prometheus_client.Gauge(
25
26
  "function_executor_run_task_rpcs_in_progress",
26
- "Number of Function Executor run task RPCs in progress",
27
+ "Number of Function Executor run task lifecycle RPCs in progress",
27
28
  )
28
29
  )
@@ -6,9 +6,13 @@ from typing import Any, Optional
6
6
 
7
7
  import grpc
8
8
  from tensorlake.function_executor.proto.function_executor_pb2 import (
9
- RunTaskRequest,
10
- RunTaskResponse,
9
+ AwaitTaskProgress,
10
+ AwaitTaskRequest,
11
+ CreateTaskRequest,
12
+ DeleteTaskRequest,
13
+ FunctionInputs,
11
14
  SerializedObject,
15
+ Task,
12
16
  )
13
17
  from tensorlake.function_executor.proto.function_executor_pb2 import (
14
18
  TaskFailureReason as FETaskFailureReason,
@@ -16,6 +20,9 @@ from tensorlake.function_executor.proto.function_executor_pb2 import (
16
20
  from tensorlake.function_executor.proto.function_executor_pb2 import (
17
21
  TaskOutcomeCode as FETaskOutcomeCode,
18
22
  )
23
+ from tensorlake.function_executor.proto.function_executor_pb2 import (
24
+ TaskResult,
25
+ )
19
26
  from tensorlake.function_executor.proto.function_executor_pb2_grpc import (
20
27
  FunctionExecutorStub,
21
28
  )
@@ -44,6 +51,9 @@ _ENABLE_INJECT_TASK_CANCELLATIONS = (
44
51
  os.getenv("INDEXIFY_INJECT_TASK_CANCELLATIONS", "0") == "1"
45
52
  )
46
53
 
54
+ _CREATE_TASK_TIMEOUT_SECS = 5
55
+ _DELETE_TASK_TIMEOUT_SECS = 5
56
+
47
57
 
48
58
  async def run_task_on_function_executor(
49
59
  task_info: TaskInfo, function_executor: FunctionExecutor, logger: Any
@@ -53,21 +63,21 @@ async def run_task_on_function_executor(
53
63
  Doesn't raise any exceptions.
54
64
  """
55
65
  logger = logger.bind(module=__name__)
56
- request: RunTaskRequest = RunTaskRequest(
66
+ task = Task(
67
+ task_id=task_info.allocation.task.id,
57
68
  namespace=task_info.allocation.task.namespace,
58
69
  graph_name=task_info.allocation.task.graph_name,
59
70
  graph_version=task_info.allocation.task.graph_version,
60
71
  function_name=task_info.allocation.task.function_name,
61
72
  graph_invocation_id=task_info.allocation.task.graph_invocation_id,
62
- task_id=task_info.allocation.task.id,
63
73
  allocation_id=task_info.allocation.allocation_id,
64
- function_input=task_info.input,
74
+ request=FunctionInputs(function_input=task_info.input),
65
75
  )
66
76
  # Don't keep the input in memory after we started running the task.
67
77
  task_info.input = None
68
78
 
69
79
  if task_info.init_value is not None:
70
- request.function_init_value.CopyFrom(task_info.init_value)
80
+ task.request.function_init_value.CopyFrom(task_info.init_value)
71
81
  # Don't keep the init value in memory after we started running the task.
72
82
  task_info.init_value = None
73
83
 
@@ -78,50 +88,75 @@ async def run_task_on_function_executor(
78
88
 
79
89
  metric_function_executor_run_task_rpcs.inc()
80
90
  metric_function_executor_run_task_rpcs_in_progress.inc()
81
- start_time = time.monotonic()
82
91
  # Not None if the Function Executor should be terminated after running the task.
83
92
  function_executor_termination_reason: Optional[
84
93
  FunctionExecutorTerminationReason
85
94
  ] = None
86
- execution_start_time: Optional[float] = None
95
+
96
+ # NB: We start this timer before invoking the first RPC, since
97
+ # user code should be executing by the time the create_task() RPC
98
+ # returns, so not attributing the task management RPC overhead to
99
+ # the user would open a possibility for abuse. (This is somewhat
100
+ # mitigated by the fact that these RPCs should have a very low
101
+ # overhead.)
102
+ execution_start_time: Optional[float] = time.monotonic()
87
103
 
88
104
  # If this RPC failed due to customer code crashing the server we won't be
89
105
  # able to detect this. We'll treat this as our own error for now and thus
90
106
  # let the AioRpcError to be raised here.
91
107
  timeout_sec = task_info.allocation.task.timeout_ms / 1000.0
92
108
  try:
93
- channel: grpc.aio.Channel = function_executor.channel()
94
- execution_start_time = time.monotonic()
95
- response: RunTaskResponse = await FunctionExecutorStub(channel).run_task(
96
- request, timeout=timeout_sec
97
- )
98
- task_info.output = _task_output_from_function_executor_response(
109
+ task_result = await _run_task_rpcs(task, function_executor, timeout_sec)
110
+
111
+ task_info.output = _task_output_from_function_executor_result(
99
112
  allocation=task_info.allocation,
100
- response=response,
113
+ result=task_result,
101
114
  execution_start_time=execution_start_time,
102
115
  execution_end_time=time.monotonic(),
103
116
  logger=logger,
104
117
  )
118
+ except asyncio.TimeoutError:
119
+ # This is an await_task() RPC timeout - we're not getting
120
+ # progress messages or a task completion.
121
+ function_executor_termination_reason = (
122
+ FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT
123
+ )
124
+ task_info.output = TaskOutput.function_timeout(
125
+ allocation=task_info.allocation,
126
+ timeout_sec=timeout_sec,
127
+ execution_start_time=execution_start_time,
128
+ execution_end_time=time.monotonic(),
129
+ )
105
130
  except grpc.aio.AioRpcError as e:
131
+ # This indicates some sort of problem communicating with the FE.
132
+ #
133
+ # NB: We charge the user in these situations: code within the
134
+ # FE is not isolated, so not charging would enable abuse.
135
+ #
136
+ # This is an unexpected situation, though, so we make sure to
137
+ # log the situation for further investigation.
138
+
139
+ function_executor_termination_reason = (
140
+ FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY
141
+ )
142
+ metric_function_executor_run_task_rpc_errors.inc()
143
+
106
144
  if e.code() == grpc.StatusCode.DEADLINE_EXCEEDED:
107
- # The task is still running in FE, we only cancelled the client-side RPC.
108
- function_executor_termination_reason = (
109
- FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT
110
- )
111
- task_info.output = TaskOutput.function_timeout(
112
- allocation=task_info.allocation,
113
- timeout_sec=timeout_sec,
114
- execution_start_time=execution_start_time,
115
- execution_end_time=time.monotonic(),
116
- )
145
+ # This is either a create_task() RPC timeout or a
146
+ # delete_task() RPC timeout; either suggests that the FE
147
+ # is unhealthy.
148
+ logger.error("task management RPC execution deadline exceeded", exc_info=e)
117
149
  else:
118
- metric_function_executor_run_task_rpc_errors.inc()
119
- logger.error("task execution failed", exc_info=e)
120
- task_info.output = TaskOutput.internal_error(
121
- allocation=task_info.allocation,
122
- execution_start_time=execution_start_time,
123
- execution_end_time=time.monotonic(),
124
- )
150
+ # This is a status from an unsuccessful RPC; this
151
+ # shouldn't happen, but we handle it.
152
+ logger.error("task management RPC failed", exc_info=e)
153
+
154
+ task_info.output = TaskOutput.function_executor_unresponsive(
155
+ allocation=task_info.allocation,
156
+ execution_start_time=execution_start_time,
157
+ execution_end_time=time.monotonic(),
158
+ )
159
+
125
160
  except asyncio.CancelledError:
126
161
  # The task is still running in FE, we only cancelled the client-side RPC.
127
162
  function_executor_termination_reason = (
@@ -133,15 +168,20 @@ async def run_task_on_function_executor(
133
168
  execution_end_time=time.monotonic(),
134
169
  )
135
170
  except Exception as e:
136
- metric_function_executor_run_task_rpc_errors.inc()
137
- logger.error("task execution failed", exc_info=e)
171
+ # This is an unexpected exception; we believe that this
172
+ # indicates an internal error.
173
+ logger.error(
174
+ "Unexpected internal error during task lifecycle RPC sequence", exc_info=e
175
+ )
138
176
  task_info.output = TaskOutput.internal_error(
139
177
  allocation=task_info.allocation,
140
178
  execution_start_time=execution_start_time,
141
179
  execution_end_time=time.monotonic(),
142
180
  )
143
181
 
144
- metric_function_executor_run_task_rpc_latency.observe(time.monotonic() - start_time)
182
+ metric_function_executor_run_task_rpc_latency.observe(
183
+ time.monotonic() - execution_start_time
184
+ )
145
185
  metric_function_executor_run_task_rpcs_in_progress.dec()
146
186
 
147
187
  function_executor.invocation_state_client().remove_task_to_invocation_id_entry(
@@ -171,26 +211,78 @@ async def run_task_on_function_executor(
171
211
  )
172
212
 
173
213
 
174
- def _task_output_from_function_executor_response(
214
+ async def _run_task_rpcs(
215
+ task: Task, function_executor: FunctionExecutor, timeout_sec: float
216
+ ) -> TaskResult:
217
+ """Runs the task, returning the result, reporting errors via exceptions."""
218
+
219
+ response: AwaitTaskProgress
220
+ channel: grpc.aio.Channel = function_executor.channel()
221
+ fe_stub = FunctionExecutorStub(channel)
222
+
223
+ # Create task with timeout
224
+ await fe_stub.create_task(
225
+ CreateTaskRequest(task=task), timeout=_CREATE_TASK_TIMEOUT_SECS
226
+ )
227
+
228
+ # Await task with timeout resets on each response
229
+ await_rpc = fe_stub.await_task(AwaitTaskRequest(task_id=task.task_id))
230
+
231
+ try:
232
+ while True:
233
+ # Wait for next response with fresh timeout each time
234
+ response = await asyncio.wait_for(await_rpc.read(), timeout=timeout_sec)
235
+ if response.WhichOneof("response") == "task_result":
236
+ # We're done waiting.
237
+ break
238
+
239
+ # NB: We don't actually check for other message types
240
+ # here; any message from the FE is treated as an
241
+ # indication that it's making forward progress.
242
+
243
+ if response == grpc.aio.EOF:
244
+ # Protocol error: we should get a task_result before
245
+ # we see the RPC complete.
246
+ raise grpc.aio.AioRpcError(
247
+ grpc.StatusCode.CANCELLED,
248
+ None,
249
+ None,
250
+ "Function Executor didn't return function/task alloc response",
251
+ )
252
+ finally:
253
+ # Cancel the outstanding RPC to ensure any resources in use
254
+ # are cleaned up; note that this is idempotent (in case the
255
+ # RPC has already completed).
256
+ await_rpc.cancel()
257
+
258
+ # Delete task with timeout
259
+ await fe_stub.delete_task(
260
+ DeleteTaskRequest(task_id=task.task_id), timeout=_DELETE_TASK_TIMEOUT_SECS
261
+ )
262
+
263
+ return response.task_result
264
+
265
+
266
+ def _task_output_from_function_executor_result(
175
267
  allocation: TaskAllocation,
176
- response: RunTaskResponse,
268
+ result: TaskResult,
177
269
  execution_start_time: Optional[float],
178
270
  execution_end_time: Optional[float],
179
271
  logger: Any,
180
272
  ) -> TaskOutput:
181
- response_validator = MessageValidator(response)
273
+ response_validator = MessageValidator(result)
182
274
  response_validator.required_field("stdout")
183
275
  response_validator.required_field("stderr")
184
276
  response_validator.required_field("outcome_code")
185
277
 
186
278
  metrics = TaskMetrics(counters={}, timers={})
187
- if response.HasField("metrics"):
279
+ if result.HasField("metrics"):
188
280
  # Can be None if e.g. function failed.
189
- metrics.counters = dict(response.metrics.counters)
190
- metrics.timers = dict(response.metrics.timers)
281
+ metrics.counters = dict(result.metrics.counters)
282
+ metrics.timers = dict(result.metrics.timers)
191
283
 
192
284
  outcome_code: TaskOutcomeCode = _to_task_outcome_code(
193
- response.outcome_code, logger=logger
285
+ result.outcome_code, logger=logger
194
286
  )
195
287
  failure_reason: Optional[TaskFailureReason] = None
196
288
  invocation_error_output: Optional[SerializedObject] = None
@@ -198,11 +290,11 @@ def _task_output_from_function_executor_response(
198
290
  if outcome_code == TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE:
199
291
  response_validator.required_field("failure_reason")
200
292
  failure_reason: Optional[TaskFailureReason] = _to_task_failure_reason(
201
- response.failure_reason, logger
293
+ result.failure_reason, logger
202
294
  )
203
295
  if failure_reason == TaskFailureReason.TASK_FAILURE_REASON_INVOCATION_ERROR:
204
296
  response_validator.required_field("invocation_error_output")
205
- invocation_error_output = response.invocation_error_output
297
+ invocation_error_output = result.invocation_error_output
206
298
 
207
299
  if _ENABLE_INJECT_TASK_CANCELLATIONS:
208
300
  logger.warning("injecting cancellation failure for the task allocation")
@@ -217,10 +309,10 @@ def _task_output_from_function_executor_response(
217
309
  outcome_code=outcome_code,
218
310
  failure_reason=failure_reason,
219
311
  invocation_error_output=invocation_error_output,
220
- function_outputs=response.function_outputs,
221
- next_functions=response.next_functions,
222
- stdout=response.stdout,
223
- stderr=response.stderr,
312
+ function_outputs=result.function_outputs,
313
+ next_functions=result.next_functions,
314
+ stdout=result.stdout,
315
+ stderr=result.stderr,
224
316
  metrics=metrics,
225
317
  execution_start_time=execution_start_time,
226
318
  execution_end_time=execution_end_time,
@@ -95,6 +95,23 @@ class TaskOutput:
95
95
  execution_end_time=execution_end_time,
96
96
  )
97
97
 
98
+ @classmethod
99
+ def function_executor_unresponsive(
100
+ cls,
101
+ allocation: TaskAllocation,
102
+ execution_start_time: Optional[float],
103
+ execution_end_time: Optional[float],
104
+ ) -> "TaskOutput":
105
+ """Creates a TaskOutput for an unresponsive FE."""
106
+ # Task stdout, stderr is not available.
107
+ return TaskOutput(
108
+ allocation=allocation,
109
+ outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
110
+ failure_reason=TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_ERROR,
111
+ execution_start_time=execution_start_time,
112
+ execution_end_time=execution_end_time,
113
+ )
114
+
98
115
  @classmethod
99
116
  def task_cancelled(
100
117
  cls,
File without changes