indexify 0.4.22__py3-none-any.whl → 0.4.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/cli/executor.py +2 -9
- indexify/executor/blob_store/blob_store.py +110 -26
- indexify/executor/blob_store/local_fs_blob_store.py +41 -1
- indexify/executor/blob_store/metrics/blob_store.py +87 -15
- indexify/executor/blob_store/s3_blob_store.py +112 -1
- indexify/executor/function_executor/function_executor.py +32 -56
- indexify/executor/function_executor/invocation_state_client.py +10 -3
- indexify/executor/function_executor/server/function_executor_server_factory.py +0 -1
- indexify/executor/function_executor_controller/create_function_executor.py +129 -116
- indexify/executor/function_executor_controller/downloads.py +34 -86
- indexify/executor/function_executor_controller/events.py +13 -7
- indexify/executor/function_executor_controller/finalize_task.py +184 -0
- indexify/executor/function_executor_controller/function_executor_controller.py +121 -78
- indexify/executor/function_executor_controller/message_validators.py +10 -3
- indexify/executor/function_executor_controller/metrics/downloads.py +8 -52
- indexify/executor/function_executor_controller/metrics/finalize_task.py +20 -0
- indexify/executor/function_executor_controller/metrics/prepare_task.py +18 -0
- indexify/executor/function_executor_controller/prepare_task.py +232 -14
- indexify/executor/function_executor_controller/run_task.py +77 -61
- indexify/executor/function_executor_controller/task_info.py +4 -7
- indexify/executor/function_executor_controller/task_input.py +21 -0
- indexify/executor/function_executor_controller/task_output.py +26 -35
- indexify/executor/function_executor_controller/terminate_function_executor.py +6 -1
- indexify/executor/logging.py +69 -0
- indexify/executor/monitoring/metrics.py +22 -0
- indexify/proto/executor_api.proto +11 -3
- indexify/proto/executor_api_pb2.py +54 -54
- indexify/proto/executor_api_pb2.pyi +8 -1
- {indexify-0.4.22.dist-info → indexify-0.4.24.dist-info}/METADATA +6 -6
- {indexify-0.4.22.dist-info → indexify-0.4.24.dist-info}/RECORD +32 -30
- indexify/executor/function_executor_controller/function_executor_startup_output.py +0 -21
- indexify/executor/function_executor_controller/metrics/upload_task_output.py +0 -39
- indexify/executor/function_executor_controller/upload_task_output.py +0 -274
- {indexify-0.4.22.dist-info → indexify-0.4.24.dist-info}/WHEEL +0 -0
- {indexify-0.4.22.dist-info → indexify-0.4.24.dist-info}/entry_points.txt +0 -0
@@ -9,6 +9,7 @@ from tensorlake.function_executor.proto.function_executor_pb2 import (
|
|
9
9
|
InvocationStateResponse,
|
10
10
|
SerializedObject,
|
11
11
|
SerializedObjectEncoding,
|
12
|
+
SerializedObjectManifest,
|
12
13
|
SetInvocationStateResponse,
|
13
14
|
)
|
14
15
|
from tensorlake.function_executor.proto.function_executor_pb2_grpc import (
|
@@ -202,7 +203,7 @@ class InvocationStateClient:
|
|
202
203
|
f"{self._base_url}/internal/namespaces/{self._namespace}/compute_graphs/{self._graph}/invocations/{invocation_id}/ctx/{key}"
|
203
204
|
)
|
204
205
|
if (
|
205
|
-
value.encoding
|
206
|
+
value.manifest.encoding
|
206
207
|
!= SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_BINARY_PICKLE
|
207
208
|
):
|
208
209
|
raise ValueError(
|
@@ -285,7 +286,13 @@ def _serialized_object_from_http_response(response: httpx.Response) -> Serialize
|
|
285
286
|
)
|
286
287
|
|
287
288
|
return SerializedObject(
|
289
|
+
manifest=SerializedObjectManifest(
|
290
|
+
encoding=SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_BINARY_PICKLE,
|
291
|
+
encoding_version=0,
|
292
|
+
size=len(response.content),
|
293
|
+
# We don't store any hash on the server side right now and it's not safe
|
294
|
+
# to compute it here as this is user manipulated data.
|
295
|
+
sha256_hash="fake_hash",
|
296
|
+
),
|
288
297
|
data=response.content,
|
289
|
-
encoding=SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_BINARY_PICKLE,
|
290
|
-
encoding_version=0,
|
291
298
|
)
|
@@ -3,14 +3,17 @@ from pathlib import Path
|
|
3
3
|
from typing import Any, Optional, Tuple
|
4
4
|
|
5
5
|
from tensorlake.function_executor.proto.function_executor_pb2 import (
|
6
|
+
InitializationFailureReason,
|
7
|
+
InitializationOutcomeCode,
|
6
8
|
InitializeRequest,
|
9
|
+
InitializeResponse,
|
7
10
|
SerializedObject,
|
8
11
|
)
|
12
|
+
from tensorlake.function_executor.proto.message_validator import MessageValidator
|
9
13
|
|
10
14
|
from indexify.executor.blob_store.blob_store import BLOBStore
|
11
15
|
from indexify.executor.function_executor.function_executor import (
|
12
16
|
FunctionExecutor,
|
13
|
-
FunctionExecutorInitializationError,
|
14
17
|
FunctionExecutorInitializationResult,
|
15
18
|
)
|
16
19
|
from indexify.executor.function_executor.server.function_executor_server_factory import (
|
@@ -18,16 +21,12 @@ from indexify.executor.function_executor.server.function_executor_server_factory
|
|
18
21
|
FunctionExecutorServerFactory,
|
19
22
|
)
|
20
23
|
from indexify.proto.executor_api_pb2 import (
|
21
|
-
DataPayload,
|
22
|
-
DataPayloadEncoding,
|
23
24
|
FunctionExecutorDescription,
|
24
25
|
FunctionExecutorTerminationReason,
|
25
26
|
)
|
26
27
|
|
27
28
|
from .downloads import download_graph
|
28
29
|
from .events import FunctionExecutorCreated
|
29
|
-
from .function_executor_startup_output import FunctionExecutorStartupOutput
|
30
|
-
from .upload_task_output import compute_hash
|
31
30
|
|
32
31
|
|
33
32
|
async def create_function_executor(
|
@@ -56,125 +55,114 @@ async def create_function_executor(
|
|
56
55
|
cache_path=cache_path,
|
57
56
|
logger=logger,
|
58
57
|
)
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
58
|
+
except asyncio.CancelledError:
|
59
|
+
# Cancelled FE startup means that Server removed this FE from desired state. We don't have FE termination reason for the case
|
60
|
+
# when Server removed FE from desired state because we can't rely on its delivery because FE removed from desired state can get
|
61
|
+
# removed from reported state at any moment. Thus we can use any termination reason here.
|
63
62
|
return FunctionExecutorCreated(
|
64
|
-
function_executor=
|
65
|
-
|
66
|
-
function_executor_description=function_executor_description,
|
67
|
-
result=result,
|
68
|
-
blob_store=blob_store,
|
69
|
-
logger=logger,
|
70
|
-
),
|
63
|
+
function_executor=None,
|
64
|
+
fe_termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_CANCELLED,
|
71
65
|
)
|
72
66
|
except BaseException as e:
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
"failed to create function executor due to platform error",
|
78
|
-
exc_info=e,
|
79
|
-
)
|
80
|
-
|
81
|
-
# Cancelled FE startup means that Server removed it from desired state so it doesn't matter what termination_reason we return
|
82
|
-
# in this case cause this FE will be removed from Executor reported state.
|
67
|
+
logger.error(
|
68
|
+
"failed to create function executor",
|
69
|
+
exc_info=e,
|
70
|
+
)
|
83
71
|
return FunctionExecutorCreated(
|
84
72
|
function_executor=None,
|
85
|
-
|
86
|
-
function_executor_description=function_executor_description,
|
87
|
-
termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR,
|
88
|
-
),
|
73
|
+
fe_termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR,
|
89
74
|
)
|
90
75
|
|
76
|
+
function_executor: FunctionExecutor
|
77
|
+
result: FunctionExecutorInitializationResult
|
78
|
+
# No await here so this call can't be cancelled.
|
79
|
+
fe_created_event: FunctionExecutorCreated = _to_fe_created_event(
|
80
|
+
function_executor=function_executor,
|
81
|
+
result=result,
|
82
|
+
logger=logger,
|
83
|
+
)
|
84
|
+
if fe_created_event.function_executor is None:
|
85
|
+
try:
|
86
|
+
await asyncio.shield(function_executor.destroy())
|
87
|
+
except asyncio.CancelledError:
|
88
|
+
# destroy() finished due to the shield, return fe_created_event.
|
89
|
+
pass
|
91
90
|
|
92
|
-
|
93
|
-
|
91
|
+
return fe_created_event
|
92
|
+
|
93
|
+
|
94
|
+
def _to_fe_created_event(
|
95
|
+
function_executor: FunctionExecutor,
|
94
96
|
result: FunctionExecutorInitializationResult,
|
95
|
-
blob_store: BLOBStore,
|
96
97
|
logger: Any,
|
97
|
-
) ->
|
98
|
-
"""Converts FunctionExecutorInitializationResult to
|
99
|
-
|
100
|
-
Uploads stdout and stderr to blob store if they are present. Does only one attempt to do that.
|
101
|
-
Doesn't raise any exceptions."""
|
102
|
-
termination_reason: FunctionExecutorTerminationReason = None
|
103
|
-
if result.error is not None:
|
104
|
-
if result.error == FunctionExecutorInitializationError.FUNCTION_ERROR:
|
105
|
-
termination_reason = (
|
106
|
-
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR
|
107
|
-
)
|
108
|
-
elif result.error == FunctionExecutorInitializationError.FUNCTION_TIMEOUT:
|
109
|
-
termination_reason = (
|
110
|
-
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT
|
111
|
-
)
|
112
|
-
else:
|
113
|
-
logger.error(
|
114
|
-
"unexpected function executor initialization error code",
|
115
|
-
error_code=FunctionExecutorInitializationError.name(result.error),
|
116
|
-
)
|
117
|
-
termination_reason = (
|
118
|
-
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR
|
119
|
-
)
|
98
|
+
) -> FunctionExecutorCreated:
|
99
|
+
"""Converts FunctionExecutorInitializationResult to FunctionExecutorCreated event.
|
120
100
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
output_url=url,
|
128
|
-
blob_store=blob_store,
|
129
|
-
logger=logger,
|
101
|
+
Doesn't raise any exceptions.
|
102
|
+
"""
|
103
|
+
if result.is_timeout:
|
104
|
+
return FunctionExecutorCreated(
|
105
|
+
function_executor=None,
|
106
|
+
fe_termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT,
|
130
107
|
)
|
131
108
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
logger=logger,
|
109
|
+
if result.response is None:
|
110
|
+
# This is a grey failure where we don't know the exact cause.
|
111
|
+
# Treat it as a customer function error to prevent service abuse by intentionally
|
112
|
+
# triggering function executor creations failures that don't get billed.
|
113
|
+
logger.error("function executor startup failed with no response")
|
114
|
+
return FunctionExecutorCreated(
|
115
|
+
function_executor=None,
|
116
|
+
fe_termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR,
|
141
117
|
)
|
142
118
|
|
143
|
-
|
144
|
-
function_executor_description=function_executor_description,
|
145
|
-
termination_reason=termination_reason,
|
146
|
-
stdout=stdout,
|
147
|
-
stderr=stderr,
|
148
|
-
)
|
149
|
-
|
150
|
-
|
151
|
-
async def _upload_initialization_output(
|
152
|
-
output_name: str, output: str, output_url: str, blob_store: BLOBStore, logger: Any
|
153
|
-
) -> Optional[DataPayload]:
|
154
|
-
"""Uploads text to blob store. Returns None if the upload fails.
|
155
|
-
|
156
|
-
Doesn't raise any exceptions.
|
157
|
-
"""
|
119
|
+
initialize_response: InitializeResponse = result.response
|
158
120
|
try:
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
f"function executor initialization output {output_name} uploaded to blob store",
|
163
|
-
size=len(output_bytes),
|
164
|
-
)
|
165
|
-
return DataPayload(
|
166
|
-
uri=output_url,
|
167
|
-
size=len(output_bytes),
|
168
|
-
sha256_hash=compute_hash(output_bytes),
|
169
|
-
encoding=DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT,
|
170
|
-
encoding_version=0,
|
171
|
-
)
|
172
|
-
except Exception as e:
|
121
|
+
_validate_initialize_response(initialize_response)
|
122
|
+
except ValueError as e:
|
123
|
+
# Grey failure mode. Treat as customer function error to prevent service abuse but log for future investigations.
|
173
124
|
logger.error(
|
174
|
-
|
175
|
-
|
125
|
+
"function executor initialization failed with invalid response", exc_info=e
|
126
|
+
)
|
127
|
+
return FunctionExecutorCreated(
|
128
|
+
function_executor=None,
|
129
|
+
fe_termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR,
|
176
130
|
)
|
177
|
-
|
131
|
+
|
132
|
+
# Print FE logs directly to Executor logs so operators can see them.
|
133
|
+
# Uncomment these lines once we stop printing FE logs to stdout/stderr.
|
134
|
+
# logger.info("Function Executor logs during initialization:")
|
135
|
+
# print(initialize_response.diagnostics.function_executor_log)
|
136
|
+
|
137
|
+
fe_termination_reason: Optional[FunctionExecutorTerminationReason] = None
|
138
|
+
if (
|
139
|
+
initialize_response.outcome_code
|
140
|
+
== InitializationOutcomeCode.INITIALIZATION_OUTCOME_CODE_FAILURE
|
141
|
+
):
|
142
|
+
if (
|
143
|
+
initialize_response.failure_reason
|
144
|
+
== InitializationFailureReason.INITIALIZATION_FAILURE_REASON_FUNCTION_ERROR
|
145
|
+
):
|
146
|
+
fe_termination_reason = (
|
147
|
+
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR
|
148
|
+
)
|
149
|
+
else:
|
150
|
+
# Treat all other failure reasons as grey failures. Report them as function errors to prevent service abuse.
|
151
|
+
# Log them for awareness and future investigations.
|
152
|
+
logger.error(
|
153
|
+
"function executor initialization failed",
|
154
|
+
failure_reason=InitializationFailureReason.Name(
|
155
|
+
initialize_response.failure_reason
|
156
|
+
),
|
157
|
+
)
|
158
|
+
fe_termination_reason = (
|
159
|
+
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR
|
160
|
+
)
|
161
|
+
|
162
|
+
return FunctionExecutorCreated(
|
163
|
+
function_executor=function_executor,
|
164
|
+
fe_termination_reason=fe_termination_reason,
|
165
|
+
)
|
178
166
|
|
179
167
|
|
180
168
|
async def _create_function_executor(
|
@@ -189,7 +177,7 @@ async def _create_function_executor(
|
|
189
177
|
) -> Tuple[FunctionExecutor, FunctionExecutorInitializationResult]:
|
190
178
|
"""Creates a function executor.
|
191
179
|
|
192
|
-
Raises Exception on
|
180
|
+
Raises Exception on internal Executor error.
|
193
181
|
"""
|
194
182
|
graph: SerializedObject = await download_graph(
|
195
183
|
function_executor_description=function_executor_description,
|
@@ -209,15 +197,12 @@ async def _create_function_executor(
|
|
209
197
|
graph_name=function_executor_description.graph_name,
|
210
198
|
graph_version=function_executor_description.graph_version,
|
211
199
|
function_name=function_executor_description.function_name,
|
212
|
-
image_uri=None,
|
213
200
|
secret_names=list(function_executor_description.secret_names),
|
214
201
|
cpu_ms_per_sec=function_executor_description.resources.cpu_ms_per_sec,
|
215
202
|
memory_bytes=function_executor_description.resources.memory_bytes,
|
216
203
|
disk_bytes=function_executor_description.resources.disk_bytes,
|
217
204
|
gpu_count=gpu_count,
|
218
205
|
)
|
219
|
-
if function_executor_description.HasField("image_uri"):
|
220
|
-
config.image_uri = function_executor_description.image_uri
|
221
206
|
|
222
207
|
initialize_request: InitializeRequest = InitializeRequest(
|
223
208
|
namespace=function_executor_description.namespace,
|
@@ -226,11 +211,9 @@ async def _create_function_executor(
|
|
226
211
|
function_name=function_executor_description.function_name,
|
227
212
|
graph=graph,
|
228
213
|
)
|
229
|
-
customer_code_timeout_sec:
|
230
|
-
|
231
|
-
|
232
|
-
function_executor_description.customer_code_timeout_ms / 1000.0
|
233
|
-
)
|
214
|
+
customer_code_timeout_sec: float = (
|
215
|
+
function_executor_description.customer_code_timeout_ms / 1000.0
|
216
|
+
)
|
234
217
|
|
235
218
|
function_executor: FunctionExecutor = FunctionExecutor(
|
236
219
|
server_factory=function_executor_server_factory, logger=logger
|
@@ -248,5 +231,35 @@ async def _create_function_executor(
|
|
248
231
|
)
|
249
232
|
return (function_executor, result)
|
250
233
|
except BaseException: # includes asyncio.CancelledError and anything else
|
251
|
-
await
|
234
|
+
# This await is a cancellation point, need to shield to ensure we destroyed the FE.
|
235
|
+
await asyncio.shield(function_executor.destroy())
|
252
236
|
raise
|
237
|
+
|
238
|
+
|
239
|
+
def _validate_initialize_response(
|
240
|
+
response: InitializeResponse,
|
241
|
+
) -> None:
|
242
|
+
"""Validates the initialization response.
|
243
|
+
|
244
|
+
Raises ValueError if the response is not valid.
|
245
|
+
"""
|
246
|
+
validator: MessageValidator = MessageValidator(response)
|
247
|
+
(validator.required_field("outcome_code").required_field("diagnostics"))
|
248
|
+
if (
|
249
|
+
response.outcome_code
|
250
|
+
== InitializationOutcomeCode.INITIALIZATION_OUTCOME_CODE_FAILURE
|
251
|
+
):
|
252
|
+
validator.required_field("failure_reason")
|
253
|
+
|
254
|
+
if response.outcome_code not in [
|
255
|
+
InitializationOutcomeCode.INITIALIZATION_OUTCOME_CODE_SUCCESS,
|
256
|
+
InitializationOutcomeCode.INITIALIZATION_OUTCOME_CODE_FAILURE,
|
257
|
+
]:
|
258
|
+
raise ValueError(f"Invalid outcome code: {response.outcome_code}")
|
259
|
+
|
260
|
+
if response.failure_reason not in [
|
261
|
+
InitializationFailureReason.INITIALIZATION_FAILURE_REASON_UNKNOWN,
|
262
|
+
InitializationFailureReason.INITIALIZATION_FAILURE_REASON_FUNCTION_ERROR,
|
263
|
+
InitializationFailureReason.INITIALIZATION_FAILURE_REASON_INTERNAL_ERROR,
|
264
|
+
]:
|
265
|
+
raise ValueError(f"Invalid failure reason: {response.failure_reason}")
|
@@ -7,6 +7,7 @@ import nanoid
|
|
7
7
|
from tensorlake.function_executor.proto.function_executor_pb2 import (
|
8
8
|
SerializedObject,
|
9
9
|
SerializedObjectEncoding,
|
10
|
+
SerializedObjectManifest,
|
10
11
|
)
|
11
12
|
|
12
13
|
from indexify.executor.blob_store.blob_store import BLOBStore
|
@@ -21,15 +22,6 @@ from .metrics.downloads import (
|
|
21
22
|
metric_graph_download_latency,
|
22
23
|
metric_graph_downloads,
|
23
24
|
metric_graphs_from_cache,
|
24
|
-
metric_reducer_init_value_download_errors,
|
25
|
-
metric_reducer_init_value_download_latency,
|
26
|
-
metric_reducer_init_value_downloads,
|
27
|
-
metric_task_input_download_errors,
|
28
|
-
metric_task_input_download_latency,
|
29
|
-
metric_task_input_downloads,
|
30
|
-
metric_tasks_downloading_graphs,
|
31
|
-
metric_tasks_downloading_inputs,
|
32
|
-
metric_tasks_downloading_reducer_init_value,
|
33
25
|
)
|
34
26
|
|
35
27
|
|
@@ -42,7 +34,6 @@ async def download_graph(
|
|
42
34
|
logger = logger.bind(module=__name__)
|
43
35
|
with (
|
44
36
|
metric_graph_download_errors.count_exceptions(),
|
45
|
-
metric_tasks_downloading_graphs.track_inprogress(),
|
46
37
|
metric_graph_download_latency.time(),
|
47
38
|
):
|
48
39
|
metric_graph_downloads.inc()
|
@@ -54,56 +45,6 @@ async def download_graph(
|
|
54
45
|
)
|
55
46
|
|
56
47
|
|
57
|
-
async def download_input(
|
58
|
-
data_payload: DataPayload,
|
59
|
-
blob_store: BLOBStore,
|
60
|
-
logger: Any,
|
61
|
-
) -> SerializedObject:
|
62
|
-
logger = logger.bind(module=__name__)
|
63
|
-
with (
|
64
|
-
metric_task_input_download_errors.count_exceptions(),
|
65
|
-
metric_tasks_downloading_inputs.track_inprogress(),
|
66
|
-
metric_task_input_download_latency.time(),
|
67
|
-
):
|
68
|
-
metric_task_input_downloads.inc()
|
69
|
-
return await _download_input(
|
70
|
-
data_payload=data_payload,
|
71
|
-
blob_store=blob_store,
|
72
|
-
logger=logger,
|
73
|
-
)
|
74
|
-
|
75
|
-
|
76
|
-
async def download_init_value(
|
77
|
-
data_payload: DataPayload,
|
78
|
-
blob_store: BLOBStore,
|
79
|
-
logger: Any,
|
80
|
-
) -> SerializedObject:
|
81
|
-
logger = logger.bind(module=__name__)
|
82
|
-
with (
|
83
|
-
metric_reducer_init_value_download_errors.count_exceptions(),
|
84
|
-
metric_tasks_downloading_reducer_init_value.track_inprogress(),
|
85
|
-
metric_reducer_init_value_download_latency.time(),
|
86
|
-
):
|
87
|
-
metric_reducer_init_value_downloads.inc()
|
88
|
-
return await _download_input(
|
89
|
-
data_payload=data_payload,
|
90
|
-
blob_store=blob_store,
|
91
|
-
logger=logger,
|
92
|
-
)
|
93
|
-
|
94
|
-
|
95
|
-
async def _download_input(
|
96
|
-
data_payload: DataPayload,
|
97
|
-
blob_store: BLOBStore,
|
98
|
-
logger: Any,
|
99
|
-
) -> SerializedObject:
|
100
|
-
data: bytes = await blob_store.get(uri=data_payload.uri, logger=logger)
|
101
|
-
return _serialized_object_from_data_payload_proto(
|
102
|
-
data_payload=data_payload,
|
103
|
-
data=data,
|
104
|
-
)
|
105
|
-
|
106
|
-
|
107
48
|
async def _download_graph(
|
108
49
|
function_executor_description: FunctionExecutorDescription,
|
109
50
|
cache_path: Path,
|
@@ -130,8 +71,10 @@ async def _download_graph(
|
|
130
71
|
data: bytes = await blob_store.get(
|
131
72
|
uri=function_executor_description.graph.uri, logger=logger
|
132
73
|
)
|
133
|
-
graph =
|
134
|
-
|
74
|
+
graph: SerializedObject = SerializedObject(
|
75
|
+
manifest=serialized_object_manifest_from_data_payload_proto(
|
76
|
+
function_executor_description.graph
|
77
|
+
),
|
135
78
|
data=data,
|
136
79
|
)
|
137
80
|
|
@@ -173,38 +116,43 @@ def _write_cached_graph(path: str, graph: SerializedObject, cache_path: Path) ->
|
|
173
116
|
os.replace(tmp_path, path)
|
174
117
|
|
175
118
|
|
176
|
-
def
|
177
|
-
data_payload: DataPayload,
|
178
|
-
) ->
|
179
|
-
"""Converts the given data payload
|
119
|
+
def serialized_object_manifest_from_data_payload_proto(
|
120
|
+
data_payload: DataPayload,
|
121
|
+
) -> SerializedObjectManifest:
|
122
|
+
"""Converts the given data payload into SerializedObjectManifest accepted by Function Executor.
|
180
123
|
|
181
|
-
Raises ValueError if the supplied data payload can't be converted
|
124
|
+
Raises ValueError if the supplied data payload can't be converted.
|
182
125
|
"""
|
126
|
+
so_manifest: SerializedObjectManifest = SerializedObjectManifest(
|
127
|
+
# Server currently ignores encoding version so we set it to default 0.
|
128
|
+
encoding_version=(
|
129
|
+
data_payload.encoding_version
|
130
|
+
if data_payload.HasField("encoding_version")
|
131
|
+
else 0
|
132
|
+
),
|
133
|
+
sha256_hash=data_payload.sha256_hash,
|
134
|
+
size=data_payload.size,
|
135
|
+
)
|
136
|
+
|
183
137
|
if data_payload.encoding == DataPayloadEncoding.DATA_PAYLOAD_ENCODING_BINARY_PICKLE:
|
184
|
-
|
185
|
-
|
186
|
-
encoding=SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_BINARY_PICKLE,
|
187
|
-
encoding_version=data_payload.encoding_version,
|
138
|
+
so_manifest.encoding = (
|
139
|
+
SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_BINARY_PICKLE
|
188
140
|
)
|
189
141
|
elif data_payload.encoding == DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT:
|
190
|
-
|
191
|
-
|
192
|
-
encoding=SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_UTF8_TEXT,
|
193
|
-
encoding_version=data_payload.encoding_version,
|
142
|
+
so_manifest.encoding = (
|
143
|
+
SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_UTF8_TEXT
|
194
144
|
)
|
195
145
|
elif data_payload.encoding == DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_JSON:
|
196
|
-
|
197
|
-
|
198
|
-
encoding=SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_UTF8_JSON,
|
199
|
-
encoding_version=data_payload.encoding_version,
|
146
|
+
so_manifest.encoding = (
|
147
|
+
SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_UTF8_JSON
|
200
148
|
)
|
201
149
|
elif data_payload.encoding == DataPayloadEncoding.DATA_PAYLOAD_ENCODING_BINARY_ZIP:
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
150
|
+
so_manifest.encoding = (
|
151
|
+
SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_BINARY_ZIP
|
152
|
+
)
|
153
|
+
else:
|
154
|
+
raise ValueError(
|
155
|
+
f"Can't convert data payload {data_payload} into serialized object"
|
206
156
|
)
|
207
157
|
|
208
|
-
|
209
|
-
f"Can't convert data payload {data_payload} into serialized object"
|
210
|
-
)
|
158
|
+
return so_manifest
|
@@ -1,12 +1,15 @@
|
|
1
1
|
from enum import Enum
|
2
2
|
from typing import List, Optional
|
3
3
|
|
4
|
+
from tensorlake.function_executor.proto.function_executor_pb2 import (
|
5
|
+
FunctionInputs,
|
6
|
+
)
|
7
|
+
|
4
8
|
from indexify.executor.function_executor.function_executor import (
|
5
9
|
FunctionExecutor,
|
6
10
|
)
|
7
11
|
from indexify.proto.executor_api_pb2 import FunctionExecutorTerminationReason
|
8
12
|
|
9
|
-
from .function_executor_startup_output import FunctionExecutorStartupOutput
|
10
13
|
from .task_info import TaskInfo
|
11
14
|
|
12
15
|
|
@@ -37,17 +40,20 @@ class FunctionExecutorCreated(BaseEvent):
|
|
37
40
|
"""
|
38
41
|
Event indicating that Function Executor got created or failed.
|
39
42
|
|
40
|
-
The function_executor field is None if the function executor
|
43
|
+
The function_executor field is None if the function executor creation failed.
|
44
|
+
In this case the fe_termination_reason field is set to the reason why.
|
41
45
|
"""
|
42
46
|
|
43
47
|
def __init__(
|
44
48
|
self,
|
45
|
-
|
46
|
-
|
49
|
+
function_executor: Optional[FunctionExecutor],
|
50
|
+
fe_termination_reason: Optional[FunctionExecutorTerminationReason],
|
47
51
|
):
|
48
52
|
super().__init__(EventType.FUNCTION_EXECUTOR_CREATED)
|
49
53
|
self.function_executor: Optional[FunctionExecutor] = function_executor
|
50
|
-
self.
|
54
|
+
self.fe_termination_reason: Optional[FunctionExecutorTerminationReason] = (
|
55
|
+
fe_termination_reason
|
56
|
+
)
|
51
57
|
|
52
58
|
|
53
59
|
class FunctionExecutorTerminated(BaseEvent):
|
@@ -151,9 +157,9 @@ class TaskExecutionFinished(BaseEvent):
|
|
151
157
|
)
|
152
158
|
|
153
159
|
|
154
|
-
class
|
160
|
+
class TaskFinalizationFinished(BaseEvent):
|
155
161
|
"""
|
156
|
-
Event indicating that a task
|
162
|
+
Event indicating that a task finalization is finished.
|
157
163
|
"""
|
158
164
|
|
159
165
|
def __init__(self, task_info: TaskInfo, is_success: bool):
|