indexify 0.3.19-py3-none-any.whl → 0.3.21-py3-none-any.whl
This diff compares publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- indexify/cli/cli.py +12 -0
- indexify/executor/api_objects.py +11 -6
- indexify/executor/blob_store/blob_store.py +69 -0
- indexify/executor/blob_store/local_fs_blob_store.py +48 -0
- indexify/executor/blob_store/metrics/blob_store.py +33 -0
- indexify/executor/blob_store/s3_blob_store.py +88 -0
- indexify/executor/downloader.py +192 -27
- indexify/executor/executor.py +29 -13
- indexify/executor/function_executor/function_executor.py +1 -1
- indexify/executor/function_executor/function_executor_states_container.py +5 -0
- indexify/executor/function_executor/function_executor_status.py +2 -0
- indexify/executor/function_executor/health_checker.py +7 -2
- indexify/executor/function_executor/invocation_state_client.py +4 -2
- indexify/executor/function_executor/single_task_runner.py +2 -0
- indexify/executor/function_executor/task_output.py +8 -1
- indexify/executor/grpc/channel_manager.py +4 -3
- indexify/executor/grpc/function_executor_controller.py +163 -193
- indexify/executor/grpc/metrics/state_reconciler.py +17 -0
- indexify/executor/grpc/metrics/task_controller.py +8 -0
- indexify/executor/grpc/state_reconciler.py +305 -188
- indexify/executor/grpc/state_reporter.py +18 -10
- indexify/executor/grpc/task_controller.py +247 -189
- indexify/executor/metrics/task_reporter.py +17 -0
- indexify/executor/task_reporter.py +217 -94
- indexify/executor/task_runner.py +1 -0
- indexify/proto/executor_api.proto +37 -11
- indexify/proto/executor_api_pb2.py +49 -47
- indexify/proto/executor_api_pb2.pyi +55 -15
- {indexify-0.3.19.dist-info → indexify-0.3.21.dist-info}/METADATA +2 -1
- {indexify-0.3.19.dist-info → indexify-0.3.21.dist-info}/RECORD +32 -27
- indexify/executor/grpc/completed_tasks_container.py +0 -26
- {indexify-0.3.19.dist-info → indexify-0.3.21.dist-info}/WHEEL +0 -0
- {indexify-0.3.19.dist-info → indexify-0.3.21.dist-info}/entry_points.txt +0 -0
indexify/executor/task_reporter.py
CHANGED

```diff
@@ -1,4 +1,5 @@
 import asyncio
+import hashlib
 import time
 from typing import Any, List, Optional, Tuple
 
@@ -7,8 +8,9 @@ from httpx import Timeout
 from tensorlake.function_executor.proto.function_executor_pb2 import FunctionOutput
 from tensorlake.utils.http_client import get_httpx_client
 
+from indexify.proto.executor_api_pb2 import DataPayload as DataPayloadProto
 from indexify.proto.executor_api_pb2 import (
-
+    DataPayloadEncoding,
     OutputEncoding,
     ReportTaskOutcomeRequest,
     TaskOutcome,
@@ -18,10 +20,12 @@ from indexify.proto.executor_api_pb2_grpc import ExecutorAPIStub
 from .api_objects import (
     TASK_OUTCOME_FAILURE,
     TASK_OUTCOME_SUCCESS,
+    DataPayload,
     IngestFnOutputsResponse,
     RouterOutput,
     TaskResult,
 )
+from .blob_store.blob_store import BLOBStore
 from .function_executor.task_output import TaskOutput
 from .grpc.channel_manager import ChannelManager
 from .metrics.task_reporter import (
@@ -31,6 +35,9 @@ from .metrics.task_reporter import (
     metric_server_ingest_files_errors,
     metric_server_ingest_files_latency,
     metric_server_ingest_files_requests,
+    metric_task_output_blob_store_upload_errors,
+    metric_task_output_blob_store_upload_latency,
+    metric_task_output_blob_store_uploads,
 )
 
 
```
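The new imports line up with the rest of this file's changes: `hashlib` backs the new `_compute_hash` helper, `BLOBStore` backs the new blob-store upload path, and `DataPayloadProto`/`DataPayloadEncoding` come from the reworked `DataPayload` message in `executor_api.proto` (see the proto diff at the end of this page).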
```diff
@@ -62,6 +69,7 @@ class TaskReporter:
         base_url: str,
         executor_id: str,
         channel_manager: ChannelManager,
+        blob_store: BLOBStore,
         config_path: Optional[str] = None,
     ):
         self._base_url = base_url
@@ -74,8 +82,9 @@ class TaskReporter:
         # results in not reusing established TCP connections to server.
         self._client = get_httpx_client(config_path, make_async=False)
         self._channel_manager = channel_manager
+        self._blob_store = blob_store
 
-    async def shutdown(self):
+    async def shutdown(self) -> None:
         """Shuts down the task reporter.
 
         Task reporter stops reporting all task outcomes to the Server.
@@ -84,7 +93,7 @@ class TaskReporter:
         """
         self._is_shutdown = True
 
-    async def report(self, output: TaskOutput, logger: Any):
+    async def report(self, output: TaskOutput, logger: Any) -> None:
         """Reports result of the supplied task."""
         logger = logger.bind(module=__name__)
 
```
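`TaskReporter` now requires a `BLOBStore` collaborator. A minimal wiring sketch, assuming `channel_manager` and `blob_store` were built elsewhere (their construction is not part of this file's diff); the `base_url` and `executor_id` values are illustrative only:

```python
# Hypothetical wiring sketch; only the argument names come from this diff.
reporter = TaskReporter(
    base_url="http://localhost:8900",  # assumed server address
    executor_id="executor-1",          # assumed executor id
    channel_manager=channel_manager,   # from indexify/executor/grpc/channel_manager.py
    blob_store=blob_store,             # from indexify/executor/blob_store/blob_store.py
)
```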
```diff
@@ -94,9 +103,13 @@ class TaskReporter:
             )
             return
 
-
-
+        # TODO: If the files are uploaded successfully,
+        # we should record that so that if we fail to report
+        # the task outcome, we don't retry the upload.
+        # This will save us some time and resources.
+        # It's good to do this once we delete all the legacy code paths.
 
+        output_summary: TaskOutputSummary = _task_output_summary(output)
         logger.info(
             "reporting task outcome",
             total_bytes=output_summary.total_bytes,
@@ -110,6 +123,80 @@ class TaskReporter:
             stderr_bytes=output_summary.stderr_total_bytes,
         )
 
+        if output.output_payload_uri_prefix is None:
+            ingested_files = await self._ingest_files_at_server(output, logger)
+        else:
+            ingested_files = await self._ingest_files_at_blob_store(output, logger)
+
+        fn_outputs = []
+        for data_payload in ingested_files.data_payloads:
+            fn_outputs.append(
+                DataPayloadProto(
+                    path=data_payload.path,  # TODO: stop using this deprecated field once Server side migration is done.
+                    uri=data_payload.path,
+                    size=data_payload.size,
+                    sha256_hash=data_payload.sha256_hash,
+                    encoding=_to_grpc_data_payload_encoding(output),
+                    encoding_version=0,
+                )
+            )
+        stdout, stderr = None, None
+        if ingested_files.stdout is not None:
+            stdout = DataPayloadProto(
+                path=ingested_files.stdout.path,  # TODO: stop using this deprecated field once Server side migration is done.
+                uri=ingested_files.stdout.path,
+                size=ingested_files.stdout.size,
+                sha256_hash=ingested_files.stdout.sha256_hash,
+                encoding=DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT,
+                encoding_version=0,
+            )
+        if ingested_files.stderr is not None:
+            stderr = DataPayloadProto(
+                path=ingested_files.stderr.path,  # TODO: stop using this deprecated field once Server side migration is done.
+                uri=ingested_files.stderr.path,
+                size=ingested_files.stderr.size,
+                sha256_hash=ingested_files.stderr.sha256_hash,
+                encoding=DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT,
+                encoding_version=0,
+            )
+
+        request = ReportTaskOutcomeRequest(
+            task_id=output.task_id,
+            namespace=output.namespace,
+            graph_name=output.graph_name,
+            function_name=output.function_name,
+            graph_invocation_id=output.graph_invocation_id,
+            outcome=_to_grpc_task_outcome(output),
+            invocation_id=output.graph_invocation_id,
+            executor_id=self._executor_id,
+            reducer=output.reducer,
+            next_functions=(output.router_output.edges if output.router_output else []),
+            fn_outputs=fn_outputs,
+            stdout=stdout,
+            stderr=stderr,
+            output_encoding=_to_grpc_output_encoding(output),
+            output_encoding_version=0,
+        )
+        try:
+            stub = ExecutorAPIStub(await self._channel_manager.get_channel())
+            with (
+                metric_report_task_outcome_latency.time(),
+                metric_report_task_outcome_errors.count_exceptions(),
+            ):
+                metric_report_task_outcome_rpcs.inc()
+                await stub.report_task_outcome(request, timeout=5.0)
+        except Exception as e:
+            logger.error("failed to report task outcome", error=e)
+            raise e
+
+    async def _ingest_files_at_server(
+        self, output: TaskOutput, logger: Any
+    ) -> IngestFnOutputsResponse:
+        logger.warning("uploading task output files to server (deprecated mode)")
+
+        task_result, output_files = self._process_task_output(output)
+        task_result_data = task_result.model_dump_json(exclude_none=True)
+
         kwargs = {
             "data": {"task_result": task_result_data},
             # Use httpx default timeout of 5s for all timeout types.
```
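The rewritten `report()` picks the upload path per task: tasks without an `output_payload_uri_prefix` fall back to the legacy server-side ingestion (`_ingest_files_at_server`), while tasks that carry a prefix upload directly to the blob store. In both cases the ingested files are normalized into `DataPayloadProto` messages, with the same value written to both the deprecated `path` field and the new `uri` field until the server-side migration completes.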
```diff
@@ -132,7 +219,7 @@ class TaskReporter:
         )
         end_time = time.time()
         logger.info(
-            "files uploaded",
+            "files uploaded to server",
             response_time=end_time - start_time,
             response_code=response.status_code,
         )
@@ -148,68 +235,86 @@ class TaskReporter:
             f"Response text: '{response.text}'."
         ) from e
 
-        # TODO: If the files are uploaded successfully,
-        # we should record that so that if we fail to report
-        # the task outcome, we don't retry the upload.
-        # This will save us some time and resources.
-
         ingested_files_response = response.json()
-
-
-
-
-
-
-
-
-
-
-
-
+        return IngestFnOutputsResponse.model_validate(ingested_files_response)
+
+    async def _ingest_files_at_blob_store(
+        self, output: TaskOutput, logger: Any
+    ) -> IngestFnOutputsResponse:
+        start_time = time.time()
+        with (
+            metric_task_output_blob_store_upload_latency.time(),
+            metric_task_output_blob_store_upload_errors.count_exceptions(),
+        ):
+            metric_task_output_blob_store_uploads.inc()
+            response = await self._upload_output_to_blob_store(output, logger)
+
+        logger.info(
+            "files uploaded to blob store",
+            duration=time.time() - start_time,
+        )
+        return response
+
+    async def _upload_output_to_blob_store(
+        self, output: TaskOutput, logger: Any
+    ) -> IngestFnOutputsResponse:
+        data_payloads: List[DataPayload] = []
+        stdout: Optional[DataPayload] = None
+        stderr: Optional[DataPayload] = None
+
+        if output.stdout is not None:
+            stdout_url = f"{output.output_payload_uri_prefix}.{output.task_id}.stdout"
+            stdout_bytes: bytes = output.stdout.encode()
+            await self._blob_store.put(stdout_url, stdout_bytes, logger)
             stdout = DataPayload(
-                path=
-                size=
-                sha256_hash=
+                path=stdout_url,
+                size=len(stdout_bytes),
+                sha256_hash=_compute_hash(stdout_bytes),
             )
-
+
+        if output.stderr is not None:
+            stderr_url = f"{output.output_payload_uri_prefix}.{output.task_id}.stderr"
+            stderr_bytes: bytes = output.stderr.encode()
+            await self._blob_store.put(stderr_url, stderr_bytes, logger)
             stderr = DataPayload(
-                path=
-                size=
-                sha256_hash=
+                path=stderr_url,
+                size=len(stderr_bytes),
+                sha256_hash=_compute_hash(stderr_bytes),
             )
 
-
-
-
-
-
-
-
-
-
-
-
-
+        if output.function_output is not None:
+            for func_output_item in output.function_output.outputs:
+                node_output_sequence = len(data_payloads)
+                if output.reducer:
+                    # Reducer tasks have to write their results into the same blob.
+                    output_url = (
+                        f"{output.output_payload_uri_prefix}.{node_output_sequence}"
+                    )
+                else:
+                    # Regular tasks write their results into different blobs made unique using task ids.
+                    output_url = f"{output.output_payload_uri_prefix}.{output.task_id}.{node_output_sequence}"
+
+                output_bytes: bytes = (
+                    func_output_item.bytes
+                    if func_output_item.HasField("bytes")
+                    else func_output_item.string.encode()
+                )
+                await self._blob_store.put(output_url, output_bytes, logger)
+                data_payloads.append(
+                    DataPayload(
+                        path=output_url,
+                        size=len(output_bytes),
+                        sha256_hash=_compute_hash(output_bytes),
+                    )
+                )
+
+        return IngestFnOutputsResponse(
+            data_payloads=data_payloads,
             stdout=stdout,
             stderr=stderr,
-            output_encoding=_to_grpc_output_encoding(output),
-            output_encoding_version=0,
         )
-        try:
-            stub = ExecutorAPIStub(await self._channel_manager.get_channel())
-            with (
-                metric_report_task_outcome_latency.time(),
-                metric_report_task_outcome_errors.count_exceptions(),
-            ):
-                metric_report_task_outcome_rpcs.inc()
-                await stub.report_task_outcome(request, timeout=5.0)
-        except Exception as e:
-            logger.error("failed to report task outcome", error=e)
-            raise e
 
-    def _process_task_output(
-        self, output: TaskOutput
-    ) -> Tuple[TaskResult, List[Any], TaskOutputSummary]:
+    def _process_task_output(self, output: TaskOutput) -> Tuple[TaskResult, List[Any]]:
         task_result = TaskResult(
             outcome="failure",
             namespace=output.namespace,
```
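The blob URIs are derived from the server-supplied prefix. A worked example with a hypothetical prefix (all values illustrative; the naming scheme matches `_upload_output_to_blob_store` above):

```python
prefix = "s3://example-bucket/outputs/fn-a"  # hypothetical Task.output_payload_uri_prefix
task_id = "task-123"

stdout_uri = f"{prefix}.{task_id}.stdout"   # s3://example-bucket/outputs/fn-a.task-123.stdout
first_output_uri = f"{prefix}.{task_id}.0"  # regular task: task id keeps blobs unique
reducer_output_uri = f"{prefix}.0"          # reducer task: all writes target the same blob
```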
```diff
@@ -220,9 +325,8 @@ class TaskReporter:
             task_id=output.task_id,
         )
         output_files: List[Any] = []
-        summary: TaskOutputSummary = TaskOutputSummary()
         if output is None:
-            return task_result, output_files, summary
+            return task_result, output_files
 
         task_result.outcome = (
             TASK_OUTCOME_SUCCESS if output.success else TASK_OUTCOME_FAILURE
@@ -230,33 +334,19 @@ class TaskReporter:
         task_result.reducer = output.reducer
 
         _process_function_output(
-            function_output=output.function_output,
-            output_files=output_files,
-            summary=summary,
+            function_output=output.function_output, output_files=output_files
         )
         _process_router_output(
-            router_output=output.router_output, task_result=task_result
-        )
-        _process_stdout(
-            stdout=output.stdout, output_files=output_files, summary=summary
-        )
-        _process_stderr(
-            stderr=output.stderr, output_files=output_files, summary=summary
+            router_output=output.router_output, task_result=task_result
         )
+        _process_stdout(stdout=output.stdout, output_files=output_files)
+        _process_stderr(stderr=output.stderr, output_files=output_files)
 
-        summary.total_bytes = (
-            summary.output_total_bytes
-            + summary.stdout_total_bytes
-            + summary.stderr_total_bytes
-        )
-
-        return task_result, output_files, summary
+        return task_result, output_files
 
 
 def _process_function_output(
-    function_output: Optional[FunctionOutput],
-    output_files: List[Any],
-    summary: TaskOutputSummary,
+    function_output: Optional[FunctionOutput], output_files: List[Any]
 ) -> None:
     if function_output is None:
         return
@@ -269,25 +359,19 @@ def _process_function_output(
                 (nanoid.generate(), payload, output.content_type),
             )
         )
-        summary.output_count += 1
-        summary.output_total_bytes += len(payload)
 
 
 def _process_router_output(
     router_output: Optional[RouterOutput],
     task_result: TaskResult,
-    summary: TaskOutputSummary,
 ) -> None:
     if router_output is None:
         return
 
     task_result.router_output = RouterOutput(edges=router_output.edges)
-    summary.router_output_count += 1
 
 
-def _process_stdout(
-    stdout: Optional[str], output_files: List[Any], summary: TaskOutputSummary
-) -> None:
+def _process_stdout(stdout: Optional[str], output_files: List[Any]) -> None:
     if stdout is None:
         return
 
@@ -301,13 +385,9 @@ def _process_stdout(
             ),
         )
     )
-    summary.stdout_count += 1
-    summary.stdout_total_bytes += len(stdout)
 
 
-def _process_stderr(
-    stderr: Optional[str], output_files: List[Any], summary: TaskOutputSummary
-) -> None:
+def _process_stderr(stderr: Optional[str], output_files: List[Any]) -> None:
    if stderr is None:
        return
 
@@ -321,8 +401,38 @@ def _process_stderr(
             ),
         )
     )
-
-
+
+
+def _task_output_summary(output: TaskOutput) -> TaskOutputSummary:
+    summary: TaskOutputSummary = TaskOutputSummary()
+
+    if output.stdout is not None:
+        summary.stdout_count += 1
+        summary.stdout_total_bytes += len(output.stdout)
+
+    if output.stderr is not None:
+        summary.stderr_count += 1
+        summary.stderr_total_bytes += len(output.stderr)
+
+    if output.function_output is not None:
+        for func_output_item in output.function_output.outputs:
+            output_len: bytes = len(
+                func_output_item.bytes
+                if func_output_item.HasField("bytes")
+                else func_output_item.string
+            )
+            summary.output_count += 1
+            summary.output_total_bytes += output_len
+
+    if output.router_output is not None:
+        summary.router_output_count += 1
+
+    summary.total_bytes = (
+        summary.output_total_bytes
+        + summary.stdout_total_bytes
+        + summary.stderr_total_bytes
+    )
+    return summary
 
 
 def _to_grpc_task_outcome(task_output: TaskOutput) -> TaskOutcome:
@@ -337,3 +447,16 @@ def _to_grpc_output_encoding(task_output: TaskOutput) -> OutputEncoding:
         return OutputEncoding.OUTPUT_ENCODING_JSON
     else:
         return OutputEncoding.OUTPUT_ENCODING_PICKLE
+
+
+def _to_grpc_data_payload_encoding(task_output: TaskOutput) -> DataPayloadEncoding:
+    if task_output.output_encoding == "json":
+        return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_JSON
+    else:
+        return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_BINARY_PICKLE
+
+
+def _compute_hash(data: bytes) -> str:
+    hasher = hashlib.sha256(usedforsecurity=False)
+    hasher.update(data)
+    return hasher.hexdigest()
```
indexify/executor/task_runner.py
CHANGED

```diff
@@ -85,6 +85,7 @@ class TaskRunner:
                 function_name=task_input.task.compute_fn,
                 graph_version=task_input.task.graph_version,
                 graph_invocation_id=task_input.task.invocation_id,
+                output_payload_uri_prefix=task_input.task.output_payload_uri_prefix,
             )
         finally:
             if state is not None:
```
indexify/proto/executor_api.proto
CHANGED

```diff
@@ -4,6 +4,28 @@ syntax = "proto3";
 // Existing clients won't find the service if the package name changes.
 package executor_api_pb;
 
+// ===== DataPayload =====
+enum DataPayloadEncoding {
+  DATA_PAYLOAD_ENCODING_UNKNOWN = 0;
+  // These encodings are currently mapping 1:1 to mime types.
+  // TODO: use SDK specific encodings becase 1:1 mapping might not work in the future.
+  DATA_PAYLOAD_ENCODING_UTF8_JSON = 1;
+  DATA_PAYLOAD_ENCODING_UTF8_TEXT = 2;
+  DATA_PAYLOAD_ENCODING_BINARY_PICKLE = 3;
+}
+
+message DataPayload {
+  optional string path = 1; // deprecated, TODO: remove when URI us used everywhere
+  optional uint64 size = 2;
+  optional string sha256_hash = 3;
+  // URI of the data.
+  // S3 URI if the data is stored in S3.
+  // Starts with "file://"" prefix if the data is stored on a local file system.
+  optional string uri = 4;
+  optional DataPayloadEncoding encoding = 5;
+  optional uint64 encoding_version = 6;
+}
+
 // ===== report_executor_state RPC =====
 
 enum GPUModel {
```
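A minimal sketch of populating the new `DataPayload` message from Python through the generated bindings imported in `task_reporter.py` above (field values are hypothetical):

```python
from indexify.proto.executor_api_pb2 import DataPayload, DataPayloadEncoding

payload = DataPayload(
    uri="file:///var/indexify/outputs/fn-a.task-123.0",   # hypothetical local-FS URI
    path="file:///var/indexify/outputs/fn-a.task-123.0",  # deprecated; mirrors uri for now
    size=123,
    sha256_hash="0" * 64,  # placeholder; real value is the hex digest of the blob bytes
    encoding=DataPayloadEncoding.DATA_PAYLOAD_ENCODING_BINARY_PICKLE,
    encoding_version=0,
)
```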
```diff
@@ -72,6 +94,7 @@ message FunctionExecutorDescription {
   optional HostResources resource_limits = 8;
   // Timeout for customer code duration during FE creation.
   optional uint32 customer_code_timeout_ms = 9;
+  optional DataPayload graph = 10;
 }
 
 message FunctionExecutorState {
@@ -112,6 +135,9 @@ message ExecutorState {
   repeated FunctionExecutorState function_executor_states = 9;
   map<string, string> labels = 10;
   optional string state_hash = 11;
+  // Server supplied clock value of the latest desired executor state that was
+  // reconciled by Executor. Not included into state_hash.
+  optional uint64 server_clock = 12;
 }
 
 // A message sent by Executor to report its up to date state to Server.
```
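`server_clock` lets the executor echo back which desired-state version it has already reconciled; keeping it out of `state_hash` means a clock bump alone does not make the reported state look changed.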
```diff
@@ -131,9 +157,15 @@ message Task {
   optional string graph_version = 4;
   optional string function_name = 5;
   optional string graph_invocation_id = 6;
-  optional string input_key = 8;
-  optional string reducer_output_key = 9;
+  optional string input_key = 8; // deprecated. TODO: remove when input is used everywhere
+  optional string reducer_output_key = 9; // deprecated. TODO: remove when reducer_input is used everywhere
   optional uint32 timeout_ms = 10;
+  optional DataPayload input = 11;
+  optional DataPayload reducer_input = 12;
+  // URI prefix for the output payloads.
+  // S3 URI if the data is stored in S3.
+  // Starts with "file://"" prefix followed by an absolute directory path if the data is stored on a local file system.
+  optional string output_payload_uri_prefix = 13;
 }
 
 message TaskAllocation {
```
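With `input`, `reducer_input`, and `output_payload_uri_prefix`, the server can hand the executor both its inputs and the destination for its outputs as blob URIs rather than the deprecated server-managed keys. Illustrative prefix values (hypothetical): `s3://example-bucket/outputs/fn-a` for S3, or `file:///var/indexify/outputs/fn-a` for a local file system; the executor appends the task id and output sequence as shown in the `task_reporter.py` diff above.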
```diff
@@ -163,12 +195,6 @@ enum TaskOutcome {
   TASK_OUTCOME_FAILURE = 2;
 }
 
-message DataPayload {
-  optional string path = 1;
-  optional uint64 size = 2;
-  optional string sha256_hash = 3;
-}
-
 enum OutputEncoding {
   OUTPUT_ENCODING_UNKNOWN = 0;
   OUTPUT_ENCODING_JSON = 1;
@@ -183,7 +209,7 @@ message ReportTaskOutcomeRequest {
   optional string function_name = 4;
   optional string graph_invocation_id = 6;
   optional TaskOutcome outcome = 7;
-  optional string invocation_id = 8;
+  optional string invocation_id = 8; // deprecated. TODO: remove when graph_invocation_id is used everywhere
   optional string executor_id = 9;
   optional bool reducer = 10;
 
@@ -196,10 +222,10 @@ message ReportTaskOutcomeRequest {
   optional DataPayload stdout = 14;
   optional DataPayload stderr = 15;
   // Output encoding of all the outputs of a function have to be same.
-  optional OutputEncoding output_encoding = 13;
+  optional OutputEncoding output_encoding = 13; // deprecated. TODO: remove when DataPayload.encoding is used everywhere
   // This allows us to change how we encode the output from functions
   // and serialize them into storage.
-  optional uint64 output_encoding_version = 5;
+  optional uint64 output_encoding_version = 5; // deprecated. TODO: remove when DataPayload.encoding_version is used everywhere
 }
 
 message ReportTaskOutcomeResponse {
```