indexify 0.3.19-py3-none-any.whl → 0.3.21-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. indexify/cli/cli.py +12 -0
  2. indexify/executor/api_objects.py +11 -6
  3. indexify/executor/blob_store/blob_store.py +69 -0
  4. indexify/executor/blob_store/local_fs_blob_store.py +48 -0
  5. indexify/executor/blob_store/metrics/blob_store.py +33 -0
  6. indexify/executor/blob_store/s3_blob_store.py +88 -0
  7. indexify/executor/downloader.py +192 -27
  8. indexify/executor/executor.py +29 -13
  9. indexify/executor/function_executor/function_executor.py +1 -1
  10. indexify/executor/function_executor/function_executor_states_container.py +5 -0
  11. indexify/executor/function_executor/function_executor_status.py +2 -0
  12. indexify/executor/function_executor/health_checker.py +7 -2
  13. indexify/executor/function_executor/invocation_state_client.py +4 -2
  14. indexify/executor/function_executor/single_task_runner.py +2 -0
  15. indexify/executor/function_executor/task_output.py +8 -1
  16. indexify/executor/grpc/channel_manager.py +4 -3
  17. indexify/executor/grpc/function_executor_controller.py +163 -193
  18. indexify/executor/grpc/metrics/state_reconciler.py +17 -0
  19. indexify/executor/grpc/metrics/task_controller.py +8 -0
  20. indexify/executor/grpc/state_reconciler.py +305 -188
  21. indexify/executor/grpc/state_reporter.py +18 -10
  22. indexify/executor/grpc/task_controller.py +247 -189
  23. indexify/executor/metrics/task_reporter.py +17 -0
  24. indexify/executor/task_reporter.py +217 -94
  25. indexify/executor/task_runner.py +1 -0
  26. indexify/proto/executor_api.proto +37 -11
  27. indexify/proto/executor_api_pb2.py +49 -47
  28. indexify/proto/executor_api_pb2.pyi +55 -15
  29. {indexify-0.3.19.dist-info → indexify-0.3.21.dist-info}/METADATA +2 -1
  30. {indexify-0.3.19.dist-info → indexify-0.3.21.dist-info}/RECORD +32 -27
  31. indexify/executor/grpc/completed_tasks_container.py +0 -26
  32. {indexify-0.3.19.dist-info → indexify-0.3.21.dist-info}/WHEEL +0 -0
  33. {indexify-0.3.19.dist-info → indexify-0.3.21.dist-info}/entry_points.txt +0 -0
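The headline change in this release pair: when the Server supplies an output_payload_uri_prefix for a task, the Executor now uploads task outputs (function outputs, stdout, stderr) directly to a blob store (S3 or the local filesystem, via the new blob_store modules above) and reports only their URIs, sizes, and SHA-256 hashes over gRPC; without the prefix it falls back to the legacy server-side ingestion. A minimal sketch of the new dispatch, using only names that appear in the task_reporter.py diff below (not the real class, which also handles metrics and shutdown):

# Hedged sketch of TaskReporter.report's new upload-path dispatch.
async def report_outputs(reporter, output, logger):
    if output.output_payload_uri_prefix is None:
        # Legacy path: POST the files to the Server for ingestion.
        return await reporter._ingest_files_at_server(output, logger)
    # New path: upload payloads straight to the blob store; only URIs,
    # sizes, and sha256 hashes travel over the report_task_outcome RPC.
    return await reporter._ingest_files_at_blob_store(output, logger)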
indexify/executor/task_reporter.py
@@ -1,4 +1,5 @@
  import asyncio
+ import hashlib
  import time
  from typing import Any, List, Optional, Tuple

@@ -7,8 +8,9 @@ from httpx import Timeout
  from tensorlake.function_executor.proto.function_executor_pb2 import FunctionOutput
  from tensorlake.utils.http_client import get_httpx_client

+ from indexify.proto.executor_api_pb2 import DataPayload as DataPayloadProto
  from indexify.proto.executor_api_pb2 import (
-     DataPayload,
+     DataPayloadEncoding,
      OutputEncoding,
      ReportTaskOutcomeRequest,
      TaskOutcome,
@@ -18,10 +20,12 @@ from indexify.proto.executor_api_pb2_grpc import ExecutorAPIStub
  from .api_objects import (
      TASK_OUTCOME_FAILURE,
      TASK_OUTCOME_SUCCESS,
+     DataPayload,
      IngestFnOutputsResponse,
      RouterOutput,
      TaskResult,
  )
+ from .blob_store.blob_store import BLOBStore
  from .function_executor.task_output import TaskOutput
  from .grpc.channel_manager import ChannelManager
  from .metrics.task_reporter import (
@@ -31,6 +35,9 @@ from .metrics.task_reporter import (
      metric_server_ingest_files_errors,
      metric_server_ingest_files_latency,
      metric_server_ingest_files_requests,
+     metric_task_output_blob_store_upload_errors,
+     metric_task_output_blob_store_upload_latency,
+     metric_task_output_blob_store_uploads,
  )


@@ -62,6 +69,7 @@ class TaskReporter:
          base_url: str,
          executor_id: str,
          channel_manager: ChannelManager,
+         blob_store: BLOBStore,
          config_path: Optional[str] = None,
      ):
          self._base_url = base_url
@@ -74,8 +82,9 @@ class TaskReporter:
          # results in not reusing established TCP connections to server.
          self._client = get_httpx_client(config_path, make_async=False)
          self._channel_manager = channel_manager
+         self._blob_store = blob_store

-     async def shutdown(self):
+     async def shutdown(self) -> None:
          """Shuts down the task reporter.

          Task reporter stops reporting all task outcomes to the Server.
@@ -84,7 +93,7 @@
          """
          self._is_shutdown = True

-     async def report(self, output: TaskOutput, logger: Any):
+     async def report(self, output: TaskOutput, logger: Any) -> None:
          """Reports result of the supplied task."""
          logger = logger.bind(module=__name__)

@@ -94,9 +103,13 @@
              )
              return

-         task_result, output_files, output_summary = self._process_task_output(output)
-         task_result_data = task_result.model_dump_json(exclude_none=True)
+         # TODO: If the files are uploaded successfully,
+         # we should record that so that if we fail to report
+         # the task outcome, we don't retry the upload.
+         # This will save us some time and resources.
+         # It's good to do this once we delete all the legacy code paths.

+         output_summary: TaskOutputSummary = _task_output_summary(output)
          logger.info(
              "reporting task outcome",
              total_bytes=output_summary.total_bytes,
@@ -110,6 +123,80 @@
              stderr_bytes=output_summary.stderr_total_bytes,
          )

+         if output.output_payload_uri_prefix is None:
+             ingested_files = await self._ingest_files_at_server(output, logger)
+         else:
+             ingested_files = await self._ingest_files_at_blob_store(output, logger)
+
+         fn_outputs = []
+         for data_payload in ingested_files.data_payloads:
+             fn_outputs.append(
+                 DataPayloadProto(
+                     path=data_payload.path,  # TODO: stop using this deprecated field once Server side migration is done.
+                     uri=data_payload.path,
+                     size=data_payload.size,
+                     sha256_hash=data_payload.sha256_hash,
+                     encoding=_to_grpc_data_payload_encoding(output),
+                     encoding_version=0,
+                 )
+             )
+         stdout, stderr = None, None
+         if ingested_files.stdout is not None:
+             stdout = DataPayloadProto(
+                 path=ingested_files.stdout.path,  # TODO: stop using this deprecated field once Server side migration is done.
+                 uri=ingested_files.stdout.path,
+                 size=ingested_files.stdout.size,
+                 sha256_hash=ingested_files.stdout.sha256_hash,
+                 encoding=DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT,
+                 encoding_version=0,
+             )
+         if ingested_files.stderr is not None:
+             stderr = DataPayloadProto(
+                 path=ingested_files.stderr.path,  # TODO: stop using this deprecated field once Server side migration is done.
+                 uri=ingested_files.stderr.path,
+                 size=ingested_files.stderr.size,
+                 sha256_hash=ingested_files.stderr.sha256_hash,
+                 encoding=DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT,
+                 encoding_version=0,
+             )
+
+         request = ReportTaskOutcomeRequest(
+             task_id=output.task_id,
+             namespace=output.namespace,
+             graph_name=output.graph_name,
+             function_name=output.function_name,
+             graph_invocation_id=output.graph_invocation_id,
+             outcome=_to_grpc_task_outcome(output),
+             invocation_id=output.graph_invocation_id,
+             executor_id=self._executor_id,
+             reducer=output.reducer,
+             next_functions=(output.router_output.edges if output.router_output else []),
+             fn_outputs=fn_outputs,
+             stdout=stdout,
+             stderr=stderr,
+             output_encoding=_to_grpc_output_encoding(output),
+             output_encoding_version=0,
+         )
+         try:
+             stub = ExecutorAPIStub(await self._channel_manager.get_channel())
+             with (
+                 metric_report_task_outcome_latency.time(),
+                 metric_report_task_outcome_errors.count_exceptions(),
+             ):
+                 metric_report_task_outcome_rpcs.inc()
+                 await stub.report_task_outcome(request, timeout=5.0)
+         except Exception as e:
+             logger.error("failed to report task outcome", error=e)
+             raise e
+
+     async def _ingest_files_at_server(
+         self, output: TaskOutput, logger: Any
+     ) -> IngestFnOutputsResponse:
+         logger.warning("uploading task output files to server (deprecated mode)")
+
+         task_result, output_files = self._process_task_output(output)
+         task_result_data = task_result.model_dump_json(exclude_none=True)
+
          kwargs = {
              "data": {"task_result": task_result_data},
              # Use httpx default timeout of 5s for all timeout types.
@@ -132,7 +219,7 @@
          )
          end_time = time.time()
          logger.info(
-             "files uploaded",
+             "files uploaded to server",
              response_time=end_time - start_time,
              response_code=response.status_code,
          )
@@ -148,68 +235,86 @@
              f"Response text: '{response.text}'."
          ) from e

-         # TODO: If the files are uploaded successfully,
-         # we should record that so that if we fail to report
-         # the task outcome, we don't retry the upload.
-         # This will save us some time and resources.
-
          ingested_files_response = response.json()
-         ingested_files = IngestFnOutputsResponse.model_validate(ingested_files_response)
-         fn_outputs = []
-         for data_payload in ingested_files.data_payloads:
-             fn_outputs.append(
-                 DataPayload(
-                     path=data_payload.path,
-                     size=data_payload.size,
-                     sha256_hash=data_payload.sha256_hash,
-                 )
-             )
-         stdout, stderr = None, None
-         if ingested_files.stdout:
+         return IngestFnOutputsResponse.model_validate(ingested_files_response)
+
+     async def _ingest_files_at_blob_store(
+         self, output: TaskOutput, logger: Any
+     ) -> IngestFnOutputsResponse:
+         start_time = time.time()
+         with (
+             metric_task_output_blob_store_upload_latency.time(),
+             metric_task_output_blob_store_upload_errors.count_exceptions(),
+         ):
+             metric_task_output_blob_store_uploads.inc()
+             response = await self._upload_output_to_blob_store(output, logger)
+
+         logger.info(
+             "files uploaded to blob store",
+             duration=time.time() - start_time,
+         )
+         return response
+
+     async def _upload_output_to_blob_store(
+         self, output: TaskOutput, logger: Any
+     ) -> IngestFnOutputsResponse:
+         data_payloads: List[DataPayload] = []
+         stdout: Optional[DataPayload] = None
+         stderr: Optional[DataPayload] = None
+
+         if output.stdout is not None:
+             stdout_url = f"{output.output_payload_uri_prefix}.{output.task_id}.stdout"
+             stdout_bytes: bytes = output.stdout.encode()
+             await self._blob_store.put(stdout_url, stdout_bytes, logger)
              stdout = DataPayload(
-                 path=ingested_files.stdout.path,
-                 size=ingested_files.stdout.size,
-                 sha256_hash=ingested_files.stdout.sha256_hash,
+                 path=stdout_url,
+                 size=len(stdout_bytes),
+                 sha256_hash=_compute_hash(stdout_bytes),
              )
-         if ingested_files.stderr:
+
+         if output.stderr is not None:
+             stderr_url = f"{output.output_payload_uri_prefix}.{output.task_id}.stderr"
+             stderr_bytes: bytes = output.stderr.encode()
+             await self._blob_store.put(stderr_url, stderr_bytes, logger)
              stderr = DataPayload(
-                 path=ingested_files.stderr.path,
-                 size=ingested_files.stderr.size,
-                 sha256_hash=ingested_files.stderr.sha256_hash,
+                 path=stderr_url,
+                 size=len(stderr_bytes),
+                 sha256_hash=_compute_hash(stderr_bytes),
              )

-         request = ReportTaskOutcomeRequest(
-             task_id=output.task_id,
-             namespace=output.namespace,
-             graph_name=output.graph_name,
-             function_name=output.function_name,
-             graph_invocation_id=output.graph_invocation_id,
-             outcome=_to_grpc_task_outcome(output),
-             invocation_id=output.graph_invocation_id,
-             executor_id=self._executor_id,
-             reducer=output.reducer,
-             next_functions=(output.router_output.edges if output.router_output else []),
-             fn_outputs=fn_outputs,
+         if output.function_output is not None:
+             for func_output_item in output.function_output.outputs:
+                 node_output_sequence = len(data_payloads)
+                 if output.reducer:
+                     # Reducer tasks have to write their results into the same blob.
+                     output_url = (
+                         f"{output.output_payload_uri_prefix}.{node_output_sequence}"
+                     )
+                 else:
+                     # Regular tasks write their results into different blobs made unique using task ids.
+                     output_url = f"{output.output_payload_uri_prefix}.{output.task_id}.{node_output_sequence}"
+
+                 output_bytes: bytes = (
+                     func_output_item.bytes
+                     if func_output_item.HasField("bytes")
+                     else func_output_item.string.encode()
+                 )
+                 await self._blob_store.put(output_url, output_bytes, logger)
+                 data_payloads.append(
+                     DataPayload(
+                         path=output_url,
+                         size=len(output_bytes),
+                         sha256_hash=_compute_hash(output_bytes),
+                     )
+                 )
+
+         return IngestFnOutputsResponse(
+             data_payloads=data_payloads,
              stdout=stdout,
              stderr=stderr,
-             output_encoding=_to_grpc_output_encoding(output),
-             output_encoding_version=0,
          )
-         try:
-             stub = ExecutorAPIStub(await self._channel_manager.get_channel())
-             with (
-                 metric_report_task_outcome_latency.time(),
-                 metric_report_task_outcome_errors.count_exceptions(),
-             ):
-                 metric_report_task_outcome_rpcs.inc()
-                 await stub.report_task_outcome(request, timeout=5.0)
-         except Exception as e:
-             logger.error("failed to report task outcome", error=e)
-             raise e

-     def _process_task_output(
-         self, output: TaskOutput
-     ) -> Tuple[TaskResult, List[Any], TaskOutputSummary]:
+     def _process_task_output(self, output: TaskOutput) -> Tuple[TaskResult, List[Any]]:
          task_result = TaskResult(
              outcome="failure",
              namespace=output.namespace,
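The naming scheme in the hunk above pins each payload to a location under the task's output_payload_uri_prefix. A worked example with hypothetical prefix and task id values (illustrative strings, not produced by the package):

# Hypothetical values, for illustration only.
prefix = "s3://example-bucket/outputs/invocation-123"
task_id = "task-abc"

# stdout/stderr blobs always embed the task id:
stdout_url = f"{prefix}.{task_id}.stdout"  # ...invocation-123.task-abc.stdout
stderr_url = f"{prefix}.{task_id}.stderr"  # ...invocation-123.task-abc.stderr

# A regular task's n-th function output embeds the task id and a sequence number:
output_url = f"{prefix}.{task_id}.0"       # ...invocation-123.task-abc.0

# A reducer task's output omits the task id, so every reducer task of the
# invocation writes into the same blob:
reducer_url = f"{prefix}.0"                # ...invocation-123.0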
@@ -220,9 +325,8 @@
              task_id=output.task_id,
          )
          output_files: List[Any] = []
-         summary: TaskOutputSummary = TaskOutputSummary()
          if output is None:
-             return task_result, output_files, summary
+             return task_result, output_files

          task_result.outcome = (
              TASK_OUTCOME_SUCCESS if output.success else TASK_OUTCOME_FAILURE
@@ -230,33 +334,19 @@
          task_result.reducer = output.reducer

          _process_function_output(
-             function_output=output.function_output,
-             output_files=output_files,
-             summary=summary,
+             function_output=output.function_output, output_files=output_files
          )
          _process_router_output(
-             router_output=output.router_output, task_result=task_result, summary=summary
-         )
-         _process_stdout(
-             stdout=output.stdout, output_files=output_files, summary=summary
-         )
-         _process_stderr(
-             stderr=output.stderr, output_files=output_files, summary=summary
+             router_output=output.router_output, task_result=task_result
          )
+         _process_stdout(stdout=output.stdout, output_files=output_files)
+         _process_stderr(stderr=output.stderr, output_files=output_files)

-         summary.total_bytes = (
-             summary.output_total_bytes
-             + summary.stdout_total_bytes
-             + summary.stderr_total_bytes
-         )
-
-         return task_result, output_files, summary
+         return task_result, output_files


  def _process_function_output(
-     function_output: Optional[FunctionOutput],
-     output_files: List[Any],
-     summary: TaskOutputSummary,
+     function_output: Optional[FunctionOutput], output_files: List[Any]
  ) -> None:
      if function_output is None:
          return
@@ -269,25 +359,19 @@ def _process_function_output(
                  (nanoid.generate(), payload, output.content_type),
              )
          )
-         summary.output_count += 1
-         summary.output_total_bytes += len(payload)


  def _process_router_output(
      router_output: Optional[RouterOutput],
      task_result: TaskResult,
-     summary: TaskOutputSummary,
  ) -> None:
      if router_output is None:
          return

      task_result.router_output = RouterOutput(edges=router_output.edges)
-     summary.router_output_count += 1


- def _process_stdout(
-     stdout: Optional[str], output_files: List[Any], summary: TaskOutputSummary
- ) -> None:
+ def _process_stdout(stdout: Optional[str], output_files: List[Any]) -> None:
      if stdout is None:
          return

@@ -301,13 +385,9 @@
              ),
          )
      )
-     summary.stdout_count += 1
-     summary.stdout_total_bytes += len(stdout)


- def _process_stderr(
-     stderr: Optional[str], output_files: List[Any], summary: TaskOutputSummary
- ) -> None:
+ def _process_stderr(stderr: Optional[str], output_files: List[Any]) -> None:
      if stderr is None:
          return

@@ -321,8 +401,38 @@
              ),
          )
      )
-     summary.stderr_count += 1
-     summary.stderr_total_bytes += len(stderr)
+
+
+ def _task_output_summary(output: TaskOutput) -> TaskOutputSummary:
+     summary: TaskOutputSummary = TaskOutputSummary()
+
+     if output.stdout is not None:
+         summary.stdout_count += 1
+         summary.stdout_total_bytes += len(output.stdout)
+
+     if output.stderr is not None:
+         summary.stderr_count += 1
+         summary.stderr_total_bytes += len(output.stderr)
+
+     if output.function_output is not None:
+         for func_output_item in output.function_output.outputs:
+             output_len: int = len(
+                 func_output_item.bytes
+                 if func_output_item.HasField("bytes")
+                 else func_output_item.string
+             )
+             summary.output_count += 1
+             summary.output_total_bytes += output_len
+
+     if output.router_output is not None:
+         summary.router_output_count += 1
+
+     summary.total_bytes = (
+         summary.output_total_bytes
+         + summary.stdout_total_bytes
+         + summary.stderr_total_bytes
+     )
+     return summary


  def _to_grpc_task_outcome(task_output: TaskOutput) -> TaskOutcome:
@@ -337,3 +447,16 @@ def _to_grpc_output_encoding(task_output: TaskOutput) -> OutputEncoding:
          return OutputEncoding.OUTPUT_ENCODING_JSON
      else:
          return OutputEncoding.OUTPUT_ENCODING_PICKLE
+
+
+ def _to_grpc_data_payload_encoding(task_output: TaskOutput) -> DataPayloadEncoding:
+     if task_output.output_encoding == "json":
+         return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_JSON
+     else:
+         return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_BINARY_PICKLE
+
+
+ def _compute_hash(data: bytes) -> str:
+     hasher = hashlib.sha256(usedforsecurity=False)
+     hasher.update(data)
+     return hasher.hexdigest()
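_compute_hash hex-encodes a SHA-256 digest; usedforsecurity=False (a hashlib keyword available since Python 3.9) marks it as an integrity check rather than a security primitive. A consumer could verify a downloaded payload against the reported sha256_hash with the same recipe; a small sketch:

import hashlib

def payload_matches(data: bytes, expected_sha256_hash: str) -> bool:
    # Recompute the digest exactly as _compute_hash does above and
    # compare it to the hash carried in the DataPayload.
    hasher = hashlib.sha256(usedforsecurity=False)
    hasher.update(data)
    return hasher.hexdigest() == expected_sha256_hash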
indexify/executor/task_runner.py
@@ -85,6 +85,7 @@ class TaskRunner:
                  function_name=task_input.task.compute_fn,
                  graph_version=task_input.task.graph_version,
                  graph_invocation_id=task_input.task.invocation_id,
+                 output_payload_uri_prefix=task_input.task.output_payload_uri_prefix,
              )
          finally:
              if state is not None:
indexify/proto/executor_api.proto
@@ -4,6 +4,28 @@ syntax = "proto3";
  // Existing clients won't find the service if the package name changes.
  package executor_api_pb;

+ // ===== DataPayload =====
+ enum DataPayloadEncoding {
+   DATA_PAYLOAD_ENCODING_UNKNOWN = 0;
+   // These encodings currently map 1:1 to MIME types.
+   // TODO: use SDK specific encodings because a 1:1 mapping might not work in the future.
+   DATA_PAYLOAD_ENCODING_UTF8_JSON = 1;
+   DATA_PAYLOAD_ENCODING_UTF8_TEXT = 2;
+   DATA_PAYLOAD_ENCODING_BINARY_PICKLE = 3;
+ }
+
+ message DataPayload {
+   optional string path = 1; // deprecated, TODO: remove when URI is used everywhere
+   optional uint64 size = 2;
+   optional string sha256_hash = 3;
+   // URI of the data.
+   // S3 URI if the data is stored in S3.
+   // Starts with a "file://" prefix if the data is stored on a local file system.
+   optional string uri = 4;
+   optional DataPayloadEncoding encoding = 5;
+   optional uint64 encoding_version = 6;
+ }
+
  // ===== report_executor_state RPC =====

  enum GPUModel {
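With DataPayload promoted to the top of the file, the generated executor_api_pb2 module exposes it alongside the new encoding enum. A sketch of constructing one with placeholder values, mirroring what task_reporter.py does above:

from indexify.proto.executor_api_pb2 import DataPayload, DataPayloadEncoding

# Placeholder values, for illustration only.
payload = DataPayload(
    path="s3://example-bucket/outputs/invocation-123.task-abc.0",  # deprecated, mirrors uri
    uri="s3://example-bucket/outputs/invocation-123.task-abc.0",
    size=1024,
    sha256_hash="0" * 64,  # hex digest of the blob contents
    encoding=DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_JSON,
    encoding_version=0,
)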
@@ -72,6 +94,7 @@ message FunctionExecutorDescription {
    optional HostResources resource_limits = 8;
    // Timeout for customer code duration during FE creation.
    optional uint32 customer_code_timeout_ms = 9;
+   optional DataPayload graph = 10;
  }

  message FunctionExecutorState {
@@ -112,6 +135,9 @@ message ExecutorState {
    repeated FunctionExecutorState function_executor_states = 9;
    map<string, string> labels = 10;
    optional string state_hash = 11;
+   // Server supplied clock value of the latest desired executor state that was
+   // reconciled by Executor. Not included in state_hash.
+   optional uint64 server_clock = 12;
  }

  // A message sent by Executor to report its up-to-date state to Server.
@@ -131,9 +157,15 @@ message Task {
    optional string graph_version = 4;
    optional string function_name = 5;
    optional string graph_invocation_id = 6;
-   optional string input_key = 8;
-   optional string reducer_output_key = 9;
+   optional string input_key = 8; // deprecated. TODO: remove when input is used everywhere
+   optional string reducer_output_key = 9; // deprecated. TODO: remove when reducer_input is used everywhere
    optional uint32 timeout_ms = 10;
+   optional DataPayload input = 11;
+   optional DataPayload reducer_input = 12;
+   // URI prefix for the output payloads.
+   // S3 URI if the data is stored in S3.
+   // Starts with a "file://" prefix followed by an absolute directory path if the data is stored on a local file system.
+   optional string output_payload_uri_prefix = 13;
  }

  message TaskAllocation {
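A Task carrying the new fields can be built from the generated module as below; only fields visible in this hunk are set, and all values are hypothetical:

from indexify.proto.executor_api_pb2 import DataPayload, DataPayloadEncoding, Task

# Hypothetical values, for illustration only.
task = Task(
    graph_version="1",
    function_name="my_function",
    graph_invocation_id="invocation-123",
    timeout_ms=30000,
    input=DataPayload(
        uri="s3://example-bucket/inputs/invocation-123.input",
        size=512,
        encoding=DataPayloadEncoding.DATA_PAYLOAD_ENCODING_BINARY_PICKLE,
        encoding_version=0,
    ),
    output_payload_uri_prefix="s3://example-bucket/outputs/invocation-123",
)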
@@ -163,12 +195,6 @@ enum TaskOutcome {
    TASK_OUTCOME_FAILURE = 2;
  }

- message DataPayload {
-   optional string path = 1;
-   optional uint64 size = 2;
-   optional string sha256_hash = 3;
- }
-
  enum OutputEncoding {
    OUTPUT_ENCODING_UNKNOWN = 0;
    OUTPUT_ENCODING_JSON = 1;
@@ -183,7 +209,7 @@ message ReportTaskOutcomeRequest {
    optional string function_name = 4;
    optional string graph_invocation_id = 6;
    optional TaskOutcome outcome = 7;
-   optional string invocation_id = 8;
+   optional string invocation_id = 8; // deprecated. TODO: remove when graph_invocation_id is used everywhere
    optional string executor_id = 9;
    optional bool reducer = 10;

@@ -196,10 +222,10 @@ message ReportTaskOutcomeRequest {
    optional DataPayload stdout = 14;
    optional DataPayload stderr = 15;
    // The output encoding of all outputs of a function has to be the same.
-   optional OutputEncoding output_encoding = 13;
+   optional OutputEncoding output_encoding = 13; // deprecated. TODO: remove when DataPayload.encoding is used everywhere
    // This allows us to change how we encode the output from functions
    // and serialize them into storage.
-   optional uint64 output_encoding_version = 5;
+   optional uint64 output_encoding_version = 5; // deprecated. TODO: remove when DataPayload.encoding_version is used everywhere
  }

  message ReportTaskOutcomeResponse {