indexify 0.3.31-py3-none-any.whl → 0.4.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
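The same kind of comparison can be reproduced locally. Below is a minimal sketch, assuming both wheels have already been downloaded to the current directory (for example with `pip download indexify==0.3.31 --no-deps`); the file names follow standard wheel naming and are not taken from this page, and this is not how this page was generated.

```python
# Sketch: diff the contents of two downloaded wheels with the standard library.
import difflib
import zipfile

OLD_WHEEL = "indexify-0.3.31-py3-none-any.whl"  # assumed local path
NEW_WHEEL = "indexify-0.4.3-py3-none-any.whl"   # assumed local path


def wheel_files(path: str) -> dict[str, str]:
    """Map archive member name -> decoded text for every file in the wheel."""
    with zipfile.ZipFile(path) as zf:
        return {
            name: zf.read(name).decode("utf-8", errors="replace")
            for name in zf.namelist()
        }


old, new = wheel_files(OLD_WHEEL), wheel_files(NEW_WHEEL)

# Files present only in one of the two archives.
print("added:", sorted(new.keys() - old.keys()))
print("removed:", sorted(old.keys() - new.keys()))

# Unified diff for every file that exists in both wheels but changed.
for name in sorted(old.keys() & new.keys()):
    if old[name] != new[name]:
        diff = difflib.unified_diff(
            old[name].splitlines(),
            new[name].splitlines(),
            fromfile=f"0.3.31/{name}",
            tofile=f"0.4.3/{name}",
            lineterm="",
        )
        print("\n".join(diff))
```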
Files changed (75)
  1. indexify/cli/__init__.py +18 -0
  2. indexify/cli/build_image.py +51 -0
  3. indexify/cli/deploy.py +57 -0
  4. indexify/cli/executor.py +205 -0
  5. indexify/executor/{grpc/channel_manager.py → channel_manager.py} +17 -11
  6. indexify/executor/executor.py +57 -313
  7. indexify/executor/function_allowlist.py +59 -0
  8. indexify/executor/function_executor/function_executor.py +12 -6
  9. indexify/executor/function_executor/invocation_state_client.py +25 -3
  10. indexify/executor/function_executor/server/function_executor_server_factory.py +3 -3
  11. indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +22 -11
  12. indexify/executor/function_executor_controller/__init__.py +13 -0
  13. indexify/executor/function_executor_controller/completed_task_metrics.py +82 -0
  14. indexify/executor/function_executor_controller/create_function_executor.py +158 -0
  15. indexify/executor/function_executor_controller/debug_event_loop.py +37 -0
  16. indexify/executor/function_executor_controller/destroy_function_executor.py +28 -0
  17. indexify/executor/function_executor_controller/downloads.py +199 -0
  18. indexify/executor/function_executor_controller/events.py +172 -0
  19. indexify/executor/function_executor_controller/function_executor_controller.py +759 -0
  20. indexify/executor/function_executor_controller/loggers.py +57 -0
  21. indexify/executor/function_executor_controller/message_validators.py +69 -0
  22. indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +68 -0
  23. indexify/executor/{metrics/downloader.py → function_executor_controller/metrics/downloads.py} +1 -3
  24. indexify/executor/function_executor_controller/metrics/function_executor_controller.py +60 -0
  25. indexify/executor/{function_executor/metrics/single_task_runner.py → function_executor_controller/metrics/run_task.py} +9 -3
  26. indexify/executor/function_executor_controller/metrics/upload_task_output.py +39 -0
  27. indexify/executor/function_executor_controller/prepare_task.py +38 -0
  28. indexify/executor/function_executor_controller/run_task.py +201 -0
  29. indexify/executor/function_executor_controller/task_info.py +33 -0
  30. indexify/executor/function_executor_controller/task_output.py +122 -0
  31. indexify/executor/function_executor_controller/upload_task_output.py +234 -0
  32. indexify/executor/host_resources/host_resources.py +20 -25
  33. indexify/executor/host_resources/nvidia_gpu_allocator.py +8 -1
  34. indexify/executor/{grpc/metrics → metrics}/channel_manager.py +1 -1
  35. indexify/executor/metrics/executor.py +0 -47
  36. indexify/executor/{grpc/metrics → metrics}/state_reconciler.py +1 -1
  37. indexify/executor/{grpc/metrics → metrics}/state_reporter.py +1 -1
  38. indexify/executor/monitoring/health_checker/generic_health_checker.py +6 -59
  39. indexify/executor/monitoring/health_checker/health_checker.py +0 -11
  40. indexify/executor/{grpc/state_reconciler.py → state_reconciler.py} +139 -141
  41. indexify/executor/state_reporter.py +364 -0
  42. indexify/proto/executor_api.proto +68 -60
  43. indexify/proto/executor_api_pb2.py +52 -52
  44. indexify/proto/executor_api_pb2.pyi +129 -108
  45. indexify/proto/executor_api_pb2_grpc.py +0 -47
  46. {indexify-0.3.31.dist-info → indexify-0.4.3.dist-info}/METADATA +2 -5
  47. indexify-0.4.3.dist-info/RECORD +68 -0
  48. indexify-0.4.3.dist-info/entry_points.txt +3 -0
  49. indexify/cli/cli.py +0 -268
  50. indexify/executor/api_objects.py +0 -92
  51. indexify/executor/downloader.py +0 -417
  52. indexify/executor/executor_flavor.py +0 -7
  53. indexify/executor/function_executor/function_executor_state.py +0 -107
  54. indexify/executor/function_executor/function_executor_states_container.py +0 -93
  55. indexify/executor/function_executor/function_executor_status.py +0 -95
  56. indexify/executor/function_executor/metrics/function_executor_state.py +0 -46
  57. indexify/executor/function_executor/metrics/function_executor_state_container.py +0 -10
  58. indexify/executor/function_executor/single_task_runner.py +0 -345
  59. indexify/executor/function_executor/task_input.py +0 -21
  60. indexify/executor/function_executor/task_output.py +0 -105
  61. indexify/executor/grpc/function_executor_controller.py +0 -418
  62. indexify/executor/grpc/metrics/task_controller.py +0 -8
  63. indexify/executor/grpc/state_reporter.py +0 -317
  64. indexify/executor/grpc/task_controller.py +0 -508
  65. indexify/executor/metrics/task_fetcher.py +0 -21
  66. indexify/executor/metrics/task_reporter.py +0 -53
  67. indexify/executor/metrics/task_runner.py +0 -52
  68. indexify/executor/monitoring/function_allowlist.py +0 -25
  69. indexify/executor/runtime_probes.py +0 -68
  70. indexify/executor/task_fetcher.py +0 -96
  71. indexify/executor/task_reporter.py +0 -459
  72. indexify/executor/task_runner.py +0 -177
  73. indexify-0.3.31.dist-info/RECORD +0 -68
  74. indexify-0.3.31.dist-info/entry_points.txt +0 -3
  75. {indexify-0.3.31.dist-info → indexify-0.4.3.dist-info}/WHEEL +0 -0
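The CLI moves from a single indexify/cli/cli.py module to a cli/ package (__init__, build_image, deploy, executor), and the distribution ships a new entry_points.txt. A minimal sketch for checking what the installed distribution actually registers after upgrading, assuming only that the package is installed under the name `indexify` (no script names are guessed here):

```python
# Sketch: list console scripts registered by the installed "indexify" distribution.
from importlib.metadata import distribution

dist = distribution("indexify")
print(dist.version)  # e.g. "0.4.3" if the new wheel is installed

for ep in dist.entry_points:
    if ep.group == "console_scripts":
        print(f"{ep.name} -> {ep.value}")
```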
indexify/executor/runtime_probes.py (deleted)
@@ -1,68 +0,0 @@
- import os
- import platform
- import sys
- from typing import Any, Dict, Tuple
-
- from pydantic import BaseModel
-
- DEFAULT_EXECUTOR = "tensorlake/indexify-executor-default"
- # Empty string is used as a default hash which tells the scheduler to accept any hash.
- DEFAULT_HASH = ""
-
-
- class ProbeInfo(BaseModel):
-     image_name: str
-     image_hash: str
-     python_major_version: int
-     labels: Dict[str, Any] = {}
-     is_default_executor: bool
-
-
- class RuntimeProbes:
-     def __init__(self) -> None:
-         self._image_name = self._read_image_name()
-         self._image_hash = self._read_image_hash()
-         self._os_name = platform.system()
-         self._architecture = platform.machine()
-         (
-             self._python_version_major,
-             self._python_version_minor,
-         ) = self._get_python_version()
-
-     def _read_image_name(self) -> str:
-         file_path = os.path.expanduser("~/.indexify/image_name")
-         if os.path.exists(file_path):
-             with open(file_path, "r") as file:
-                 return file.read().strip()
-         return DEFAULT_EXECUTOR
-
-     def _read_image_hash(self) -> str:
-         file_path = os.path.expanduser("~/.indexify/image_hash")
-         if os.path.exists(file_path):
-             with open(file_path, "r") as file:
-                 return file.read().strip()
-         return DEFAULT_HASH
-
-     def _get_python_version(self) -> Tuple[int, int]:
-         version_info = sys.version_info
-         return version_info.major, version_info.minor
-
-     def _is_default_executor(self):
-         return True if self._read_image_name() == DEFAULT_EXECUTOR else False
-
-     def probe(self) -> ProbeInfo:
-         labels = {
-             "os": self._os_name,
-             "image_name": self._image_name,
-             "architecture": self._architecture,
-             "python_major_version": self._python_version_major,
-             "python_minor_version": self._python_version_minor,
-         }
-
-         return ProbeInfo(
-             image_name=self._image_name,
-             image_hash=self._image_hash,
-             python_major_version=self._python_version_major,
-             labels=labels,
-             is_default_executor=self._is_default_executor(),
-         )
indexify/executor/task_fetcher.py (deleted)
@@ -1,96 +0,0 @@
- import json
- import time
- from socket import gethostname
- from typing import AsyncGenerator, Dict, List, Optional
-
- import structlog
- from httpx_sse import aconnect_sse
- from tensorlake.utils.http_client import get_httpx_client
-
- from .api_objects import ExecutorMetadata, FunctionURI, Task
- from .metrics.task_fetcher import (
-     metric_server_registration_errors,
-     metric_server_registration_latency,
-     metric_server_registrations,
- )
- from .runtime_probes import ProbeInfo, RuntimeProbes
-
-
- class TaskFetcher:
-     """Registers with Indexify server and fetches tasks from it."""
-
-     def __init__(
-         self,
-         executor_id: str,
-         executor_version: str,
-         labels: Dict[str, str],
-         function_allowlist: Optional[List[FunctionURI]],
-         protocol: str,
-         indexify_server_addr: str,
-         config_path: Optional[str] = None,
-     ):
-         self._protocol: str = protocol
-         self._indexify_server_addr: str = indexify_server_addr
-         self.config_path = config_path
-         self._logger = structlog.get_logger(module=__name__)
-
-         probe_info: ProbeInfo = RuntimeProbes().probe()
-         all_labels = probe_info.labels.copy()
-         all_labels.update(labels)
-
-         self._executor_metadata: ExecutorMetadata = ExecutorMetadata(
-             id=executor_id,
-             executor_version=executor_version,
-             addr=gethostname(),
-             function_allowlist=function_allowlist,
-             labels=all_labels,
-         )
-
-     async def run(self) -> AsyncGenerator[Task, None]:
-         """Fetches tasks that Indexify server assigned to the Executor.
-
-         Raises an exception if error occurred."""
-         url = f"{self._protocol}://{self._indexify_server_addr}/internal/executors/{self._executor_metadata.id}/tasks"
-
-         self._logger.info(
-             "registering_executor",
-             executor_id=self._executor_metadata.id,
-             url=url,
-             executor_version=self._executor_metadata.executor_version,
-         )
-         metric_server_registrations.inc()
-         registration_start_time: float = time.monotonic()
-
-         async with get_httpx_client(
-             config_path=self.config_path, make_async=True
-         ) as client:
-             async with aconnect_sse(
-                 client,
-                 "POST",
-                 url,
-                 json=self._executor_metadata.model_dump(),
-                 headers={"Content-Type": "application/json"},
-             ) as event_source:
-                 try:
-                     event_source.response.raise_for_status()
-                 except Exception as e:
-                     metric_server_registration_errors.inc()
-                     await event_source.response.aread()
-                     raise Exception(
-                         "failed to register at server. "
-                         f"Response code: {event_source.response.status_code}. "
-                         f"Response text: '{event_source.response.text}'."
-                     ) from e
-                 finally:
-                     metric_server_registration_latency.observe(
-                         time.monotonic() - registration_start_time
-                     )
-
-                 self._logger.info(
-                     "executor_registered", executor_id=self._executor_metadata.id
-                 )
-
-                 async for sse in event_source.aiter_sse():
-                     task_dicts = json.loads(sse.data)
-                     for task_dict in task_dicts:
-                         yield Task.model_validate(task_dict, strict=False)
indexify/executor/task_reporter.py (deleted)
@@ -1,459 +0,0 @@
- import asyncio
- import hashlib
- import time
- from typing import Any, List, Optional, Tuple
-
- import nanoid
- from httpx import Timeout
- from tensorlake.function_executor.proto.function_executor_pb2 import FunctionOutput
- from tensorlake.utils.http_client import get_httpx_client
-
- from indexify.proto.executor_api_pb2 import DataPayload as DataPayloadProto
- from indexify.proto.executor_api_pb2 import (
-     DataPayloadEncoding,
-     OutputEncoding,
-     ReportTaskOutcomeRequest,
-     TaskOutcome,
- )
- from indexify.proto.executor_api_pb2_grpc import ExecutorAPIStub
-
- from .api_objects import (
-     TASK_OUTCOME_FAILURE,
-     TASK_OUTCOME_SUCCESS,
-     DataPayload,
-     IngestFnOutputsResponse,
-     RouterOutput,
-     TaskResult,
- )
- from .blob_store.blob_store import BLOBStore
- from .function_executor.task_output import TaskOutput
- from .grpc.channel_manager import ChannelManager
- from .metrics.task_reporter import (
-     metric_report_task_outcome_errors,
-     metric_report_task_outcome_latency,
-     metric_report_task_outcome_rpcs,
-     metric_server_ingest_files_errors,
-     metric_server_ingest_files_latency,
-     metric_server_ingest_files_requests,
-     metric_task_output_blob_store_upload_errors,
-     metric_task_output_blob_store_upload_latency,
-     metric_task_output_blob_store_uploads,
- )
-
-
- # https://github.com/psf/requests/issues/1081#issuecomment-428504128
- class ForceMultipartDict(dict):
-     def __bool__(self):
-         return True
-
-
- FORCE_MULTIPART = ForceMultipartDict()
- UTF_8_CONTENT_TYPE = "application/octet-stream"
-
-
- class TaskOutputSummary:
-     def __init__(self):
-         self.output_count: int = 0
-         self.output_total_bytes: int = 0
-         self.router_output_count: int = 0
-         self.stdout_count: int = 0
-         self.stdout_total_bytes: int = 0
-         self.stderr_count: int = 0
-         self.stderr_total_bytes: int = 0
-         self.total_bytes: int = 0
-
-
- class TaskReporter:
-     def __init__(
-         self,
-         base_url: str,
-         executor_id: str,
-         channel_manager: ChannelManager,
-         blob_store: BLOBStore,
-         config_path: Optional[str] = None,
-     ):
-         self._base_url = base_url
-         self._executor_id = executor_id
-         self._is_shutdown = False
-         # Use thread-safe sync client due to issues with async client.
-         # Async client attempts to use connections it already closed.
-         # See e.g. https://github.com/encode/httpx/issues/2337.
-         # Creating a new async client for each request fixes this but it
-         # results in not reusing established TCP connections to server.
-         self._client = get_httpx_client(config_path, make_async=False)
-         self._channel_manager = channel_manager
-         self._blob_store = blob_store
-
-     async def shutdown(self) -> None:
-         """Shuts down the task reporter.
-
-         Task reporter stops reporting all task outcomes to the Server.
-         There are many task failures due to Executor shutdown. We give wrong
-         signals to Server if we report such failures.
-         """
-         self._is_shutdown = True
-
-     async def report(self, output: TaskOutput, logger: Any) -> None:
-         """Reports result of the supplied task."""
-         logger = logger.bind(module=__name__)
-
-         if self._is_shutdown:
-             logger.warning(
-                 "task reporter got shutdown, skipping task outcome reporting"
-             )
-             return
-
-         # TODO: If the files are uploaded successfully,
-         # we should record that so that if we fail to report
-         # the task outcome, we don't retry the upload.
-         # This will save us some time and resources.
-         # It's good to do this once we delete all the legacy code paths.
-
-         output_summary: TaskOutputSummary = _task_output_summary(output)
-         logger.info(
-             "reporting task outcome",
-             total_bytes=output_summary.total_bytes,
-             total_files=output_summary.output_count
-             + output_summary.stdout_count
-             + output_summary.stderr_count,
-             output_files=output_summary.output_count,
-             output_bytes=output_summary.total_bytes,
-             router_output_count=output_summary.router_output_count,
-             stdout_bytes=output_summary.stdout_total_bytes,
-             stderr_bytes=output_summary.stderr_total_bytes,
-         )
-
-         if output.output_payload_uri_prefix is None:
-             ingested_files = await self._ingest_files_at_server(output, logger)
-         else:
-             ingested_files = await self._ingest_files_at_blob_store(output, logger)
-
-         fn_outputs = []
-         for data_payload in ingested_files.data_payloads:
-             fn_outputs.append(
-                 DataPayloadProto(
-                     path=data_payload.path, # TODO: stop using this deprecated field once Server side migration is done.
-                     uri=data_payload.path,
-                     size=data_payload.size,
-                     sha256_hash=data_payload.sha256_hash,
-                     encoding=_to_grpc_data_payload_encoding(output),
-                     encoding_version=0,
-                 )
-             )
-         stdout, stderr = None, None
-         if ingested_files.stdout is not None:
-             stdout = DataPayloadProto(
-                 path=ingested_files.stdout.path, # TODO: stop using this deprecated field once Server side migration is done.
-                 uri=ingested_files.stdout.path,
-                 size=ingested_files.stdout.size,
-                 sha256_hash=ingested_files.stdout.sha256_hash,
-                 encoding=DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT,
-                 encoding_version=0,
-             )
-         if ingested_files.stderr is not None:
-             stderr = DataPayloadProto(
-                 path=ingested_files.stderr.path, # TODO: stop using this deprecated field once Server side migration is done.
-                 uri=ingested_files.stderr.path,
-                 size=ingested_files.stderr.size,
-                 sha256_hash=ingested_files.stderr.sha256_hash,
-                 encoding=DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT,
-                 encoding_version=0,
-             )
-
-         request = ReportTaskOutcomeRequest(
-             task_id=output.task_id,
-             namespace=output.namespace,
-             graph_name=output.graph_name,
-             function_name=output.function_name,
-             graph_invocation_id=output.graph_invocation_id,
-             outcome=_to_grpc_task_outcome(output),
-             invocation_id=output.graph_invocation_id,
-             executor_id=self._executor_id,
-             reducer=output.reducer,
-             next_functions=(output.router_output.edges if output.router_output else []),
-             fn_outputs=fn_outputs,
-             stdout=stdout,
-             stderr=stderr,
-             output_encoding=_to_grpc_output_encoding(output),
-             output_encoding_version=0,
-         )
-         try:
-             stub = ExecutorAPIStub(await self._channel_manager.get_channel())
-             with (
-                 metric_report_task_outcome_latency.time(),
-                 metric_report_task_outcome_errors.count_exceptions(),
-             ):
-                 metric_report_task_outcome_rpcs.inc()
-                 await stub.report_task_outcome(request, timeout=5.0)
-         except Exception as e:
-             logger.error("failed to report task outcome", error=e)
-             raise e
-
-     async def _ingest_files_at_server(
-         self, output: TaskOutput, logger: Any
-     ) -> IngestFnOutputsResponse:
-         logger.warning("uploading task output files to server (deprecated mode)")
-
-         task_result, output_files = self._process_task_output(output)
-         task_result_data = task_result.model_dump_json(exclude_none=True)
-
-         kwargs = {
-             "data": {"task_result": task_result_data},
-             # Use httpx default timeout of 5s for all timeout types.
-             # For read timeouts, use 5 minutes to allow for large file uploads.
-             "timeout": Timeout(
-                 5.0,
-                 read=5.0 * 60,
-             ),
-             "files": output_files if len(output_files) > 0 else FORCE_MULTIPART,
-         }
-
-         start_time = time.time()
-         with metric_server_ingest_files_latency.time():
-             metric_server_ingest_files_requests.inc()
-             # Run in a separate thread to not block the main event loop.
-             response = await asyncio.to_thread(
-                 self._client.post,
-                 url=f"{self._base_url}/internal/ingest_fn_outputs",
-                 **kwargs,
-             )
-         end_time = time.time()
-         logger.info(
-             "files uploaded to server",
-             response_time=end_time - start_time,
-             response_code=response.status_code,
-         )
-
-         try:
-             response.raise_for_status()
-         except Exception as e:
-             metric_server_ingest_files_errors.inc()
-             # Caller catches and logs the exception.
-             raise Exception(
-                 "failed to upload files. "
-                 f"Response code: {response.status_code}. "
-                 f"Response text: '{response.text}'."
-             ) from e
-
-         ingested_files_response = response.json()
-         return IngestFnOutputsResponse.model_validate(ingested_files_response)
-
-     async def _ingest_files_at_blob_store(
-         self, output: TaskOutput, logger: Any
-     ) -> IngestFnOutputsResponse:
-         start_time = time.time()
-         with (
-             metric_task_output_blob_store_upload_latency.time(),
-             metric_task_output_blob_store_upload_errors.count_exceptions(),
-         ):
-             metric_task_output_blob_store_uploads.inc()
-             response = await self._upload_output_to_blob_store(output, logger)
-
-         logger.info(
-             "files uploaded to blob store",
-             duration=time.time() - start_time,
-         )
-         return response
-
-     async def _upload_output_to_blob_store(
-         self, output: TaskOutput, logger: Any
-     ) -> IngestFnOutputsResponse:
-         data_payloads: List[DataPayload] = []
-         stdout: Optional[DataPayload] = None
-         stderr: Optional[DataPayload] = None
-
-         if output.stdout is not None:
-             stdout_url = f"{output.output_payload_uri_prefix}.{output.task_id}.stdout"
-             stdout_bytes: bytes = output.stdout.encode()
-             await self._blob_store.put(stdout_url, stdout_bytes, logger)
-             stdout = DataPayload(
-                 path=stdout_url,
-                 size=len(stdout_bytes),
-                 sha256_hash=_compute_hash(stdout_bytes),
-             )
-
-         if output.stderr is not None:
-             stderr_url = f"{output.output_payload_uri_prefix}.{output.task_id}.stderr"
-             stderr_bytes: bytes = output.stderr.encode()
-             await self._blob_store.put(stderr_url, stderr_bytes, logger)
-             stderr = DataPayload(
-                 path=stderr_url,
-                 size=len(stderr_bytes),
-                 sha256_hash=_compute_hash(stderr_bytes),
-             )
-
-         if output.function_output is not None:
-             for func_output_item in output.function_output.outputs:
-                 node_output_sequence = len(data_payloads)
-                 if output.reducer:
-                     # Reducer tasks have to write their results into the same blob.
-                     output_url = (
-                         f"{output.output_payload_uri_prefix}.{node_output_sequence}"
-                     )
-                 else:
-                     # Regular tasks write their results into different blobs made unique using task ids.
-                     output_url = f"{output.output_payload_uri_prefix}.{output.task_id}.{node_output_sequence}"
-
-                 output_bytes: bytes = (
-                     func_output_item.bytes
-                     if func_output_item.HasField("bytes")
-                     else func_output_item.string.encode()
-                 )
-                 await self._blob_store.put(output_url, output_bytes, logger)
-                 data_payloads.append(
-                     DataPayload(
-                         path=output_url,
-                         size=len(output_bytes),
-                         sha256_hash=_compute_hash(output_bytes),
-                     )
-                 )
-
-         return IngestFnOutputsResponse(
-             data_payloads=data_payloads,
-             stdout=stdout,
-             stderr=stderr,
-         )
-
-     def _process_task_output(self, output: TaskOutput) -> Tuple[TaskResult, List[Any]]:
-         task_result = TaskResult(
-             outcome="failure",
-             namespace=output.namespace,
-             compute_graph=output.graph_name,
-             compute_fn=output.function_name,
-             invocation_id=output.graph_invocation_id,
-             executor_id=self._executor_id,
-             task_id=output.task_id,
-             reducer=output.reducer,
-         )
-         output_files: List[Any] = []
-         task_result.outcome = (
-             TASK_OUTCOME_SUCCESS if output.success else TASK_OUTCOME_FAILURE
-         )
-
-         _process_function_output(
-             function_output=output.function_output, output_files=output_files
-         )
-         _process_router_output(
-             router_output=output.router_output, task_result=task_result
-         )
-         _process_stdout(stdout=output.stdout, output_files=output_files)
-         _process_stderr(stderr=output.stderr, output_files=output_files)
-
-         return task_result, output_files
-
-
- def _process_function_output(
-     function_output: Optional[FunctionOutput], output_files: List[Any]
- ) -> None:
-     if function_output is None:
-         return
-
-     for output in function_output.outputs or []:
-         payload = output.bytes if output.HasField("bytes") else output.string
-         output_files.append(
-             (
-                 "node_outputs",
-                 (nanoid.generate(), payload, output.content_type),
-             )
-         )
-
-
- def _process_router_output(
-     router_output: Optional[RouterOutput],
-     task_result: TaskResult,
- ) -> None:
-     if router_output is None:
-         return
-
-     task_result.router_output = RouterOutput(edges=router_output.edges)
-
-
- def _process_stdout(stdout: Optional[str], output_files: List[Any]) -> None:
-     if stdout is None:
-         return
-
-     output_files.append(
-         (
-             "stdout",
-             (
-                 nanoid.generate(),
-                 stdout.encode(),
-                 UTF_8_CONTENT_TYPE,
-             ),
-         )
-     )
-
-
- def _process_stderr(stderr: Optional[str], output_files: List[Any]) -> None:
-     if stderr is None:
-         return
-
-     output_files.append(
-         (
-             "stderr",
-             (
-                 nanoid.generate(),
-                 stderr.encode(),
-                 UTF_8_CONTENT_TYPE,
-             ),
-         )
-     )
-
-
- def _task_output_summary(output: TaskOutput) -> TaskOutputSummary:
-     summary: TaskOutputSummary = TaskOutputSummary()
-
-     if output.stdout is not None:
-         summary.stdout_count += 1
-         summary.stdout_total_bytes += len(output.stdout)
-
-     if output.stderr is not None:
-         summary.stderr_count += 1
-         summary.stderr_total_bytes += len(output.stderr)
-
-     if output.function_output is not None:
-         for func_output_item in output.function_output.outputs:
-             output_len: bytes = len(
-                 func_output_item.bytes
-                 if func_output_item.HasField("bytes")
-                 else func_output_item.string
-             )
-             summary.output_count += 1
-             summary.output_total_bytes += output_len
-
-     if output.router_output is not None:
-         summary.router_output_count += 1
-
-     summary.total_bytes = (
-         summary.output_total_bytes
-         + summary.stdout_total_bytes
-         + summary.stderr_total_bytes
-     )
-     return summary
-
-
- def _to_grpc_task_outcome(task_output: TaskOutput) -> TaskOutcome:
-     if task_output.success:
-         return TaskOutcome.TASK_OUTCOME_SUCCESS
-     else:
-         return TaskOutcome.TASK_OUTCOME_FAILURE
-
-
- def _to_grpc_output_encoding(task_output: TaskOutput) -> OutputEncoding:
-     if task_output.output_encoding == "json":
-         return OutputEncoding.OUTPUT_ENCODING_JSON
-     else:
-         return OutputEncoding.OUTPUT_ENCODING_PICKLE
-
-
- def _to_grpc_data_payload_encoding(task_output: TaskOutput) -> DataPayloadEncoding:
-     if task_output.output_encoding == "json":
-         return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_JSON
-     else:
-         return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_BINARY_PICKLE
-
-
- def _compute_hash(data: bytes) -> str:
-     hasher = hashlib.sha256(usedforsecurity=False)
-     hasher.update(data)
-     return hasher.hexdigest()