indexify 0.4.21__py3-none-any.whl → 0.4.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. indexify/cli/executor.py +2 -9
  2. indexify/executor/blob_store/blob_store.py +110 -26
  3. indexify/executor/blob_store/local_fs_blob_store.py +41 -1
  4. indexify/executor/blob_store/metrics/blob_store.py +87 -15
  5. indexify/executor/blob_store/s3_blob_store.py +112 -1
  6. indexify/executor/function_executor/function_executor.py +32 -56
  7. indexify/executor/function_executor/invocation_state_client.py +10 -3
  8. indexify/executor/function_executor/server/function_executor_server_factory.py +0 -1
  9. indexify/executor/function_executor_controller/create_function_executor.py +129 -116
  10. indexify/executor/function_executor_controller/downloads.py +34 -86
  11. indexify/executor/function_executor_controller/events.py +13 -7
  12. indexify/executor/function_executor_controller/finalize_task.py +184 -0
  13. indexify/executor/function_executor_controller/function_executor_controller.py +121 -78
  14. indexify/executor/function_executor_controller/message_validators.py +10 -3
  15. indexify/executor/function_executor_controller/metrics/downloads.py +8 -52
  16. indexify/executor/function_executor_controller/metrics/finalize_task.py +20 -0
  17. indexify/executor/function_executor_controller/metrics/prepare_task.py +18 -0
  18. indexify/executor/function_executor_controller/metrics/run_task.py +5 -4
  19. indexify/executor/function_executor_controller/prepare_task.py +232 -14
  20. indexify/executor/function_executor_controller/run_task.py +189 -81
  21. indexify/executor/function_executor_controller/task_info.py +4 -7
  22. indexify/executor/function_executor_controller/task_input.py +21 -0
  23. indexify/executor/function_executor_controller/task_output.py +41 -33
  24. indexify/executor/function_executor_controller/terminate_function_executor.py +6 -1
  25. indexify/executor/logging.py +69 -0
  26. indexify/executor/monitoring/metrics.py +22 -0
  27. indexify/proto/executor_api.proto +11 -3
  28. indexify/proto/executor_api_pb2.py +54 -54
  29. indexify/proto/executor_api_pb2.pyi +8 -1
  30. {indexify-0.4.21.dist-info → indexify-0.4.23.dist-info}/METADATA +6 -7
  31. {indexify-0.4.21.dist-info → indexify-0.4.23.dist-info}/RECORD +33 -31
  32. indexify/executor/function_executor_controller/function_executor_startup_output.py +0 -21
  33. indexify/executor/function_executor_controller/metrics/upload_task_output.py +0 -39
  34. indexify/executor/function_executor_controller/upload_task_output.py +0 -274
  35. {indexify-0.4.21.dist-info → indexify-0.4.23.dist-info}/WHEEL +0 -0
  36. {indexify-0.4.21.dist-info → indexify-0.4.23.dist-info}/entry_points.txt +0 -0
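A note on the refactor visible in this list: upload_task_output.py and metrics/upload_task_output.py are deleted outright, while finalize_task.py, metrics/finalize_task.py, and metrics/prepare_task.py are new. This suggests (the new files are not shown below) that output uploading was folded into task finalization. The deleted module, whose full diff follows, is built around an unbounded retry loop with a fixed backoff; a minimal, hypothetical distillation of that control flow, with illustrative names only:

import asyncio
from typing import Any, Awaitable, Callable

# Mirrors _TASK_OUTPUT_UPLOAD_BACKOFF_SEC in the deleted module below.
_BACKOFF_SEC = 5.0


async def _retry_until_successful(
    op: Callable[[], Awaitable[None]], logger: Any
) -> None:
    """Run `op` until it succeeds, sleeping a fixed interval between attempts.

    Hypothetical distillation of _upload_task_output_until_successful();
    it never raises, matching the deleted function's documented contract.
    """
    retries = 0
    while True:
        try:
            await op()
            return
        except Exception as e:
            # structlog-style keyword logging, as in the deleted module
            logger.error("operation failed, retrying", exc_info=e, retries=retries)
            retries += 1
            await asyncio.sleep(_BACKOFF_SEC)

In the deleted module, upload_task_output() wraps this loop in Prometheus metric contexts (in-progress gauge, latency histogram) before reporting success.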
--- a/indexify/executor/function_executor_controller/upload_task_output.py
+++ /dev/null
@@ -1,274 +0,0 @@
- import asyncio
- import hashlib
- import time
- from typing import Any, List
-
- from tensorlake.function_executor.proto.function_executor_pb2 import (
-     SerializedObject,
-     SerializedObjectEncoding,
- )
-
- from indexify.executor.blob_store.blob_store import BLOBStore
- from indexify.proto.executor_api_pb2 import (
-     DataPayload,
-     DataPayloadEncoding,
- )
-
- from .events import TaskOutputUploadFinished
- from .metrics.upload_task_output import (
-     metric_task_output_blob_store_upload_errors,
-     metric_task_output_blob_store_upload_latency,
-     metric_task_output_blob_store_uploads,
-     metric_task_output_upload_latency,
-     metric_task_output_upload_retries,
-     metric_task_output_uploads,
-     metric_tasks_uploading_outputs,
- )
- from .task_info import TaskInfo
- from .task_output import TaskOutput
-
- _TASK_OUTPUT_UPLOAD_BACKOFF_SEC = 5.0
-
-
- async def upload_task_output(
-     task_info: TaskInfo, blob_store: BLOBStore, logger: Any
- ) -> TaskOutputUploadFinished:
-     """Uploads the task output to blob store.
-
-     Doesn't raise any Exceptions. Runs till the reporting is successful.
-     """
-     logger = logger.bind(module=__name__)
-
-     with (
-         metric_tasks_uploading_outputs.track_inprogress(),
-         metric_task_output_upload_latency.time(),
-     ):
-         metric_task_output_uploads.inc()
-         await _upload_task_output_until_successful(
-             output=task_info.output,
-             blob_store=blob_store,
-             logger=logger,
-         )
-         _log_function_metrics(output=task_info.output, logger=logger)
-         return TaskOutputUploadFinished(task_info=task_info, is_success=True)
-
-
- async def _upload_task_output_until_successful(
-     output: TaskOutput, blob_store: BLOBStore, logger: Any
- ) -> None:
-     upload_retries: int = 0
-
-     while True:
-         logger = logger.bind(retries=upload_retries)
-         try:
-             await _upload_task_output_once(
-                 output=output, blob_store=blob_store, logger=logger
-             )
-             return
-         except Exception as e:
-             logger.error(
-                 "failed to upload task output",
-                 exc_info=e,
-             )
-             upload_retries += 1
-             metric_task_output_upload_retries.inc()
-             await asyncio.sleep(_TASK_OUTPUT_UPLOAD_BACKOFF_SEC)
-
-
- class _TaskOutputSummary:
-     def __init__(self):
-         self.output_count: int = 0
-         self.output_total_bytes: int = 0
-         self.next_functions_count: int = 0
-         self.stdout_count: int = 0
-         self.stdout_total_bytes: int = 0
-         self.stderr_count: int = 0
-         self.stderr_total_bytes: int = 0
-         self.invocation_error_output_count: int = 0
-         self.invocation_error_output_total_bytes: int = 0
-         self.total_bytes: int = 0
-
-
- async def _upload_task_output_once(
-     output: TaskOutput, blob_store: BLOBStore, logger: Any
- ) -> None:
-     """Uploads the supplied task output to blob store.
-
-     Raises an Exception if the upload fails.
-     """
-     output_summary: _TaskOutputSummary = _task_output_summary(output)
-     logger.info(
-         "uploading task output to blob store",
-         total_bytes=output_summary.total_bytes,
-         total_files=output_summary.output_count
-         + output_summary.stdout_count
-         + output_summary.stderr_count
-         + output_summary.invocation_error_output_count,
-         output_files=output_summary.output_count,
-         output_bytes=output_summary.total_bytes,
-         next_functions_count=output_summary.next_functions_count,
-         stdout_bytes=output_summary.stdout_total_bytes,
-         stderr_bytes=output_summary.stderr_total_bytes,
-         invocation_error_output_bytes=output_summary.invocation_error_output_total_bytes,
-     )
-
-     start_time = time.time()
-     with (
-         metric_task_output_blob_store_upload_latency.time(),
-         metric_task_output_blob_store_upload_errors.count_exceptions(),
-     ):
-         metric_task_output_blob_store_uploads.inc()
-         await _upload_to_blob_store(
-             task_output=output, blob_store=blob_store, logger=logger
-         )
-
-     logger.info(
-         "files uploaded to blob store",
-         duration=time.time() - start_time,
-     )
-
-
- async def _upload_to_blob_store(
-     task_output: TaskOutput, blob_store: BLOBStore, logger: Any
- ) -> None:
-     if task_output.stdout is not None:
-         stdout_url = f"{task_output.allocation.task.output_payload_uri_prefix}.{task_output.allocation.task.id}.stdout"
-         stdout_bytes: bytes = task_output.stdout.encode()
-         await blob_store.put(stdout_url, stdout_bytes, logger)
-         task_output.uploaded_stdout = DataPayload(
-             uri=stdout_url,
-             size=len(stdout_bytes),
-             sha256_hash=compute_hash(stdout_bytes),
-             encoding=DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT,
-             encoding_version=0,
-         )
-         # stdout is uploaded, free the memory used for it and don't upload again if we retry overall output upload again.
-         task_output.stdout = None
-
-     if task_output.stderr is not None:
-         stderr_url = f"{task_output.allocation.task.output_payload_uri_prefix}.{task_output.allocation.task.id}.stderr"
-         stderr_bytes: bytes = task_output.stderr.encode()
-         await blob_store.put(stderr_url, stderr_bytes, logger)
-         task_output.uploaded_stderr = DataPayload(
-             uri=stderr_url,
-             size=len(stderr_bytes),
-             sha256_hash=compute_hash(stderr_bytes),
-             encoding=DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT,
-             encoding_version=0,
-         )
-         # stderr is uploaded, free the memory used for it and don't upload again if we retry overall output upload again.
-         task_output.stderr = None
-
-     if task_output.invocation_error_output is not None:
-         invocation_error_output_url = (
-             f"{task_output.allocation.task.output_payload_uri_prefix}.inverr."
-             f"{task_output.allocation.task.graph_invocation_id}"
-         )
-         invocation_error_output_bytes: bytes = task_output.invocation_error_output.data
-         await blob_store.put(
-             invocation_error_output_url, invocation_error_output_bytes, logger
-         )
-         task_output.uploaded_invocation_error_output = DataPayload(
-             uri=invocation_error_output_url,
-             size=len(invocation_error_output_bytes),
-             sha256_hash=compute_hash(invocation_error_output_bytes),
-             encoding=_to_grpc_data_payload_encoding(
-                 task_output.invocation_error_output.encoding, logger
-             ),
-             encoding_version=0,
-         )
-         # Invocation error output is uploaded, free the memory used for it and don't upload again if we retry overall output upload again.
-         task_output.invocation_error_output = None
-
-     # We can't use the default empty list output.uploaded_data_payloads because it's a singleton.
-     uploaded_data_payloads: List[DataPayload] = []
-     for output in task_output.function_outputs:
-         output: SerializedObject
-         output_ix: int = len(uploaded_data_payloads)
-         output_url: str = (
-             f"{task_output.allocation.task.output_payload_uri_prefix}.{task_output.allocation.task.id}.{output_ix}"
-         )
-         await blob_store.put(output_url, output.data, logger)
-         uploaded_data_payloads.append(
-             DataPayload(
-                 uri=output_url,
-                 size=len(output.data),
-                 sha256_hash=compute_hash(output.data),
-                 encoding=_to_grpc_data_payload_encoding(output.encoding, logger),
-                 encoding_version=0,
-             )
-         )
-
-     task_output.uploaded_data_payloads = uploaded_data_payloads
-     # The output is uploaded, free the memory used for it and don't upload again if we retry overall output upload again.
-     task_output.function_outputs = []
-
-
- def _task_output_summary(task_output: TaskOutput) -> _TaskOutputSummary:
-     summary: _TaskOutputSummary = _TaskOutputSummary()
-
-     if task_output.stdout is not None:
-         summary.stdout_count += 1
-         summary.stdout_total_bytes += len(task_output.stdout)
-
-     if task_output.stderr is not None:
-         summary.stderr_count += 1
-         summary.stderr_total_bytes += len(task_output.stderr)
-
-     if task_output.invocation_error_output is not None:
-         summary.invocation_error_output_count += 1
-         summary.invocation_error_output_total_bytes += len(
-             task_output.invocation_error_output.data
-         )
-
-     for output in task_output.function_outputs:
-         output: SerializedObject
-         output_len: int = len(output.data)
-         summary.output_count += 1
-         summary.output_total_bytes += output_len
-
-     summary.next_functions_count = len(task_output.next_functions)
-
-     summary.total_bytes = (
-         summary.output_total_bytes
-         + summary.stdout_total_bytes
-         + summary.stderr_total_bytes
-     )
-     return summary
-
-
- def _to_grpc_data_payload_encoding(
-     encoding: SerializedObjectEncoding, logger: Any
- ) -> DataPayloadEncoding:
-     if encoding == SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_BINARY_PICKLE:
-         return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_BINARY_PICKLE
-     elif encoding == SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_UTF8_JSON:
-         return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_JSON
-     elif encoding == SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_UTF8_TEXT:
-         return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT
-     else:
-         logger.error(
-             "Unexpected encoding for SerializedObject",
-             encoding=SerializedObjectEncoding.Name(encoding),
-         )
-         return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UNKNOWN
-
-
- def compute_hash(data: bytes) -> str:
-     hasher = hashlib.sha256(usedforsecurity=False)
-     hasher.update(data)
-     return hasher.hexdigest()
-
-
- # Temporary workaround: log customer metrics until we store them somewhere
- # for future retrieval and processing.
- def _log_function_metrics(output: TaskOutput, logger: Any):
-     if output.metrics is None:
-         return
-
-     for counter_name, counter_value in output.metrics.counters.items():
-         logger.info(
-             "function_metric", counter_name=counter_name, counter_value=counter_value
-         )
-     for timer_name, timer_value in output.metrics.timers.items():
-         logger.info("function_metric", timer_name=timer_name, timer_value=timer_value)
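One comment in the deleted code above, "We can't use the default empty list output.uploaded_data_payloads because it's a singleton", refers to Python's shared-mutable-default pitfall: a default list is created once and aliased by everything that relies on the default. A minimal sketch of the failure mode it guards against (the Holder class is hypothetical, purely illustrative):

from typing import List


class Holder:
    # Deliberate bug for illustration: the default list is evaluated once,
    # at function definition time, and shared across all instances.
    def __init__(self, items: List[int] = []):
        self.items = items


a = Holder()
b = Holder()
a.items.append(1)
print(b.items)  # [1] -- b observes a's append because both alias one list

# The deleted code avoids this by building a fresh local list
# (uploaded_data_payloads) and assigning it to
# task_output.uploaded_data_payloads only after the uploads complete.

Building a fresh local list per call, as the deleted upload loop does, sidesteps the aliasing entirely.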