indexify 0.3.20.tar.gz → 0.3.22.tar.gz

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (64)
  1. {indexify-0.3.20 → indexify-0.3.22}/PKG-INFO +1 -1
  2. {indexify-0.3.20 → indexify-0.3.22}/pyproject.toml +1 -1
  3. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/cli/cli.py +1 -10
  4. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/api_objects.py +11 -6
  5. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/blob_store/local_fs_blob_store.py +2 -2
  6. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/blob_store/s3_blob_store.py +3 -0
  7. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/downloader.py +60 -18
  8. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/executor.py +6 -4
  9. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +3 -0
  10. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/single_task_runner.py +5 -0
  11. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/task_output.py +6 -0
  12. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/grpc/function_executor_controller.py +3 -0
  13. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/grpc/state_reconciler.py +1 -3
  14. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/grpc/state_reporter.py +1 -3
  15. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/grpc/task_controller.py +15 -0
  16. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/metrics/task_reporter.py +17 -0
  17. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/task_reporter.py +197 -92
  18. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/task_runner.py +1 -0
  19. {indexify-0.3.20 → indexify-0.3.22}/README.md +0 -0
  20. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/README.md +0 -0
  21. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/blob_store/blob_store.py +0 -0
  22. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/blob_store/metrics/blob_store.py +0 -0
  23. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/executor_flavor.py +0 -0
  24. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/function_executor.py +0 -0
  25. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/function_executor_state.py +0 -0
  26. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/function_executor_states_container.py +0 -0
  27. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/function_executor_status.py +0 -0
  28. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/health_checker.py +0 -0
  29. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/invocation_state_client.py +0 -0
  30. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
  31. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/metrics/function_executor_state.py +0 -0
  32. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/metrics/function_executor_state_container.py +0 -0
  33. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
  34. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
  35. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/metrics/single_task_runner.py +0 -0
  36. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
  37. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
  38. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
  39. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +0 -0
  40. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/function_executor/task_input.py +0 -0
  41. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/grpc/channel_manager.py +0 -0
  42. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/grpc/metrics/channel_manager.py +0 -0
  43. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/grpc/metrics/state_reconciler.py +0 -0
  44. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/grpc/metrics/state_reporter.py +0 -0
  45. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/grpc/metrics/task_controller.py +0 -0
  46. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/metrics/downloader.py +0 -0
  47. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/metrics/executor.py +0 -0
  48. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/metrics/task_fetcher.py +0 -0
  49. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/metrics/task_runner.py +0 -0
  50. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/monitoring/function_allowlist.py +0 -0
  51. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/monitoring/handler.py +0 -0
  52. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
  53. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -0
  54. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
  55. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/monitoring/metrics.py +0 -0
  56. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
  57. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/monitoring/server.py +0 -0
  58. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
  59. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/runtime_probes.py +0 -0
  60. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/executor/task_fetcher.py +0 -0
  61. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/proto/executor_api.proto +0 -0
  62. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/proto/executor_api_pb2.py +0 -0
  63. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/proto/executor_api_pb2.pyi +0 -0
  64. {indexify-0.3.20 → indexify-0.3.22}/src/indexify/proto/executor_api_pb2_grpc.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: indexify
- Version: 0.3.20
+ Version: 0.3.22
  Summary: Open Source Indexify components and helper tools
  Home-page: https://github.com/tensorlakeai/indexify
  License: Apache 2.0
pyproject.toml
@@ -1,7 +1,7 @@
  [tool.poetry]
  name = "indexify"
  # Incremented if any of the components provided in this packages are updated.
- version = "0.3.20"
+ version = "0.3.22"
  description = "Open Source Indexify components and helper tools"
  authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
  license = "Apache 2.0"
src/indexify/cli/cli.py
@@ -102,9 +102,6 @@ def executor(
  executor_cache: Optional[str] = typer.Option(
  "~/.indexify/executor_cache", help="Path to the executor cache directory"
  ),
- executor_id: Optional[str] = typer.Option(
- None, help="ID of the executor, if not provided, a random ID will be generated"
- ),
  # Registred ports range ends at 49151.
  ports: Tuple[int, int] = typer.Option(
  (50000, 51000),
@@ -153,18 +150,12 @@ def executor(
  "At least one function must be specified when not running in development mode"
  )

- if executor_id is None:
- executor_id = nanoid.generate()
- elif not re.compile(r"^[a-zA-Z0-9_-]{10,}$").match(executor_id):
- raise typer.BadParameter(
- "--executor-id should be at least 10 characters long and only include characters _-[0-9][a-z][A-Z]"
- )
-
  kv_labels: Dict[str, str] = {}
  for label in labels:
  key, value = label.split("=")
  kv_labels[key] = value

+ executor_id: str = nanoid.generate()
  executor_version = version("indexify")
  logger = structlog.get_logger(module=__name__, executor_id=executor_id)

src/indexify/executor/api_objects.py
@@ -3,6 +3,13 @@ from typing import Any, Dict, List, Optional
  from pydantic import BaseModel


+ class DataPayload(BaseModel):
+ path: str
+ size: int
+ sha256_hash: str
+ content_type: Optional[str] = None
+
+
  class Task(BaseModel):
  id: str
  namespace: str
@@ -16,6 +23,10 @@ class Task(BaseModel):
  "image_uri defines the URI of the image of this task. Optional since some executors do not require it."
  secret_names: Optional[List[str]] = None
  "secret_names defines the names of the secrets to set on function executor. Optional for backward compatibility."
+ graph_payload: Optional[DataPayload] = None
+ input_payload: Optional[DataPayload] = None
+ reducer_input_payload: Optional[DataPayload] = None
+ output_payload_uri_prefix: Optional[str] = None


  class FunctionURI(BaseModel):
@@ -49,12 +60,6 @@ class TaskResult(BaseModel):
  reducer: bool = False


- class DataPayload(BaseModel):
- path: str
- size: int
- sha256_hash: str
-
-
  class IngestFnOutputsResponse(BaseModel):
  data_payloads: List[DataPayload]
  stdout: Optional[DataPayload] = None
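Note on the api_objects.py changes above: DataPayload moves ahead of Task so that tasks can reference it, it gains an optional content_type, and Task gains optional graph_payload, input_payload, reducer_input_payload, and output_payload_uri_prefix fields. A minimal sketch of constructing the model (all values below are illustrative, not taken from the package):

    from indexify.executor.api_objects import DataPayload

    payload = DataPayload(
        path="s3://example-bucket/outputs/invocation-1.task-1.0",  # illustrative URI
        size=2048,
        sha256_hash="0" * 64,  # illustrative digest
        content_type="application/octet-stream",  # new optional field, defaults to None
    )
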
src/indexify/executor/blob_store/local_fs_blob_store.py
@@ -27,7 +27,7 @@ class LocalFSBLOBStore:

  def _sync_get(self, path: str) -> bytes:
  if not os.path.isabs(path):
- raise ValueError(f"Path {path} is not absolute")
+ raise ValueError(f"Path {path} must be absolute")

  if os.path.exists(path):
  with open(path, mode="rb") as blob_file:
@@ -37,7 +37,7 @@ class LocalFSBLOBStore:

  def _sync_put(self, path: str, value: bytes) -> None:
  if not os.path.isabs(path):
- raise ValueError(f"Path {path} is not absolute")
+ raise ValueError(f"Path {path} must be absolute")

  os.makedirs(os.path.dirname(path), exist_ok=True)
  with open(path, mode="wb") as blob_file:
src/indexify/executor/blob_store/s3_blob_store.py
@@ -77,8 +77,11 @@ class S3BLOBStore:


  def _bucket_name_and_object_key_from_uri(uri: str) -> tuple[str, str]:
+ # Example S3 object URI:
+ # s3://test-indexify-server-blob-store-eugene-20250411/225b83f4-2aed-40a7-adee-b7a681f817f2
  if not uri.startswith("s3://"):
  raise ValueError(f"S3 URI '{uri}' is missing 's3://' prefix")
+
  parts = uri[5:].split("/", 1)
  if len(parts) != 2:
  raise ValueError(f"Failed parsing bucket name from S3 URI '{uri}'")
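Note on the s3_blob_store.py change above: the added comment documents the expected URI shape. The helper strips the "s3://" prefix and splits on the first "/" to recover the bucket and object key; a standalone sketch mirroring that logic:

    def bucket_and_key_from_s3_uri(uri: str) -> tuple[str, str]:
        # Mirrors the parsing in _bucket_name_and_object_key_from_uri.
        if not uri.startswith("s3://"):
            raise ValueError(f"S3 URI '{uri}' is missing 's3://' prefix")
        parts = uri[5:].split("/", 1)
        if len(parts) != 2:
            raise ValueError(f"Failed parsing bucket name from S3 URI '{uri}'")
        return parts[0], parts[1]

    # bucket_and_key_from_s3_uri("s3://my-bucket/some/object/key") == ("my-bucket", "some/object/key")
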
src/indexify/executor/downloader.py
@@ -1,6 +1,6 @@
  import asyncio
  import os
- from typing import Any, Optional
+ from typing import Any, Optional, Union

  import httpx
  import nanoid
@@ -8,11 +8,10 @@ from tensorlake.function_executor.proto.function_executor_pb2 import SerializedO
  from tensorlake.function_executor.proto.message_validator import MessageValidator
  from tensorlake.utils.http_client import get_httpx_client

- from indexify.proto.executor_api_pb2 import (
- DataPayload,
- DataPayloadEncoding,
- )
+ from indexify.proto.executor_api_pb2 import DataPayload as DataPayloadProto
+ from indexify.proto.executor_api_pb2 import DataPayloadEncoding

+ from .api_objects import DataPayload
  from .blob_store.blob_store import BLOBStore
  from .metrics.downloader import (
  metric_graph_download_errors,
@@ -49,7 +48,7 @@ class Downloader:
  namespace: str,
  graph_name: str,
  graph_version: str,
- data_payload: Optional[DataPayload],
+ data_payload: Optional[Union[DataPayload, DataPayloadProto]],
  logger: Any,
  ) -> SerializedObject:
  logger = logger.bind(module=__name__)
@@ -99,7 +98,7 @@ class Downloader:
  function_name: str,
  graph_invocation_id: str,
  reducer_output_key: str,
- data_payload: Optional[DataPayload],
+ data_payload: Optional[Union[DataPayload, DataPayloadProto]],
  logger: Any,
  ) -> SerializedObject:
  logger = logger.bind(module=__name__)
@@ -124,7 +123,7 @@ class Downloader:
  namespace: str,
  graph_name: str,
  graph_version: str,
- data_payload: Optional[DataPayload],
+ data_payload: Optional[Union[DataPayload, DataPayloadProto]],
  logger: Any,
  ) -> SerializedObject:
  # Cache graph to reduce load on the server.
@@ -151,7 +150,7 @@ class Downloader:
  graph_version=graph_version,
  logger=logger,
  )
- else:
+ elif isinstance(data_payload, DataPayloadProto):
  (
  MessageValidator(data_payload)
  .required_field("uri")
@@ -160,7 +159,15 @@ class Downloader:
  data: bytes = await self._blob_store.get(
  uri=data_payload.uri, logger=logger
  )
- return _data_payload_to_serialized_object(
+ return _serialized_object_from_data_payload_proto(
+ data_payload=data_payload,
+ data=data,
+ )
+ elif isinstance(data_payload, DataPayload):
+ data: bytes = await self._blob_store.get(
+ uri=data_payload.path, logger=logger
+ )
+ return _serialized_object_from_data_payload(
  data_payload=data_payload,
  data=data,
  )
@@ -204,7 +211,7 @@ class Downloader:
  graph_name: str,
  graph_invocation_id: str,
  input_key: str,
- data_payload: Optional[DataPayload],
+ data_payload: Optional[Union[DataPayload, DataPayloadProto]],
  logger: Any,
  ) -> SerializedObject:
  if data_payload is None:
@@ -221,7 +228,7 @@ class Downloader:
  return await self._fetch_function_input_from_server(
  input_key=input_key, logger=logger
  )
- else:
+ elif isinstance(data_payload, DataPayloadProto):
  (
  MessageValidator(data_payload)
  .required_field("uri")
@@ -230,7 +237,15 @@ class Downloader:
  data: bytes = await self._blob_store.get(
  uri=data_payload.uri, logger=logger
  )
- return _data_payload_to_serialized_object(
+ return _serialized_object_from_data_payload_proto(
+ data_payload=data_payload,
+ data=data,
+ )
+ elif isinstance(data_payload, DataPayload):
+ data: bytes = await self._blob_store.get(
+ uri=data_payload.path, logger=logger
+ )
+ return _serialized_object_from_data_payload(
  data_payload=data_payload,
  data=data,
  )
@@ -242,7 +257,7 @@ class Downloader:
  function_name: str,
  graph_invocation_id: str,
  reducer_output_key: str,
- data_payload: Optional[DataPayload],
+ data_payload: Optional[Union[DataPayload, DataPayloadProto]],
  logger: Any,
  ) -> SerializedObject:
  if data_payload is None:
@@ -254,7 +269,7 @@ class Downloader:
  reducer_output_key=reducer_output_key,
  logger=logger,
  )
- else:
+ elif isinstance(data_payload, DataPayloadProto):
  (
  MessageValidator(data_payload)
  .required_field("uri")
@@ -263,7 +278,15 @@ class Downloader:
  data: bytes = await self._blob_store.get(
  uri=data_payload.uri, logger=logger
  )
- return _data_payload_to_serialized_object(
+ return _serialized_object_from_data_payload_proto(
+ data_payload=data_payload,
+ data=data,
+ )
+ elif isinstance(data_payload, DataPayload):
+ data: bytes = await self._blob_store.get(
+ uri=data_payload.path, logger=logger
+ )
+ return _serialized_object_from_data_payload(
  data_payload=data_payload,
  data=data,
  )
@@ -315,7 +338,11 @@ class Downloader:
  async def _fetch_url(
  self, url: str, resource_description: str, logger: Any
  ) -> SerializedObject:
- logger.info(f"fetching {resource_description}", url=url)
+ logger.warning(
+ f"downloading resource from Server",
+ url=url,
+ resource_description=resource_description,
+ )
  response: httpx.Response = await self._client.get(url)
  try:
  response.raise_for_status()
@@ -346,8 +373,23 @@ def serialized_object_from_http_response(response: httpx.Response) -> Serialized
  )


- def _data_payload_to_serialized_object(
+ def _serialized_object_from_data_payload(
  data_payload: DataPayload, data: bytes
+ ) -> SerializedObject:
+ """Converts the given data payload and its data into SerializedObject accepted by Function Executor."""
+ if data_payload.content_type in [
+ "application/octet-stream",
+ "application/pickle",
+ ]:
+ return SerializedObject(bytes=data, content_type=data_payload.content_type)
+ else:
+ return SerializedObject(
+ string=data.decode("utf-8"), content_type=data_payload.content_type
+ )
+
+
+ def _serialized_object_from_data_payload_proto(
+ data_payload: DataPayloadProto, data: bytes
  ) -> SerializedObject:
  """Converts the given data payload and its data into SerializedObject accepted by Function Executor.

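Note on the downloader.py changes above: the download methods now accept either the HTTP API DataPayload (from .api_objects, addressed by path) or the gRPC DataPayloadProto (addressed by uri) and dispatch on isinstance. For the HTTP model, the content type alone decides whether the blob becomes a binary or a UTF-8 string SerializedObject. A hedged sketch of that mapping, mirroring _serialized_object_from_data_payload in this diff:

    from tensorlake.function_executor.proto.function_executor_pb2 import SerializedObject

    def to_serialized_object(content_type: str, data: bytes) -> SerializedObject:
        # Binary payloads pass through as bytes; everything else is decoded as UTF-8 text.
        if content_type in ("application/octet-stream", "application/pickle"):
            return SerializedObject(bytes=data, content_type=content_type)
        return SerializedObject(string=data.decode("utf-8"), content_type=content_type)
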
src/indexify/executor/executor.py
@@ -128,6 +128,7 @@ class Executor:
  executor_id=id,
  config_path=config_path,
  channel_manager=self._channel_manager,
+ blob_store=blob_store,
  )

  # HTTP mode task runner
@@ -261,6 +262,7 @@ class Executor:
  function_name=task.compute_fn,
  graph_version=task.graph_version,
  graph_invocation_id=task.invocation_id,
+ output_payload_uri_prefix=task.output_payload_uri_prefix,
  )
  logger.error("task execution failed", exc_info=e)

@@ -304,19 +306,19 @@ class Executor:
  graph_name=task.compute_graph,
  graph_version=task.graph_version,
  logger=logger,
- data_payload=None,
+ data_payload=task.graph_payload,
  )
  input: SerializedObject = await self._downloader.download_input(
  namespace=task.namespace,
  graph_name=task.compute_graph,
  graph_invocation_id=task.invocation_id,
  input_key=task.input_key,
- data_payload=None,
+ data_payload=task.input_payload,
  logger=logger,
  )
  init_value: Optional[SerializedObject] = (
  None
- if task.reducer_output_id is None
+ if task.reducer_output_id is None and task.reducer_input_payload is None
  else (
  await self._downloader.download_init_value(
  namespace=task.namespace,
@@ -324,7 +326,7 @@ class Executor:
  function_name=task.compute_fn,
  graph_invocation_id=task.invocation_id,
  reducer_output_key=task.reducer_output_id,
- data_payload=None,
+ data_payload=task.reducer_input_payload,
  logger=logger,
  )
  )
src/indexify/executor/function_executor/server/function_executor_server_factory.py
@@ -19,6 +19,9 @@ class FunctionExecutorServerConfiguration:
  executor_id: str
  function_executor_id: str
  namespace: str
+ graph_name: str
+ function_name: str
+ graph_version: str
  image_uri: Optional[str]
  secret_names: List[str]

src/indexify/executor/function_executor/single_task_runner.py
@@ -96,6 +96,7 @@ class SingleTaskRunner:
  graph_invocation_id=self._task_input.task.invocation_id,
  stderr=str(e),
  success=False,
+ output_payload_uri_prefix=self._task_input.task.output_payload_uri_prefix,
  )

  try:
@@ -137,6 +138,9 @@ class SingleTaskRunner:
  namespace=self._task_input.task.namespace,
  image_uri=self._task_input.task.image_uri,
  secret_names=self._task_input.task.secret_names or [],
+ graph_name=self._task_input.task.compute_graph,
+ graph_version=self._task_input.task.graph_version,
+ function_name=self._task_input.task.compute_fn,
  )
  )
  initialize_request: InitializeRequest = InitializeRequest(
@@ -311,6 +315,7 @@ def _task_output(task: Task, response: RunTaskResponse) -> TaskOutput:
  reducer=response.is_reducer,
  success=response.success,
  metrics=metrics,
+ output_payload_uri_prefix=task.output_payload_uri_prefix,
  )

  if response.HasField("function_output"):
src/indexify/executor/function_executor/task_output.py
@@ -25,6 +25,7 @@ class TaskOutput:
  function_name: str,
  graph_version: str,
  graph_invocation_id: str,
+ output_payload_uri_prefix: Optional[str],
  output_encoding: Optional[str] = None,
  function_output: Optional[FunctionOutput] = None,
  router_output: Optional[RouterOutput] = None,
@@ -50,6 +51,7 @@ class TaskOutput:
  self.is_internal_error = is_internal_error
  self.metrics = metrics
  self.output_encoding = output_encoding
+ self.output_payload_uri_prefix = output_payload_uri_prefix

  @classmethod
  def internal_error(
@@ -60,6 +62,7 @@ class TaskOutput:
  function_name: str,
  graph_version: str,
  graph_invocation_id: str,
+ output_payload_uri_prefix: Optional[str],
  ) -> "TaskOutput":
  """Creates a TaskOutput for an internal error."""
  # We are not sharing internal error messages with the customer.
@@ -72,6 +75,7 @@ class TaskOutput:
  graph_invocation_id=graph_invocation_id,
  stderr="Platform failed to execute the function.",
  is_internal_error=True,
+ output_payload_uri_prefix=output_payload_uri_prefix,
  )

  @classmethod
@@ -84,6 +88,7 @@ class TaskOutput:
  graph_version: str,
  graph_invocation_id: str,
  timeout_sec: float,
+ output_payload_uri_prefix: Optional[str],
  ) -> "TaskOutput":
  """Creates a TaskOutput for an function timeout error."""
  # Task stdout, stderr is not available.
@@ -96,4 +101,5 @@ class TaskOutput:
  graph_invocation_id=graph_invocation_id,
  stderr=f"Function exceeded its configured timeout of {timeout_sec:.3f} sec.",
  is_internal_error=False,
+ output_payload_uri_prefix=output_payload_uri_prefix,
  )
src/indexify/executor/grpc/function_executor_controller.py
@@ -335,6 +335,9 @@ async def _create_function_executor(
  namespace=function_executor_description.namespace,
  image_uri=None,
  secret_names=list(function_executor_description.secret_names),
+ graph_name=function_executor_description.graph_name,
+ graph_version=function_executor_description.graph_version,
+ function_name=function_executor_description.function_name,
  )
  if function_executor_description.HasField("image_uri"):
  config.image_uri = function_executor_description.image_uri
src/indexify/executor/grpc/state_reconciler.py
@@ -9,9 +9,7 @@ from indexify.proto.executor_api_pb2 import (
  GetDesiredExecutorStatesRequest,
  TaskAllocation,
  )
- from indexify.proto.executor_api_pb2_grpc import (
- ExecutorAPIStub,
- )
+ from indexify.proto.executor_api_pb2_grpc import ExecutorAPIStub

  from ..downloader import Downloader
  from ..function_executor.function_executor_state import FunctionExecutorState
src/indexify/executor/grpc/state_reporter.py
@@ -24,9 +24,7 @@ from indexify.proto.executor_api_pb2 import (
  HostResources,
  ReportExecutorStateRequest,
  )
- from indexify.proto.executor_api_pb2_grpc import (
- ExecutorAPIStub,
- )
+ from indexify.proto.executor_api_pb2_grpc import ExecutorAPIStub

  from ..api_objects import FunctionURI
  from ..executor_flavor import ExecutorFlavor
src/indexify/executor/grpc/task_controller.py
@@ -396,6 +396,11 @@ class TaskController:
  function_name=self._task.function_name,
  graph_version=self._task.graph_version,
  graph_invocation_id=self._task.graph_invocation_id,
+ output_payload_uri_prefix=(
+ self._task.output_payload_uri_prefix
+ if self._task.HasField("output_payload_uri_prefix")
+ else None
+ ),
  )

  def _function_timeout_output(self, timeout_sec: float) -> TaskOutput:
@@ -407,6 +412,11 @@ class TaskController:
  graph_version=self._task.graph_version,
  graph_invocation_id=self._task.graph_invocation_id,
  timeout_sec=timeout_sec,
+ output_payload_uri_prefix=(
+ self._task.output_payload_uri_prefix
+ if self._task.HasField("output_payload_uri_prefix")
+ else None
+ ),
  )


@@ -437,6 +447,11 @@ def _task_output_from_function_executor_response(
  reducer=response.is_reducer,
  success=response.success,
  metrics=metrics,
+ output_payload_uri_prefix=(
+ task.output_payload_uri_prefix
+ if task.HasField("output_payload_uri_prefix")
+ else None
+ ),
  )

  if response.HasField("function_output"):
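Note on the task_controller.py changes above: output_payload_uri_prefix is an optional field on the task proto, so its presence is checked with HasField() before reading it; reading an unset optional scalar would otherwise yield an empty string rather than None. The pattern, as used in the hunks above:

    # Presence-checked read of an optional proto field (task is the gRPC Task message).
    output_payload_uri_prefix = (
        task.output_payload_uri_prefix
        if task.HasField("output_payload_uri_prefix")
        else None
    )
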
src/indexify/executor/metrics/task_reporter.py
@@ -21,6 +21,23 @@ metric_server_ingest_files_latency: prometheus_client.Histogram = (
  )
  )

+ metric_task_output_blob_store_uploads: prometheus_client.Counter = (
+ prometheus_client.Counter(
+ "task_output_blob_store_uploads", "Number of task output uploads to blob store"
+ )
+ )
+ metric_task_output_blob_store_upload_errors: prometheus_client.Counter = (
+ prometheus_client.Counter(
+ "task_output_blob_store_upload_errors",
+ "Number of failed task output uploads to blob store",
+ )
+ )
+ metric_task_output_blob_store_upload_latency: prometheus_client.Histogram = (
+ latency_metric_for_fast_operation(
+ "task_output_blob_store_upload", "Upload task output to blob store"
+ )
+ )
+
  metric_report_task_outcome_rpcs = prometheus_client.Counter(
  "report_task_outcome_rpcs",
  "Number of report task outcome RPCs to Server",
src/indexify/executor/task_reporter.py
@@ -1,4 +1,5 @@
  import asyncio
+ import hashlib
  import time
  from typing import Any, List, Optional, Tuple

@@ -7,8 +8,8 @@ from httpx import Timeout
  from tensorlake.function_executor.proto.function_executor_pb2 import FunctionOutput
  from tensorlake.utils.http_client import get_httpx_client

+ from indexify.proto.executor_api_pb2 import DataPayload as DataPayloadProto
  from indexify.proto.executor_api_pb2 import (
- DataPayload,
  DataPayloadEncoding,
  OutputEncoding,
  ReportTaskOutcomeRequest,
@@ -19,10 +20,12 @@ from indexify.proto.executor_api_pb2_grpc import ExecutorAPIStub
  from .api_objects import (
  TASK_OUTCOME_FAILURE,
  TASK_OUTCOME_SUCCESS,
+ DataPayload,
  IngestFnOutputsResponse,
  RouterOutput,
  TaskResult,
  )
+ from .blob_store.blob_store import BLOBStore
  from .function_executor.task_output import TaskOutput
  from .grpc.channel_manager import ChannelManager
  from .metrics.task_reporter import (
@@ -32,6 +35,9 @@ from .metrics.task_reporter import (
  metric_server_ingest_files_errors,
  metric_server_ingest_files_latency,
  metric_server_ingest_files_requests,
+ metric_task_output_blob_store_upload_errors,
+ metric_task_output_blob_store_upload_latency,
+ metric_task_output_blob_store_uploads,
  )


@@ -63,6 +69,7 @@ class TaskReporter:
  base_url: str,
  executor_id: str,
  channel_manager: ChannelManager,
+ blob_store: BLOBStore,
  config_path: Optional[str] = None,
  ):
  self._base_url = base_url
@@ -75,6 +82,7 @@ class TaskReporter:
  # results in not reusing established TCP connections to server.
  self._client = get_httpx_client(config_path, make_async=False)
  self._channel_manager = channel_manager
+ self._blob_store = blob_store

  async def shutdown(self) -> None:
  """Shuts down the task reporter.
@@ -95,9 +103,13 @@ class TaskReporter:
  )
  return

- task_result, output_files, output_summary = self._process_task_output(output)
- task_result_data = task_result.model_dump_json(exclude_none=True)
+ # TODO: If the files are uploaded successfully,
+ # we should record that so that if we fail to report
+ # the task outcome, we don't retry the upload.
+ # This will save us some time and resources.
+ # It's good to do this once we delete all the legacy code paths.

+ output_summary: TaskOutputSummary = _task_output_summary(output)
  logger.info(
  "reporting task outcome",
  total_bytes=output_summary.total_bytes,
@@ -111,56 +123,15 @@ class TaskReporter:
  stderr_bytes=output_summary.stderr_total_bytes,
  )

- kwargs = {
- "data": {"task_result": task_result_data},
- # Use httpx default timeout of 5s for all timeout types.
- # For read timeouts, use 5 minutes to allow for large file uploads.
- "timeout": Timeout(
- 5.0,
- read=5.0 * 60,
- ),
- "files": output_files if len(output_files) > 0 else FORCE_MULTIPART,
- }
-
- # TODO: Instead of uploading the files to server, upload them to S3.
- start_time = time.time()
- with metric_server_ingest_files_latency.time():
- metric_server_ingest_files_requests.inc()
- # Run in a separate thread to not block the main event loop.
- response = await asyncio.to_thread(
- self._client.post,
- url=f"{self._base_url}/internal/ingest_fn_outputs",
- **kwargs,
- )
- end_time = time.time()
- logger.info(
- "files uploaded",
- response_time=end_time - start_time,
- response_code=response.status_code,
- )
-
- try:
- response.raise_for_status()
- except Exception as e:
- metric_server_ingest_files_errors.inc()
- # Caller catches and logs the exception.
- raise Exception(
- "failed to upload files. "
- f"Response code: {response.status_code}. "
- f"Response text: '{response.text}'."
- ) from e
-
- # TODO: If the files are uploaded successfully,
- # we should record that so that if we fail to report
- # the task outcome, we don't retry the upload.
- # This will save us some time and resources.
+ if output.output_payload_uri_prefix is None:
+ ingested_files = await self._ingest_files_at_server(output, logger)
+ else:
+ ingested_files = await self._ingest_files_at_blob_store(output, logger)

- ingested_files_response = response.json()
- ingested_files = IngestFnOutputsResponse.model_validate(ingested_files_response)
  fn_outputs = []
  for data_payload in ingested_files.data_payloads:
  fn_outputs.append(
- DataPayload(
+ DataPayloadProto(
  path=data_payload.path, # TODO: stop using this deprecated field once Server side migration is done.
  uri=data_payload.path,
  size=data_payload.size,
@@ -170,8 +141,8 @@ class TaskReporter:
  )
  )
  stdout, stderr = None, None
- if ingested_files.stdout:
- stdout = DataPayload(
+ if ingested_files.stdout is not None:
+ stdout = DataPayloadProto(
  path=ingested_files.stdout.path, # TODO: stop using this deprecated field once Server side migration is done.
  uri=ingested_files.stdout.path,
  size=ingested_files.stdout.size,
@@ -179,8 +150,8 @@ class TaskReporter:
  encoding=DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT,
  encoding_version=0,
  )
- if ingested_files.stderr:
- stderr = DataPayload(
+ if ingested_files.stderr is not None:
+ stderr = DataPayloadProto(
  path=ingested_files.stderr.path, # TODO: stop using this deprecated field once Server side migration is done.
  uri=ingested_files.stderr.path,
  size=ingested_files.stderr.size,
@@ -218,9 +189,132 @@ class TaskReporter:
  logger.error("failed to report task outcome", error=e)
  raise e

- def _process_task_output(
- self, output: TaskOutput
- ) -> Tuple[TaskResult, List[Any], TaskOutputSummary]:
+ async def _ingest_files_at_server(
+ self, output: TaskOutput, logger: Any
+ ) -> IngestFnOutputsResponse:
+ logger.warning("uploading task output files to server (deprecated mode)")
+
+ task_result, output_files = self._process_task_output(output)
+ task_result_data = task_result.model_dump_json(exclude_none=True)
+
+ kwargs = {
+ "data": {"task_result": task_result_data},
+ # Use httpx default timeout of 5s for all timeout types.
+ # For read timeouts, use 5 minutes to allow for large file uploads.
+ "timeout": Timeout(
+ 5.0,
+ read=5.0 * 60,
+ ),
+ "files": output_files if len(output_files) > 0 else FORCE_MULTIPART,
+ }
+
+ start_time = time.time()
+ with metric_server_ingest_files_latency.time():
+ metric_server_ingest_files_requests.inc()
+ # Run in a separate thread to not block the main event loop.
+ response = await asyncio.to_thread(
+ self._client.post,
+ url=f"{self._base_url}/internal/ingest_fn_outputs",
+ **kwargs,
+ )
+ end_time = time.time()
+ logger.info(
+ "files uploaded to server",
+ response_time=end_time - start_time,
+ response_code=response.status_code,
+ )
+
+ try:
+ response.raise_for_status()
+ except Exception as e:
+ metric_server_ingest_files_errors.inc()
+ # Caller catches and logs the exception.
+ raise Exception(
+ "failed to upload files. "
+ f"Response code: {response.status_code}. "
+ f"Response text: '{response.text}'."
+ ) from e
+
+ ingested_files_response = response.json()
+ return IngestFnOutputsResponse.model_validate(ingested_files_response)
+
+ async def _ingest_files_at_blob_store(
+ self, output: TaskOutput, logger: Any
+ ) -> IngestFnOutputsResponse:
+ start_time = time.time()
+ with (
+ metric_task_output_blob_store_upload_latency.time(),
+ metric_task_output_blob_store_upload_errors.count_exceptions(),
+ ):
+ metric_task_output_blob_store_uploads.inc()
+ response = await self._upload_output_to_blob_store(output, logger)
+
+ logger.info(
+ "files uploaded to blob store",
+ duration=time.time() - start_time,
+ )
+ return response
+
+ async def _upload_output_to_blob_store(
+ self, output: TaskOutput, logger: Any
+ ) -> IngestFnOutputsResponse:
+ data_payloads: List[DataPayload] = []
+ stdout: Optional[DataPayload] = None
+ stderr: Optional[DataPayload] = None
+
+ if output.stdout is not None:
+ stdout_url = f"{output.output_payload_uri_prefix}.{output.task_id}.stdout"
+ stdout_bytes: bytes = output.stdout.encode()
+ await self._blob_store.put(stdout_url, stdout_bytes, logger)
+ stdout = DataPayload(
+ path=stdout_url,
+ size=len(stdout_bytes),
+ sha256_hash=_compute_hash(stdout_bytes),
+ )
+
+ if output.stderr is not None:
+ stderr_url = f"{output.output_payload_uri_prefix}.{output.task_id}.stderr"
+ stderr_bytes: bytes = output.stderr.encode()
+ await self._blob_store.put(stderr_url, stderr_bytes, logger)
+ stderr = DataPayload(
+ path=stderr_url,
+ size=len(stderr_bytes),
+ sha256_hash=_compute_hash(stderr_bytes),
+ )
+
+ if output.function_output is not None:
+ for func_output_item in output.function_output.outputs:
+ node_output_sequence = len(data_payloads)
+ if output.reducer:
+ # Reducer tasks have to write their results into the same blob.
+ output_url = (
+ f"{output.output_payload_uri_prefix}.{node_output_sequence}"
+ )
+ else:
+ # Regular tasks write their results into different blobs made unique using task ids.
+ output_url = f"{output.output_payload_uri_prefix}.{output.task_id}.{node_output_sequence}"
+
+ output_bytes: bytes = (
+ func_output_item.bytes
+ if func_output_item.HasField("bytes")
+ else func_output_item.string.encode()
+ )
+ await self._blob_store.put(output_url, output_bytes, logger)
+ data_payloads.append(
+ DataPayload(
+ path=output_url,
+ size=len(output_bytes),
+ sha256_hash=_compute_hash(output_bytes),
+ )
+ )
+
+ return IngestFnOutputsResponse(
+ data_payloads=data_payloads,
+ stdout=stdout,
+ stderr=stderr,
+ )
+
+ def _process_task_output(self, output: TaskOutput) -> Tuple[TaskResult, List[Any]]:
  task_result = TaskResult(
  outcome="failure",
  namespace=output.namespace,
@@ -231,9 +325,8 @@ class TaskReporter:
  task_id=output.task_id,
  )
  output_files: List[Any] = []
- summary: TaskOutputSummary = TaskOutputSummary()
  if output is None:
- return task_result, output_files, summary
+ return task_result, output_files

  task_result.outcome = (
  TASK_OUTCOME_SUCCESS if output.success else TASK_OUTCOME_FAILURE
@@ -241,33 +334,19 @@ class TaskReporter:
  task_result.reducer = output.reducer

  _process_function_output(
- function_output=output.function_output,
- output_files=output_files,
- summary=summary,
+ function_output=output.function_output, output_files=output_files
  )
  _process_router_output(
- router_output=output.router_output, task_result=task_result, summary=summary
- )
- _process_stdout(
- stdout=output.stdout, output_files=output_files, summary=summary
- )
- _process_stderr(
- stderr=output.stderr, output_files=output_files, summary=summary
+ router_output=output.router_output, task_result=task_result
  )
+ _process_stdout(stdout=output.stdout, output_files=output_files)
+ _process_stderr(stderr=output.stderr, output_files=output_files)

- summary.total_bytes = (
- summary.output_total_bytes
- + summary.stdout_total_bytes
- + summary.stderr_total_bytes
- )
-
- return task_result, output_files, summary
+ return task_result, output_files


  def _process_function_output(
- function_output: Optional[FunctionOutput],
- output_files: List[Any],
- summary: TaskOutputSummary,
+ function_output: Optional[FunctionOutput], output_files: List[Any]
  ) -> None:
  if function_output is None:
  return
@@ -280,25 +359,19 @@ def _process_function_output(
  (nanoid.generate(), payload, output.content_type),
  )
  )
- summary.output_count += 1
- summary.output_total_bytes += len(payload)


  def _process_router_output(
  router_output: Optional[RouterOutput],
  task_result: TaskResult,
- summary: TaskOutputSummary,
  ) -> None:
  if router_output is None:
  return

  task_result.router_output = RouterOutput(edges=router_output.edges)
- summary.router_output_count += 1


- def _process_stdout(
- stdout: Optional[str], output_files: List[Any], summary: TaskOutputSummary
- ) -> None:
+ def _process_stdout(stdout: Optional[str], output_files: List[Any]) -> None:
  if stdout is None:
  return

@@ -312,13 +385,9 @@ def _process_stdout(
  ),
  )
  )
- summary.stdout_count += 1
- summary.stdout_total_bytes += len(stdout)


- def _process_stderr(
- stderr: Optional[str], output_files: List[Any], summary: TaskOutputSummary
- ) -> None:
+ def _process_stderr(stderr: Optional[str], output_files: List[Any]) -> None:
  if stderr is None:
  return

@@ -332,8 +401,38 @@ def _process_stderr(
  ),
  )
  )
- summary.stderr_count += 1
- summary.stderr_total_bytes += len(stderr)
+
+
+ def _task_output_summary(output: TaskOutput) -> TaskOutputSummary:
+ summary: TaskOutputSummary = TaskOutputSummary()
+
+ if output.stdout is not None:
+ summary.stdout_count += 1
+ summary.stdout_total_bytes += len(output.stdout)
+
+ if output.stderr is not None:
+ summary.stderr_count += 1
+ summary.stderr_total_bytes += len(output.stderr)
+
+ if output.function_output is not None:
+ for func_output_item in output.function_output.outputs:
+ output_len: bytes = len(
+ func_output_item.bytes
+ if func_output_item.HasField("bytes")
+ else func_output_item.string
+ )
+ summary.output_count += 1
+ summary.output_total_bytes += output_len
+
+ if output.router_output is not None:
+ summary.router_output_count += 1
+
+ summary.total_bytes = (
+ summary.output_total_bytes
+ + summary.stdout_total_bytes
+ + summary.stderr_total_bytes
+ )
+ return summary


  def _to_grpc_task_outcome(task_output: TaskOutput) -> TaskOutcome:
@@ -355,3 +454,9 @@ def _to_grpc_data_payload_encoding(task_output: TaskOutput) -> DataPayloadEncodi
  return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_JSON
  else:
  return DataPayloadEncoding.DATA_PAYLOAD_ENCODING_BINARY_PICKLE
+
+
+ def _compute_hash(data: bytes) -> str:
+ hasher = hashlib.sha256(usedforsecurity=False)
+ hasher.update(data)
+ return hasher.hexdigest()
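Note on the task_reporter.py changes above: when a task carries an output_payload_uri_prefix, outputs are uploaded directly to the blob store under URIs derived from that prefix, and each DataPayload records the object's SHA-256 digest computed by _compute_hash. A small sketch of the naming and hashing scheme (the prefix and task ID below are illustrative):

    import hashlib

    def compute_hash(data: bytes) -> str:
        # Same scheme as _compute_hash above: SHA-256 hex digest, not used for security.
        hasher = hashlib.sha256(usedforsecurity=False)
        hasher.update(data)
        return hasher.hexdigest()

    prefix = "s3://example-bucket/outputs/invocation-1"  # illustrative output_payload_uri_prefix
    task_id = "task-1"                                    # illustrative task ID

    stdout_uri = f"{prefix}.{task_id}.stdout"   # stdout blob
    stderr_uri = f"{prefix}.{task_id}.stderr"   # stderr blob
    output_uri = f"{prefix}.{task_id}.0"        # regular task output, unique per task and sequence
    reducer_uri = f"{prefix}.0"                 # reducer output: shared blob, sequence only

    digest = compute_hash(b"example output bytes")
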
src/indexify/executor/task_runner.py
@@ -85,6 +85,7 @@ class TaskRunner:
  function_name=task_input.task.compute_fn,
  graph_version=task_input.task.graph_version,
  graph_invocation_id=task_input.task.invocation_id,
+ output_payload_uri_prefix=task_input.task.output_payload_uri_prefix,
  )
  finally:
  if state is not None: