indexify 0.4.20__tar.gz → 0.4.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.4.20 → indexify-0.4.22}/PKG-INFO +7 -3
- {indexify-0.4.20 → indexify-0.4.22}/pyproject.toml +9 -4
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/cli/build_image.py +30 -2
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/function_executor_controller.py +37 -5
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/metrics/run_task.py +5 -4
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/run_task.py +158 -42
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/task_output.py +34 -1
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/proto/executor_api.proto +4 -2
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/proto/executor_api_pb2.py +20 -20
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/proto/executor_api_pb2.pyi +4 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/proto/executor_api_pb2_grpc.py +1 -1
- {indexify-0.4.20 → indexify-0.4.22}/README.md +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/cli/__init__.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/cli/deploy.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/cli/executor.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/README.md +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/blob_store/blob_store.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/blob_store/local_fs_blob_store.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/blob_store/metrics/blob_store.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/blob_store/s3_blob_store.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/channel_manager.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/executor.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_allowlist.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor/function_executor.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor/health_checker.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor/invocation_state_client.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/__init__.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/completed_task_metrics.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/create_function_executor.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/debug_event_loop.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/downloads.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/events.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/function_executor_startup_output.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/loggers.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/message_validators.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/metrics/downloads.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/metrics/function_executor_controller.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/metrics/upload_task_output.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/prepare_task.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/task_info.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/terminate_function_executor.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/upload_task_output.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/host_resources/host_resources.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/host_resources/nvidia_gpu.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/metrics/channel_manager.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/metrics/executor.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/metrics/state_reconciler.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/metrics/state_reporter.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/monitoring/handler.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/monitoring/health_checker/metrics/health_checker.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/monitoring/metrics.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/monitoring/server.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/state_reconciler.py +0 -0
- {indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/state_reporter.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: indexify
|
3
|
-
Version: 0.4.20
|
3
|
+
Version: 0.4.22
|
4
4
|
Summary: Open Source Indexify components and helper tools
|
5
5
|
Home-page: https://github.com/tensorlakeai/indexify
|
6
6
|
License: Apache 2.0
|
@@ -14,12 +14,16 @@ Classifier: Programming Language :: Python :: 3.11
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
16
16
|
Requires-Dist: aiohttp (>=3.12.14,<4.0.0)
|
17
|
-
Requires-Dist: boto3 (>=1.39.
|
17
|
+
Requires-Dist: boto3 (>=1.39.15,<2.0.0)
|
18
|
+
Requires-Dist: docker (>=7.1.0,<8.0.0)
|
18
19
|
Requires-Dist: httpx[http2] (==0.27.2)
|
20
|
+
Requires-Dist: nanoid (>=2.0.0,<3.0.0)
|
19
21
|
Requires-Dist: prometheus-client (>=0.22.1,<0.23.0)
|
20
22
|
Requires-Dist: psutil (>=7.0.0,<8.0.0)
|
21
23
|
Requires-Dist: pydantic (>=2.11,<3.0)
|
22
|
-
Requires-Dist:
|
24
|
+
Requires-Dist: requests (>=2.32.4,<3.0.0)
|
25
|
+
Requires-Dist: tensorlake (==0.2.37)
|
26
|
+
Requires-Dist: urllib3 (>=2.5.0,<3.0.0)
|
23
27
|
Project-URL: Repository, https://github.com/tensorlakeai/indexify
|
24
28
|
Description-Content-Type: text/markdown
|
25
29
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "indexify"
|
3
3
|
# Incremented if any of the components provided in this package are updated.
|
4
|
-
version = "0.4.20"
|
4
|
+
version = "0.4.22"
|
5
5
|
description = "Open Source Indexify components and helper tools"
|
6
6
|
authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
|
7
7
|
license = "Apache 2.0"
|
@@ -15,6 +15,7 @@ indexify-cli = "indexify.cli:cli"
|
|
15
15
|
[tool.poetry.dependencies]
|
16
16
|
# Common dependencies
|
17
17
|
python = "^3.10"
|
18
|
+
nanoid = "^2.0.0"
|
18
19
|
# structlog is provided by tensorlake
|
19
20
|
# pyyaml is provided by tensorlake
|
20
21
|
|
@@ -25,18 +26,22 @@ httpx = { version = "0.27.2", extras = ["http2"] }
|
|
25
26
|
pydantic = "^2.11"
|
26
27
|
prometheus-client = "^0.22.1"
|
27
28
|
psutil = "^7.0.0"
|
28
|
-
boto3 = "^1.39.
|
29
|
+
boto3 = "^1.39.15"
|
29
30
|
# Adds function-executor binary, utils lib, sdk used in indexify-cli commands.
|
30
31
|
# We need to specify the tensorlake version exactly because pip install doesn't respect poetry.lock files.
|
31
|
-
tensorlake = "0.2.
|
32
|
+
tensorlake = "0.2.37"
|
32
33
|
# Uncomment the next line to use local tensorlake package (only for development!)
|
33
34
|
# tensorlake = { path = "../tensorlake", develop = true }
|
34
35
|
# grpcio is provided by tensorlake
|
35
36
|
# grpcio-tools is provided by tensorlake
|
36
37
|
|
37
38
|
# CLI only
|
38
|
-
# nanoid is provided by tensorlake
|
39
39
|
# click is provided by tensorlake
|
40
|
+
docker = "^7.1.0"
|
41
|
+
|
42
|
+
# Packages pinned to mitigate vulnerabilities, not a direct dependency of tensorlake
|
43
|
+
requests = "^2.32.4"
|
44
|
+
urllib3 = "^2.5.0"
|
40
45
|
|
41
46
|
[tool.poetry.group.dev.dependencies]
|
42
47
|
black = "25.1.0"
|
@@ -1,6 +1,11 @@
|
|
1
1
|
import importlib
|
2
|
+
from typing import Any, Generator, Tuple
|
2
3
|
|
3
4
|
import click
|
5
|
+
import docker
|
6
|
+
import docker.api.build
|
7
|
+
import docker.models
|
8
|
+
import docker.models.images
|
4
9
|
from tensorlake.functions_sdk.image import Image
|
5
10
|
from tensorlake.functions_sdk.workflow_module import (
|
6
11
|
WorkflowModuleInfo,
|
@@ -37,6 +42,9 @@ def build_image(
|
|
37
42
|
)
|
38
43
|
raise click.Abort
|
39
44
|
|
45
|
+
docker_client: docker.DockerClient = docker.from_env()
|
46
|
+
docker_client.ping()
|
47
|
+
|
40
48
|
indexify_version: str = importlib.metadata.version("indexify")
|
41
49
|
for image in workflow_module_info.images.keys():
|
42
50
|
image: Image
|
@@ -49,8 +57,28 @@ def build_image(
|
|
49
57
|
click.echo(f"Building image `{image.image_name}`")
|
50
58
|
|
51
59
|
image.run(f"pip install 'indexify=={indexify_version}'")
|
52
|
-
built_image,
|
53
|
-
|
60
|
+
built_image, logs_generator = image.build()
|
61
|
+
built_image: docker.models.images.Image
|
62
|
+
for output in logs_generator:
|
54
63
|
click.secho(output)
|
55
64
|
|
56
65
|
click.secho(f"built image: {built_image.tags[0]}", fg="green")
|
66
|
+
|
67
|
+
|
68
|
+
def build(
|
69
|
+
image: Image, docker_client: docker.DockerClient
|
70
|
+
) -> Tuple[docker.models.images.Image, Generator[str, Any, None]]:
|
71
|
+
docker_file = image.dockerfile()
|
72
|
+
image_name = f"{image.image_name}:{image.image_tag}"
|
73
|
+
|
74
|
+
docker.api.build.process_dockerfile = lambda dockerfile, path: (
|
75
|
+
"Dockerfile",
|
76
|
+
dockerfile,
|
77
|
+
)
|
78
|
+
|
79
|
+
return docker_client.images.build(
|
80
|
+
path=".",
|
81
|
+
dockerfile=docker_file,
|
82
|
+
tag=image_name,
|
83
|
+
rm=True,
|
84
|
+
)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import asyncio
|
2
|
+
import math
|
2
3
|
import time
|
3
4
|
from collections.abc import Coroutine
|
4
5
|
from enum import Enum
|
@@ -552,11 +553,21 @@ class FunctionExecutorController:
|
|
552
553
|
task_info: TaskInfo = event.task_info
|
553
554
|
|
554
555
|
if task_info.is_cancelled:
|
555
|
-
task_info.output = TaskOutput.task_cancelled(
|
556
|
+
task_info.output = TaskOutput.task_cancelled(
|
557
|
+
allocation=task_info.allocation,
|
558
|
+
# Task was prepared but never executed
|
559
|
+
execution_start_time=None,
|
560
|
+
execution_end_time=None,
|
561
|
+
)
|
556
562
|
self._start_task_output_upload(task_info)
|
557
563
|
return
|
558
564
|
if not event.is_success:
|
559
|
-
task_info.output = TaskOutput.internal_error(
|
565
|
+
task_info.output = TaskOutput.internal_error(
|
566
|
+
allocation=task_info.allocation,
|
567
|
+
# Task was prepared but never executed
|
568
|
+
execution_start_time=None,
|
569
|
+
execution_end_time=None,
|
570
|
+
)
|
560
571
|
self._start_task_output_upload(task_info)
|
561
572
|
return
|
562
573
|
|
@@ -599,7 +610,12 @@ class FunctionExecutorController:
|
|
599
610
|
)
|
600
611
|
|
601
612
|
if task_info.is_cancelled:
|
602
|
-
task_info.output = TaskOutput.task_cancelled(
|
613
|
+
task_info.output = TaskOutput.task_cancelled(
|
614
|
+
allocation=task_info.allocation,
|
615
|
+
# Task is runnable but it was never executed
|
616
|
+
execution_start_time=None,
|
617
|
+
execution_end_time=None,
|
618
|
+
)
|
603
619
|
self._start_task_output_upload(task_info)
|
604
620
|
elif self._internal_state in [
|
605
621
|
_FE_CONTROLLER_STATE.TERMINATING,
|
@@ -691,11 +707,16 @@ class FunctionExecutorController:
|
|
691
707
|
|
692
708
|
Doesn't raise any exceptions. Doesn't block.
|
693
709
|
"""
|
694
|
-
# Ignore task cancellation because we need to report it to the server anyway.
|
695
710
|
task_info: TaskInfo = event.task_info
|
696
711
|
if not event.is_success:
|
697
|
-
task_info.output
|
712
|
+
failed_to_upload_output: TaskOutput = task_info.output # Never None here
|
713
|
+
task_info.output = TaskOutput.internal_error(
|
714
|
+
allocation=task_info.allocation,
|
715
|
+
execution_start_time=failed_to_upload_output.execution_start_time,
|
716
|
+
execution_end_time=failed_to_upload_output.execution_end_time,
|
717
|
+
)
|
698
718
|
|
719
|
+
# Ignore task cancellation: it's better to report the real task output to the server because it's already uploaded.
|
699
720
|
self._complete_task(event.task_info)
|
700
721
|
|
701
722
|
def _complete_task(self, task_info: TaskInfo) -> None:
|
@@ -845,6 +866,16 @@ def _termination_reason_to_short_name(value: FunctionExecutorTerminationReason)
|
|
845
866
|
|
846
867
|
|
847
868
|
def _to_task_result_proto(output: TaskOutput) -> TaskResult:
|
869
|
+
execution_duration_ms: Optional[int] = None
|
870
|
+
if (
|
871
|
+
output.execution_start_time is not None
|
872
|
+
and output.execution_end_time is not None
|
873
|
+
):
|
874
|
+
# <= 0.99 ms functions get billed as 1 ms.
|
875
|
+
execution_duration_ms = math.ceil(
|
876
|
+
(output.execution_end_time - output.execution_start_time) * 1000
|
877
|
+
)
|
878
|
+
|
848
879
|
task_result = TaskResult(
|
849
880
|
task_id=output.allocation.task.id,
|
850
881
|
allocation_id=output.allocation.allocation_id,
|
@@ -858,6 +889,7 @@ def _to_task_result_proto(output: TaskOutput) -> TaskResult:
|
|
858
889
|
next_functions=output.next_functions,
|
859
890
|
function_outputs=output.uploaded_data_payloads,
|
860
891
|
invocation_error_output=output.uploaded_invocation_error_output,
|
892
|
+
execution_duration_ms=execution_duration_ms,
|
861
893
|
)
|
862
894
|
if output.uploaded_stdout is not None:
|
863
895
|
task_result.stdout.CopyFrom(output.uploaded_stdout)
|
@@ -6,23 +6,24 @@ from indexify.executor.monitoring.metrics import (
|
|
6
6
|
|
7
7
|
metric_function_executor_run_task_rpcs: prometheus_client.Counter = (
|
8
8
|
prometheus_client.Counter(
|
9
|
-
"function_executor_run_task_rpcs",
|
9
|
+
"function_executor_run_task_rpcs",
|
10
|
+
"Number of Function Executor run task lifecycle RPC sequences",
|
10
11
|
)
|
11
12
|
)
|
12
13
|
metric_function_executor_run_task_rpc_errors: prometheus_client.Counter = (
|
13
14
|
prometheus_client.Counter(
|
14
15
|
"function_executor_run_task_rpc_errors",
|
15
|
-
"Number of Function Executor run task RPC errors",
|
16
|
+
"Number of Function Executor run task lifecycle RPC errors",
|
16
17
|
)
|
17
18
|
)
|
18
19
|
metric_function_executor_run_task_rpc_latency: prometheus_client.Histogram = (
|
19
20
|
latency_metric_for_customer_controlled_operation(
|
20
|
-
"function_executor_run_task_rpc", "Function Executor run task RPC"
|
21
|
+
"function_executor_run_task_rpc", "Function Executor run task lifecycle RPC"
|
21
22
|
)
|
22
23
|
)
|
23
24
|
metric_function_executor_run_task_rpcs_in_progress: prometheus_client.Gauge = (
|
24
25
|
prometheus_client.Gauge(
|
25
26
|
"function_executor_run_task_rpcs_in_progress",
|
26
|
-
"Number of Function Executor run task RPCs in progress",
|
27
|
+
"Number of Function Executor run task lifecycle RPCs in progress",
|
27
28
|
)
|
28
29
|
)
|
{indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/run_task.py
RENAMED
@@ -6,9 +6,13 @@ from typing import Any, Optional
|
|
6
6
|
|
7
7
|
import grpc
|
8
8
|
from tensorlake.function_executor.proto.function_executor_pb2 import (
|
9
|
-
|
10
|
-
|
9
|
+
AwaitTaskProgress,
|
10
|
+
AwaitTaskRequest,
|
11
|
+
CreateTaskRequest,
|
12
|
+
DeleteTaskRequest,
|
13
|
+
FunctionInputs,
|
11
14
|
SerializedObject,
|
15
|
+
Task,
|
12
16
|
)
|
13
17
|
from tensorlake.function_executor.proto.function_executor_pb2 import (
|
14
18
|
TaskFailureReason as FETaskFailureReason,
|
@@ -16,6 +20,9 @@ from tensorlake.function_executor.proto.function_executor_pb2 import (
|
|
16
20
|
from tensorlake.function_executor.proto.function_executor_pb2 import (
|
17
21
|
TaskOutcomeCode as FETaskOutcomeCode,
|
18
22
|
)
|
23
|
+
from tensorlake.function_executor.proto.function_executor_pb2 import (
|
24
|
+
TaskResult,
|
25
|
+
)
|
19
26
|
from tensorlake.function_executor.proto.function_executor_pb2_grpc import (
|
20
27
|
FunctionExecutorStub,
|
21
28
|
)
|
@@ -44,6 +51,9 @@ _ENABLE_INJECT_TASK_CANCELLATIONS = (
|
|
44
51
|
os.getenv("INDEXIFY_INJECT_TASK_CANCELLATIONS", "0") == "1"
|
45
52
|
)
|
46
53
|
|
54
|
+
_CREATE_TASK_TIMEOUT_SECS = 5
|
55
|
+
_DELETE_TASK_TIMEOUT_SECS = 5
|
56
|
+
|
47
57
|
|
48
58
|
async def run_task_on_function_executor(
|
49
59
|
task_info: TaskInfo, function_executor: FunctionExecutor, logger: Any
|
@@ -53,21 +63,21 @@ async def run_task_on_function_executor(
|
|
53
63
|
Doesn't raise any exceptions.
|
54
64
|
"""
|
55
65
|
logger = logger.bind(module=__name__)
|
56
|
-
|
66
|
+
task = Task(
|
67
|
+
task_id=task_info.allocation.task.id,
|
57
68
|
namespace=task_info.allocation.task.namespace,
|
58
69
|
graph_name=task_info.allocation.task.graph_name,
|
59
70
|
graph_version=task_info.allocation.task.graph_version,
|
60
71
|
function_name=task_info.allocation.task.function_name,
|
61
72
|
graph_invocation_id=task_info.allocation.task.graph_invocation_id,
|
62
|
-
task_id=task_info.allocation.task.id,
|
63
73
|
allocation_id=task_info.allocation.allocation_id,
|
64
|
-
function_input=task_info.input,
|
74
|
+
request=FunctionInputs(function_input=task_info.input),
|
65
75
|
)
|
66
76
|
# Don't keep the input in memory after we started running the task.
|
67
77
|
task_info.input = None
|
68
78
|
|
69
79
|
if task_info.init_value is not None:
|
70
|
-
request.function_init_value.CopyFrom(task_info.init_value)
|
80
|
+
task.request.function_init_value.CopyFrom(task_info.init_value)
|
71
81
|
# Don't keep the init value in memory after we started running the task.
|
72
82
|
task_info.init_value = None
|
73
83
|
|
@@ -78,52 +88,100 @@ async def run_task_on_function_executor(
|
|
78
88
|
|
79
89
|
metric_function_executor_run_task_rpcs.inc()
|
80
90
|
metric_function_executor_run_task_rpcs_in_progress.inc()
|
81
|
-
start_time = time.monotonic()
|
82
91
|
# Not None if the Function Executor should be terminated after running the task.
|
83
92
|
function_executor_termination_reason: Optional[
|
84
93
|
FunctionExecutorTerminationReason
|
85
94
|
] = None
|
86
95
|
|
96
|
+
# NB: We start this timer before invoking the first RPC, since
|
97
|
+
# user code should be executing by the time the create_task() RPC
|
98
|
+
# returns, so not attributing the task management RPC overhead to
|
99
|
+
# the user would open a possibility for abuse. (This is somewhat
|
100
|
+
# mitigated by the fact that these RPCs should have a very low
|
101
|
+
# overhead.)
|
102
|
+
execution_start_time: Optional[float] = time.monotonic()
|
103
|
+
|
87
104
|
# If this RPC failed due to customer code crashing the server we won't be
|
88
105
|
# able to detect this. We'll treat this as our own error for now and thus
|
89
106
|
# let the AioRpcError be raised here.
|
90
107
|
timeout_sec = task_info.allocation.task.timeout_ms / 1000.0
|
91
108
|
try:
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
)
|
96
|
-
task_info.output = _task_output_from_function_executor_response(
|
109
|
+
task_result = await _run_task_rpcs(task, function_executor, timeout_sec)
|
110
|
+
|
111
|
+
task_info.output = _task_output_from_function_executor_result(
|
97
112
|
allocation=task_info.allocation,
|
98
|
-
|
113
|
+
result=task_result,
|
114
|
+
execution_start_time=execution_start_time,
|
115
|
+
execution_end_time=time.monotonic(),
|
99
116
|
logger=logger,
|
100
117
|
)
|
118
|
+
except asyncio.TimeoutError:
|
119
|
+
# This is an await_task() RPC timeout - we're not getting
|
120
|
+
# progress messages or a task completion.
|
121
|
+
function_executor_termination_reason = (
|
122
|
+
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT
|
123
|
+
)
|
124
|
+
task_info.output = TaskOutput.function_timeout(
|
125
|
+
allocation=task_info.allocation,
|
126
|
+
timeout_sec=timeout_sec,
|
127
|
+
execution_start_time=execution_start_time,
|
128
|
+
execution_end_time=time.monotonic(),
|
129
|
+
)
|
101
130
|
except grpc.aio.AioRpcError as e:
|
131
|
+
# This indicates some sort of problem communicating with the FE.
|
132
|
+
#
|
133
|
+
# NB: We charge the user in these situations: code within the
|
134
|
+
# FE is not isolated, so not charging would enable abuse.
|
135
|
+
#
|
136
|
+
# This is an unexpected situation, though, so we make sure to
|
137
|
+
# log the situation for further investigation.
|
138
|
+
|
139
|
+
function_executor_termination_reason = (
|
140
|
+
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY
|
141
|
+
)
|
142
|
+
metric_function_executor_run_task_rpc_errors.inc()
|
143
|
+
|
102
144
|
if e.code() == grpc.StatusCode.DEADLINE_EXCEEDED:
|
103
|
-
#
|
104
|
-
|
105
|
-
|
106
|
-
)
|
107
|
-
task_info.output = TaskOutput.function_timeout(
|
108
|
-
allocation=task_info.allocation,
|
109
|
-
timeout_sec=timeout_sec,
|
110
|
-
)
|
145
|
+
# This is either a create_task() RPC timeout or a
|
146
|
+
# delete_task() RPC timeout; either suggests that the FE
|
147
|
+
# is unhealthy.
|
148
|
+
logger.error("task management RPC execution deadline exceeded", exc_info=e)
|
111
149
|
else:
|
112
|
-
|
113
|
-
|
114
|
-
|
150
|
+
# This is a status from an unsuccessful RPC; this
|
151
|
+
# shouldn't happen, but we handle it.
|
152
|
+
logger.error("task management RPC failed", exc_info=e)
|
153
|
+
|
154
|
+
task_info.output = TaskOutput.function_executor_unresponsive(
|
155
|
+
allocation=task_info.allocation,
|
156
|
+
execution_start_time=execution_start_time,
|
157
|
+
execution_end_time=time.monotonic(),
|
158
|
+
)
|
159
|
+
|
115
160
|
except asyncio.CancelledError:
|
116
161
|
# The task is still running in FE, we only cancelled the client-side RPC.
|
117
162
|
function_executor_termination_reason = (
|
118
163
|
FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_CANCELLED
|
119
164
|
)
|
120
|
-
task_info.output = TaskOutput.task_cancelled(
|
165
|
+
task_info.output = TaskOutput.task_cancelled(
|
166
|
+
allocation=task_info.allocation,
|
167
|
+
execution_start_time=execution_start_time,
|
168
|
+
execution_end_time=time.monotonic(),
|
169
|
+
)
|
121
170
|
except Exception as e:
|
122
|
-
|
123
|
-
|
124
|
-
|
171
|
+
# This is an unexpected exception; we believe that this
|
172
|
+
# indicates an internal error.
|
173
|
+
logger.error(
|
174
|
+
"Unexpected internal error during task lifecycle RPC sequence", exc_info=e
|
175
|
+
)
|
176
|
+
task_info.output = TaskOutput.internal_error(
|
177
|
+
allocation=task_info.allocation,
|
178
|
+
execution_start_time=execution_start_time,
|
179
|
+
execution_end_time=time.monotonic(),
|
180
|
+
)
|
125
181
|
|
126
|
-
metric_function_executor_run_task_rpc_latency.observe(
|
182
|
+
metric_function_executor_run_task_rpc_latency.observe(
|
183
|
+
time.monotonic() - execution_start_time
|
184
|
+
)
|
127
185
|
metric_function_executor_run_task_rpcs_in_progress.dec()
|
128
186
|
|
129
187
|
function_executor.invocation_state_client().remove_task_to_invocation_id_entry(
|
@@ -153,22 +211,78 @@ async def run_task_on_function_executor(
|
|
153
211
|
)
|
154
212
|
|
155
213
|
|
156
|
-
def
|
157
|
-
|
214
|
+
async def _run_task_rpcs(
|
215
|
+
task: Task, function_executor: FunctionExecutor, timeout_sec: float
|
216
|
+
) -> TaskResult:
|
217
|
+
"""Runs the task, returning the result, reporting errors via exceptions."""
|
218
|
+
|
219
|
+
response: AwaitTaskProgress
|
220
|
+
channel: grpc.aio.Channel = function_executor.channel()
|
221
|
+
fe_stub = FunctionExecutorStub(channel)
|
222
|
+
|
223
|
+
# Create task with timeout
|
224
|
+
await fe_stub.create_task(
|
225
|
+
CreateTaskRequest(task=task), timeout=_CREATE_TASK_TIMEOUT_SECS
|
226
|
+
)
|
227
|
+
|
228
|
+
# Await the task; the timeout resets on each response
|
229
|
+
await_rpc = fe_stub.await_task(AwaitTaskRequest(task_id=task.task_id))
|
230
|
+
|
231
|
+
try:
|
232
|
+
while True:
|
233
|
+
# Wait for next response with fresh timeout each time
|
234
|
+
response = await asyncio.wait_for(await_rpc.read(), timeout=timeout_sec)
|
235
|
+
if response.WhichOneof("response") == "task_result":
|
236
|
+
# We're done waiting.
|
237
|
+
break
|
238
|
+
|
239
|
+
# NB: We don't actually check for other message types
|
240
|
+
# here; any message from the FE is treated as an
|
241
|
+
# indication that it's making forward progress.
|
242
|
+
|
243
|
+
if response == grpc.aio.EOF:
|
244
|
+
# Protocol error: we should get a task_result before
|
245
|
+
# we see the RPC complete.
|
246
|
+
raise grpc.aio.AioRpcError(
|
247
|
+
grpc.StatusCode.CANCELLED,
|
248
|
+
None,
|
249
|
+
None,
|
250
|
+
"Function Executor didn't return function/task alloc response",
|
251
|
+
)
|
252
|
+
finally:
|
253
|
+
# Cancel the outstanding RPC to ensure any resources in use
|
254
|
+
# are cleaned up; note that this is idempotent (in case the
|
255
|
+
# RPC has already completed).
|
256
|
+
await_rpc.cancel()
|
257
|
+
|
258
|
+
# Delete task with timeout
|
259
|
+
await fe_stub.delete_task(
|
260
|
+
DeleteTaskRequest(task_id=task.task_id), timeout=_DELETE_TASK_TIMEOUT_SECS
|
261
|
+
)
|
262
|
+
|
263
|
+
return response.task_result
|
264
|
+
|
265
|
+
|
266
|
+
def _task_output_from_function_executor_result(
|
267
|
+
allocation: TaskAllocation,
|
268
|
+
result: TaskResult,
|
269
|
+
execution_start_time: Optional[float],
|
270
|
+
execution_end_time: Optional[float],
|
271
|
+
logger: Any,
|
158
272
|
) -> TaskOutput:
|
159
|
-
response_validator = MessageValidator(
|
273
|
+
response_validator = MessageValidator(result)
|
160
274
|
response_validator.required_field("stdout")
|
161
275
|
response_validator.required_field("stderr")
|
162
276
|
response_validator.required_field("outcome_code")
|
163
277
|
|
164
278
|
metrics = TaskMetrics(counters={}, timers={})
|
165
|
-
if
|
279
|
+
if result.HasField("metrics"):
|
166
280
|
# Can be None if e.g. function failed.
|
167
|
-
metrics.counters = dict(
|
168
|
-
metrics.timers = dict(
|
281
|
+
metrics.counters = dict(result.metrics.counters)
|
282
|
+
metrics.timers = dict(result.metrics.timers)
|
169
283
|
|
170
284
|
outcome_code: TaskOutcomeCode = _to_task_outcome_code(
|
171
|
-
|
285
|
+
result.outcome_code, logger=logger
|
172
286
|
)
|
173
287
|
failure_reason: Optional[TaskFailureReason] = None
|
174
288
|
invocation_error_output: Optional[SerializedObject] = None
|
@@ -176,11 +290,11 @@ def _task_output_from_function_executor_response(
|
|
176
290
|
if outcome_code == TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE:
|
177
291
|
response_validator.required_field("failure_reason")
|
178
292
|
failure_reason: Optional[TaskFailureReason] = _to_task_failure_reason(
|
179
|
-
|
293
|
+
result.failure_reason, logger
|
180
294
|
)
|
181
295
|
if failure_reason == TaskFailureReason.TASK_FAILURE_REASON_INVOCATION_ERROR:
|
182
296
|
response_validator.required_field("invocation_error_output")
|
183
|
-
invocation_error_output =
|
297
|
+
invocation_error_output = result.invocation_error_output
|
184
298
|
|
185
299
|
if _ENABLE_INJECT_TASK_CANCELLATIONS:
|
186
300
|
logger.warning("injecting cancellation failure for the task allocation")
|
@@ -195,11 +309,13 @@ def _task_output_from_function_executor_response(
|
|
195
309
|
outcome_code=outcome_code,
|
196
310
|
failure_reason=failure_reason,
|
197
311
|
invocation_error_output=invocation_error_output,
|
198
|
-
function_outputs=
|
199
|
-
next_functions=
|
200
|
-
stdout=
|
201
|
-
stderr=
|
312
|
+
function_outputs=result.function_outputs,
|
313
|
+
next_functions=result.next_functions,
|
314
|
+
stdout=result.stdout,
|
315
|
+
stderr=result.stderr,
|
202
316
|
metrics=metrics,
|
317
|
+
execution_start_time=execution_start_time,
|
318
|
+
execution_end_time=execution_end_time,
|
203
319
|
)
|
204
320
|
|
205
321
|
|
@@ -38,6 +38,8 @@ class TaskOutput:
|
|
38
38
|
stdout: Optional[str] = None,
|
39
39
|
stderr: Optional[str] = None,
|
40
40
|
metrics: Optional[TaskMetrics] = None,
|
41
|
+
execution_start_time: Optional[float] = None,
|
42
|
+
execution_end_time: Optional[float] = None,
|
41
43
|
):
|
42
44
|
self.task = allocation.task
|
43
45
|
self.allocation = allocation
|
@@ -49,6 +51,8 @@ class TaskOutput:
|
|
49
51
|
self.failure_reason = failure_reason
|
50
52
|
self.invocation_error_output = invocation_error_output
|
51
53
|
self.metrics = metrics
|
54
|
+
self.execution_start_time = execution_start_time
|
55
|
+
self.execution_end_time = execution_end_time
|
52
56
|
self.uploaded_data_payloads: List[DataPayload] = []
|
53
57
|
self.uploaded_stdout: Optional[DataPayload] = None
|
54
58
|
self.uploaded_stderr: Optional[DataPayload] = None
|
@@ -58,6 +62,8 @@ class TaskOutput:
|
|
58
62
|
def internal_error(
|
59
63
|
cls,
|
60
64
|
allocation: TaskAllocation,
|
65
|
+
execution_start_time: Optional[float],
|
66
|
+
execution_end_time: Optional[float],
|
61
67
|
) -> "TaskOutput":
|
62
68
|
"""Creates a TaskOutput for an internal error."""
|
63
69
|
# We are not sharing internal error messages with the customer.
|
@@ -66,6 +72,8 @@ class TaskOutput:
|
|
66
72
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
67
73
|
failure_reason=TaskFailureReason.TASK_FAILURE_REASON_INTERNAL_ERROR,
|
68
74
|
stderr="Platform failed to execute the function.",
|
75
|
+
execution_start_time=execution_start_time,
|
76
|
+
execution_end_time=execution_end_time,
|
69
77
|
)
|
70
78
|
|
71
79
|
@classmethod
|
@@ -73,6 +81,8 @@ class TaskOutput:
|
|
73
81
|
cls,
|
74
82
|
allocation: TaskAllocation,
|
75
83
|
timeout_sec: float,
|
84
|
+
execution_start_time: Optional[float],
|
85
|
+
execution_end_time: Optional[float],
|
76
86
|
) -> "TaskOutput":
|
77
87
|
"""Creates a TaskOutput for a function timeout error."""
|
78
88
|
# Task stdout, stderr is not available.
|
@@ -81,18 +91,41 @@ class TaskOutput:
|
|
81
91
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
82
92
|
failure_reason=TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_TIMEOUT,
|
83
93
|
stderr=f"Function exceeded its configured timeout of {timeout_sec:.3f} sec.",
|
94
|
+
execution_start_time=execution_start_time,
|
95
|
+
execution_end_time=execution_end_time,
|
96
|
+
)
|
97
|
+
|
98
|
+
@classmethod
|
99
|
+
def function_executor_unresponsive(
|
100
|
+
cls,
|
101
|
+
allocation: TaskAllocation,
|
102
|
+
execution_start_time: Optional[float],
|
103
|
+
execution_end_time: Optional[float],
|
104
|
+
) -> "TaskOutput":
|
105
|
+
"""Creates a TaskOutput for an unresponsive FE."""
|
106
|
+
# Task stdout, stderr is not available.
|
107
|
+
return TaskOutput(
|
108
|
+
allocation=allocation,
|
109
|
+
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
110
|
+
failure_reason=TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_ERROR,
|
111
|
+
execution_start_time=execution_start_time,
|
112
|
+
execution_end_time=execution_end_time,
|
84
113
|
)
|
85
114
|
|
86
115
|
@classmethod
|
87
116
|
def task_cancelled(
|
88
117
|
cls,
|
89
118
|
allocation: TaskAllocation,
|
119
|
+
execution_start_time: Optional[float],
|
120
|
+
execution_end_time: Optional[float],
|
90
121
|
) -> "TaskOutput":
|
91
122
|
"""Creates a TaskOutput for the case when task didn't finish because its allocation was removed by Server."""
|
92
123
|
return TaskOutput(
|
93
124
|
allocation=allocation,
|
94
125
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
95
126
|
failure_reason=TaskFailureReason.TASK_FAILURE_REASON_TASK_CANCELLED,
|
127
|
+
execution_start_time=execution_start_time,
|
128
|
+
execution_end_time=execution_end_time,
|
96
129
|
)
|
97
130
|
|
98
131
|
@classmethod
|
@@ -114,7 +147,7 @@ class TaskOutput:
|
|
114
147
|
fe_startup_output: FunctionExecutorStartupOutput,
|
115
148
|
logger: Any,
|
116
149
|
) -> "TaskOutput":
|
117
|
-
"""Creates a TaskOutput for the case when we fail a task because its FE startup failed."""
|
150
|
+
"""Creates a TaskOutput for the case when we fail a task that didn't run because its FE startup failed."""
|
118
151
|
output = TaskOutput(
|
119
152
|
allocation=allocation,
|
120
153
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
@@ -84,7 +84,7 @@ enum FunctionExecutorTerminationReason {
|
|
84
84
|
FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY = 12;
|
85
85
|
// FE was terminated due to an unrecoverable internal error on Executor.
|
86
86
|
FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR = 13;
|
87
|
-
// FE was terminated because Function code exceeded its
|
87
|
+
// FE was terminated because Function code exceeded its configured timeout.
|
88
88
|
// FE termination is the only way to reliably stop the function code execution.
|
89
89
|
FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT = 14;
|
90
90
|
// FE was terminated because function allocation currently running on it was removed
|
@@ -244,7 +244,7 @@ enum TaskFailureReason {
|
|
244
244
|
TASK_FAILURE_REASON_INTERNAL_ERROR = 1;
|
245
245
|
// Clear function code failure typically by raising an exception from the function code.
|
246
246
|
TASK_FAILURE_REASON_FUNCTION_ERROR = 2;
|
247
|
-
// Function code run time exceeded its
|
247
|
+
// Function code run time exceeded its configured timeout.
|
248
248
|
TASK_FAILURE_REASON_FUNCTION_TIMEOUT = 3;
|
249
249
|
// Function code raised InvocationError to mark the invocation as permanently failed.
|
250
250
|
TASK_FAILURE_REASON_INVOCATION_ERROR = 4;
|
@@ -272,6 +272,8 @@ message TaskResult {
|
|
272
272
|
optional DataPayload stderr = 14;
|
273
273
|
// User payload for invocation error if task failed with invocation error.
|
274
274
|
optional DataPayload invocation_error_output = 15;
|
275
|
+
|
276
|
+
optional uint64 execution_duration_ms = 16;
|
275
277
|
}
|
276
278
|
|
277
279
|
// Internal API for scheduling and running tasks on Executors. Executors are acting as clients of this API.
|
@@ -2,7 +2,7 @@
|
|
2
2
|
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
3
3
|
# NO CHECKED-IN PROTOBUF GENCODE
|
4
4
|
# source: indexify/proto/executor_api.proto
|
5
|
-
# Protobuf Python Version: 6.31.
|
5
|
+
# Protobuf Python Version: 6.31.1
|
6
6
|
"""Generated protocol buffer code."""
|
7
7
|
from google.protobuf import descriptor as _descriptor
|
8
8
|
from google.protobuf import descriptor_pool as _descriptor_pool
|
@@ -11,7 +11,7 @@ from google.protobuf import symbol_database as _symbol_database
|
|
11
11
|
from google.protobuf.internal import builder as _builder
|
12
12
|
|
13
13
|
_runtime_version.ValidateProtobufRuntimeVersion(
|
14
|
-
_runtime_version.Domain.PUBLIC, 6, 31,
|
14
|
+
_runtime_version.Domain.PUBLIC, 6, 31, 1, "", "indexify/proto/executor_api.proto"
|
15
15
|
)
|
16
16
|
# @@protoc_insertion_point(imports)
|
17
17
|
|
@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
|
|
19
19
|
|
20
20
|
|
21
21
|
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
|
22
|
-
b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\xeb\x01\n\x0b\x44\x61taPayload\x12\x11\n\x04size\x18\x02 \x01(\x04H\x00\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x02\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x03\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x04\x88\x01\x01\x42\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"e\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_model"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xd8\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xb3\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\x0c \x01(\tH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resourcesB\x1c\n\x1a_output_payload_uri_prefix"\xcf\x02\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12S\n\x12termination_reason\x18\x03 \x01(\x0e\x32\x32.executor_api_pb.FunctionExecutorTerminationReasonH\x02\x88\x01\x01\x12)\n!allocation_ids_caused_termination\x18\x04 \x03(\tB\x0e\n\x0c_descriptionB\t\n\x07_statusB\x15\n\x13_termination_reason"\x8c\x02\n\x16\x46unctionExecutorUpdate\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12\x39\n\x0estartup_stdout\x18\x02 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x01\x88\x01\x01\x12\x39\n\x0estartup_stderr\x18\x03 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\x11\n\x0f_startup_stdoutB\x11\n\x0f_startup_stderr"\xce\x05\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x14\n\x07version\x18\x05 
\x01(\tH\x02\x88\x01\x01\x12\x34\n\x06status\x18\x06 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x03\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x04\x88\x01\x01\x12N\n!total_function_executor_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x05\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x06\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\x07\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x0b\n\t_hostnameB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB$\n"_total_function_executor_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"\xb9\x01\n\x0e\x45xecutorUpdate\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x0ctask_results\x18\x02 \x03(\x0b\x32\x1b.executor_api_pb.TaskResult\x12J\n\x19\x66unction_executor_updates\x18\x03 \x03(\x0b\x32\'.executor_api_pb.FunctionExecutorUpdateB\x0e\n\x0c_executor_id"\xbf\x01\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x12=\n\x0f\x65xecutor_update\x18\x02 \x01(\x0b\x32\x1f.executor_api_pb.ExecutorUpdateH\x01\x88\x01\x01\x42\x11\n\x0f_executor_stateB\x12\n\x10_executor_update"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 
\x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xc6\x04\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\t\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\n\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\xad\x01\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_taskB\x10\n\x0e_allocation_id"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\x8e\x06\n\nTaskResult\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x16\n\tnamespace\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x17\n\ngraph_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x06 \x01(\tH\x05\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x07 \x01(\tH\x06\x88\x01\x01\x12;\n\x0coutcome_code\x18\t \x01(\x0e\x32 .executor_api_pb.TaskOutcomeCodeH\x07\x88\x01\x01\x12?\n\x0e\x66\x61ilure_reason\x18\n \x01(\x0e\x32".executor_api_pb.TaskFailureReasonH\x08\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x36\n\x10\x66unction_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\r \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x31\n\x06stderr\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12\x42\n\x17invocation_error_output\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0b\x88\x01\x01\x42\n\n\x08_task_idB\x10\n\x0e_allocation_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\x0f\n\r_outcome_codeB\x11\n\x0f_failure_reasonB\t\n\x07_stdoutB\t\n\x07_stderrB\x1a\n\x18_invocation_error_output*\xd1\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03\x12$\n DATA_PAYLOAD_ENCODING_BINARY_ZIP\x10\x04*\xd6\x01\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_40GB\x10\x01\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_80GB\x10\x02\x12\x1e\n\x1aGPU_MODEL_NVIDIA_H100_80GB\x10\x03\x12\x1d\n\x19GPU_MODEL_NVIDIA_TESLA_T4\x10\x04\x12\x1a\n\x16GPU_MODEL_NVIDIA_A6000\x10\x05\x12\x18\n\x14GPU_MODEL_NVIDIA_A10\x10\x06*\xb3\x01\n\x16\x46unctionExecutorStatus\x12$\n FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12$\n FUNCTION_EXECUTOR_STATUS_PENDING\x10\x01\x12$\n 
FUNCTION_EXECUTOR_STATUS_RUNNING\x10\x02\x12\'\n#FUNCTION_EXECUTOR_STATUS_TERMINATED\x10\x03*\x94\x04\n!FunctionExecutorTerminationReason\x12\x30\n,FUNCTION_EXECUTOR_TERMINATION_REASON_UNKNOWN\x10\x00\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR\x10\x01\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR\x10\x02\x12H\nDFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT\x10\x03\x12\x32\n.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY\x10\x0c\x12\x37\n3FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR\x10\r\x12\x39\n5FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT\x10\x0e\x12;\n7FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_CANCELLED\x10\x0f*\xa5\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x04*n\n\x0fTaskOutcomeCode\x12\x1d\n\x19TASK_OUTCOME_CODE_UNKNOWN\x10\x00\x12\x1d\n\x19TASK_OUTCOME_CODE_SUCCESS\x10\x01\x12\x1d\n\x19TASK_OUTCOME_CODE_FAILURE\x10\x02*\xb6\x02\n\x11TaskFailureReason\x12\x1f\n\x1bTASK_FAILURE_REASON_UNKNOWN\x10\x00\x12&\n"TASK_FAILURE_REASON_INTERNAL_ERROR\x10\x01\x12&\n"TASK_FAILURE_REASON_FUNCTION_ERROR\x10\x02\x12(\n$TASK_FAILURE_REASON_FUNCTION_TIMEOUT\x10\x03\x12(\n$TASK_FAILURE_REASON_INVOCATION_ERROR\x10\x04\x12&\n"TASK_FAILURE_REASON_TASK_CANCELLED\x10\x05\x12\x34\n0TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED\x10\x06\x32\xff\x01\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x62\x06proto3'
|
22
|
+
b'\n!indexify/proto/executor_api.proto\x12\x0f\x65xecutor_api_pb"\xeb\x01\n\x0b\x44\x61taPayload\x12\x11\n\x04size\x18\x02 \x01(\x04H\x00\x88\x01\x01\x12\x18\n\x0bsha256_hash\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x10\n\x03uri\x18\x04 \x01(\tH\x02\x88\x01\x01\x12;\n\x08\x65ncoding\x18\x05 \x01(\x0e\x32$.executor_api_pb.DataPayloadEncodingH\x03\x88\x01\x01\x12\x1d\n\x10\x65ncoding_version\x18\x06 \x01(\x04H\x04\x88\x01\x01\x42\x07\n\x05_sizeB\x0e\n\x0c_sha256_hashB\x06\n\x04_uriB\x0b\n\t_encodingB\x13\n\x11_encoding_version"e\n\x0cGPUResources\x12\x12\n\x05\x63ount\x18\x01 \x01(\rH\x00\x88\x01\x01\x12-\n\x05model\x18\x02 \x01(\x0e\x32\x19.executor_api_pb.GPUModelH\x01\x88\x01\x01\x42\x08\n\x06_countB\x08\n\x06_model"\xc2\x01\n\rHostResources\x12\x16\n\tcpu_count\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x0c\n\n_cpu_countB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xbb\x01\n\x0f\x41llowedFunction\x12\x16\n\tnamespace\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ngraph_name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x42\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_function_nameB\x10\n\x0e_graph_version"\xd8\x01\n\x19\x46unctionExecutorResources\x12\x1b\n\x0e\x63pu_ms_per_sec\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cmemory_bytes\x18\x02 \x01(\x04H\x01\x88\x01\x01\x12\x17\n\ndisk_bytes\x18\x03 \x01(\x04H\x02\x88\x01\x01\x12/\n\x03gpu\x18\x04 \x01(\x0b\x32\x1d.executor_api_pb.GPUResourcesH\x03\x88\x01\x01\x42\x11\n\x0f_cpu_ms_per_secB\x0f\n\r_memory_bytesB\r\n\x0b_disk_bytesB\x06\n\x04_gpu"\xb3\x04\n\x1b\x46unctionExecutorDescription\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x16\n\timage_uri\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x14\n\x0csecret_names\x18\x07 \x03(\t\x12%\n\x18\x63ustomer_code_timeout_ms\x18\t \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05graph\x18\n \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x42\n\tresources\x18\x0b \x01(\x0b\x32*.executor_api_pb.FunctionExecutorResourcesH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\x0c \x01(\tH\t\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x0c\n\n_image_uriB\x1b\n\x19_customer_code_timeout_msB\x08\n\x06_graphB\x0c\n\n_resourcesB\x1c\n\x1a_output_payload_uri_prefix"\xcf\x02\n\x15\x46unctionExecutorState\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12<\n\x06status\x18\x02 \x01(\x0e\x32\'.executor_api_pb.FunctionExecutorStatusH\x01\x88\x01\x01\x12S\n\x12termination_reason\x18\x03 \x01(\x0e\x32\x32.executor_api_pb.FunctionExecutorTerminationReasonH\x02\x88\x01\x01\x12)\n!allocation_ids_caused_termination\x18\x04 \x03(\tB\x0e\n\x0c_descriptionB\t\n\x07_statusB\x15\n\x13_termination_reason"\x8c\x02\n\x16\x46unctionExecutorUpdate\x12\x46\n\x0b\x64\x65scription\x18\x01 \x01(\x0b\x32,.executor_api_pb.FunctionExecutorDescriptionH\x00\x88\x01\x01\x12\x39\n\x0estartup_stdout\x18\x02 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x01\x88\x01\x01\x12\x39\n\x0estartup_stderr\x18\x03 \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x02\x88\x01\x01\x42\x0e\n\x0c_descriptionB\x11\n\x0f_startup_stdoutB\x11\n\x0f_startup_stderr"\xce\x05\n\rExecutorState\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08hostname\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x14\n\x07version\x18\x05 
\x01(\tH\x02\x88\x01\x01\x12\x34\n\x06status\x18\x06 \x01(\x0e\x32\x1f.executor_api_pb.ExecutorStatusH\x03\x88\x01\x01\x12<\n\x0ftotal_resources\x18\r \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x04\x88\x01\x01\x12N\n!total_function_executor_resources\x18\x07 \x01(\x0b\x32\x1e.executor_api_pb.HostResourcesH\x05\x88\x01\x01\x12;\n\x11\x61llowed_functions\x18\x08 \x03(\x0b\x32 .executor_api_pb.AllowedFunction\x12H\n\x18\x66unction_executor_states\x18\t \x03(\x0b\x32&.executor_api_pb.FunctionExecutorState\x12:\n\x06labels\x18\n \x03(\x0b\x32*.executor_api_pb.ExecutorState.LabelsEntry\x12\x17\n\nstate_hash\x18\x0b \x01(\tH\x06\x88\x01\x01\x12\x19\n\x0cserver_clock\x18\x0c \x01(\x04H\x07\x88\x01\x01\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0e\n\x0c_executor_idB\x0b\n\t_hostnameB\n\n\x08_versionB\t\n\x07_statusB\x12\n\x10_total_resourcesB$\n"_total_function_executor_resourcesB\r\n\x0b_state_hashB\x0f\n\r_server_clock"\xb9\x01\n\x0e\x45xecutorUpdate\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x0ctask_results\x18\x02 \x03(\x0b\x32\x1b.executor_api_pb.TaskResult\x12J\n\x19\x66unction_executor_updates\x18\x03 \x03(\x0b\x32\'.executor_api_pb.FunctionExecutorUpdateB\x0e\n\x0c_executor_id"\xbf\x01\n\x1aReportExecutorStateRequest\x12;\n\x0e\x65xecutor_state\x18\x01 \x01(\x0b\x32\x1e.executor_api_pb.ExecutorStateH\x00\x88\x01\x01\x12=\n\x0f\x65xecutor_update\x18\x02 \x01(\x0b\x32\x1f.executor_api_pb.ExecutorUpdateH\x01\x88\x01\x01\x42\x11\n\x0f_executor_stateB\x12\n\x10_executor_update"\x1d\n\x1bReportExecutorStateResponse"\xcf\x01\n\x0fTaskRetryPolicy\x12\x18\n\x0bmax_retries\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1d\n\x10initial_delay_ms\x18\x02 \x01(\rH\x01\x88\x01\x01\x12\x19\n\x0cmax_delay_ms\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1d\n\x10\x64\x65lay_multiplier\x18\x04 
\x01(\rH\x03\x88\x01\x01\x42\x0e\n\x0c_max_retriesB\x13\n\x11_initial_delay_msB\x0f\n\r_max_delay_msB\x13\n\x11_delay_multiplier"\xc6\x04\n\x04Task\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tnamespace\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x17\n\ngraph_name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x05 \x01(\tH\x04\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x06 \x01(\tH\x05\x88\x01\x01\x12\x17\n\ntimeout_ms\x18\n \x01(\rH\x06\x88\x01\x01\x12\x30\n\x05input\x18\x0b \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x07\x88\x01\x01\x12\x38\n\rreducer_input\x18\x0c \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x08\x88\x01\x01\x12&\n\x19output_payload_uri_prefix\x18\r \x01(\tH\t\x88\x01\x01\x12;\n\x0cretry_policy\x18\x0e \x01(\x0b\x32 .executor_api_pb.TaskRetryPolicyH\n\x88\x01\x01\x42\x05\n\x03_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\r\n\x0b_timeout_msB\x08\n\x06_inputB\x10\n\x0e_reducer_inputB\x1c\n\x1a_output_payload_uri_prefixB\x0f\n\r_retry_policy"\xad\x01\n\x0eTaskAllocation\x12!\n\x14\x66unction_executor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12(\n\x04task\x18\x02 \x01(\x0b\x32\x15.executor_api_pb.TaskH\x01\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x03 \x01(\tH\x02\x88\x01\x01\x42\x17\n\x15_function_executor_idB\x07\n\x05_taskB\x10\n\x0e_allocation_id"K\n\x1fGetDesiredExecutorStatesRequest\x12\x18\n\x0b\x65xecutor_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0e\n\x0c_executor_id"\xb9\x01\n\x14\x44\x65siredExecutorState\x12H\n\x12\x66unction_executors\x18\x01 \x03(\x0b\x32,.executor_api_pb.FunctionExecutorDescription\x12\x39\n\x10task_allocations\x18\x02 \x03(\x0b\x32\x1f.executor_api_pb.TaskAllocation\x12\x12\n\x05\x63lock\x18\x03 \x01(\x04H\x00\x88\x01\x01\x42\x08\n\x06_clock"\xcc\x06\n\nTaskResult\x12\x14\n\x07task_id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x1a\n\rallocation_id\x18\x02 
\x01(\tH\x01\x88\x01\x01\x12\x16\n\tnamespace\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x17\n\ngraph_name\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x1a\n\rgraph_version\x18\x05 \x01(\tH\x04\x88\x01\x01\x12\x1a\n\rfunction_name\x18\x06 \x01(\tH\x05\x88\x01\x01\x12 \n\x13graph_invocation_id\x18\x07 \x01(\tH\x06\x88\x01\x01\x12;\n\x0coutcome_code\x18\t \x01(\x0e\x32 .executor_api_pb.TaskOutcomeCodeH\x07\x88\x01\x01\x12?\n\x0e\x66\x61ilure_reason\x18\n \x01(\x0e\x32".executor_api_pb.TaskFailureReasonH\x08\x88\x01\x01\x12\x16\n\x0enext_functions\x18\x0b \x03(\t\x12\x36\n\x10\x66unction_outputs\x18\x0c \x03(\x0b\x32\x1c.executor_api_pb.DataPayload\x12\x31\n\x06stdout\x18\r \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\t\x88\x01\x01\x12\x31\n\x06stderr\x18\x0e \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\n\x88\x01\x01\x12\x42\n\x17invocation_error_output\x18\x0f \x01(\x0b\x32\x1c.executor_api_pb.DataPayloadH\x0b\x88\x01\x01\x12"\n\x15\x65xecution_duration_ms\x18\x10 \x01(\x04H\x0c\x88\x01\x01\x42\n\n\x08_task_idB\x10\n\x0e_allocation_idB\x0c\n\n_namespaceB\r\n\x0b_graph_nameB\x10\n\x0e_graph_versionB\x10\n\x0e_function_nameB\x16\n\x14_graph_invocation_idB\x0f\n\r_outcome_codeB\x11\n\x0f_failure_reasonB\t\n\x07_stdoutB\t\n\x07_stderrB\x1a\n\x18_invocation_error_outputB\x18\n\x16_execution_duration_ms*\xd1\x01\n\x13\x44\x61taPayloadEncoding\x12!\n\x1d\x44\x41TA_PAYLOAD_ENCODING_UNKNOWN\x10\x00\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_JSON\x10\x01\x12#\n\x1f\x44\x41TA_PAYLOAD_ENCODING_UTF8_TEXT\x10\x02\x12\'\n#DATA_PAYLOAD_ENCODING_BINARY_PICKLE\x10\x03\x12$\n DATA_PAYLOAD_ENCODING_BINARY_ZIP\x10\x04*\xd6\x01\n\x08GPUModel\x12\x15\n\x11GPU_MODEL_UNKNOWN\x10\x00\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_40GB\x10\x01\x12\x1e\n\x1aGPU_MODEL_NVIDIA_A100_80GB\x10\x02\x12\x1e\n\x1aGPU_MODEL_NVIDIA_H100_80GB\x10\x03\x12\x1d\n\x19GPU_MODEL_NVIDIA_TESLA_T4\x10\x04\x12\x1a\n\x16GPU_MODEL_NVIDIA_A6000\x10\x05\x12\x18\n\x14GPU_MODEL_NVIDIA_A10\x10\x06*\xb3\x01\n\x16\x46unctionExecutorStatus\x12$\n 
FUNCTION_EXECUTOR_STATUS_UNKNOWN\x10\x00\x12$\n FUNCTION_EXECUTOR_STATUS_PENDING\x10\x01\x12$\n FUNCTION_EXECUTOR_STATUS_RUNNING\x10\x02\x12\'\n#FUNCTION_EXECUTOR_STATUS_TERMINATED\x10\x03*\x94\x04\n!FunctionExecutorTerminationReason\x12\x30\n,FUNCTION_EXECUTOR_TERMINATION_REASON_UNKNOWN\x10\x00\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR\x10\x01\x12\x46\nBFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR\x10\x02\x12H\nDFUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT\x10\x03\x12\x32\n.FUNCTION_EXECUTOR_TERMINATION_REASON_UNHEALTHY\x10\x0c\x12\x37\n3FUNCTION_EXECUTOR_TERMINATION_REASON_INTERNAL_ERROR\x10\r\x12\x39\n5FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_TIMEOUT\x10\x0e\x12;\n7FUNCTION_EXECUTOR_TERMINATION_REASON_FUNCTION_CANCELLED\x10\x0f*\xa5\x01\n\x0e\x45xecutorStatus\x12\x1b\n\x17\x45XECUTOR_STATUS_UNKNOWN\x10\x00\x12\x1f\n\x1b\x45XECUTOR_STATUS_STARTING_UP\x10\x01\x12\x1b\n\x17\x45XECUTOR_STATUS_RUNNING\x10\x02\x12\x1b\n\x17\x45XECUTOR_STATUS_DRAINED\x10\x03\x12\x1b\n\x17\x45XECUTOR_STATUS_STOPPED\x10\x04*n\n\x0fTaskOutcomeCode\x12\x1d\n\x19TASK_OUTCOME_CODE_UNKNOWN\x10\x00\x12\x1d\n\x19TASK_OUTCOME_CODE_SUCCESS\x10\x01\x12\x1d\n\x19TASK_OUTCOME_CODE_FAILURE\x10\x02*\xb6\x02\n\x11TaskFailureReason\x12\x1f\n\x1bTASK_FAILURE_REASON_UNKNOWN\x10\x00\x12&\n"TASK_FAILURE_REASON_INTERNAL_ERROR\x10\x01\x12&\n"TASK_FAILURE_REASON_FUNCTION_ERROR\x10\x02\x12(\n$TASK_FAILURE_REASON_FUNCTION_TIMEOUT\x10\x03\x12(\n$TASK_FAILURE_REASON_INVOCATION_ERROR\x10\x04\x12&\n"TASK_FAILURE_REASON_TASK_CANCELLED\x10\x05\x12\x34\n0TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED\x10\x06\x32\xff\x01\n\x0b\x45xecutorAPI\x12t\n\x15report_executor_state\x12+.executor_api_pb.ReportExecutorStateRequest\x1a,.executor_api_pb.ReportExecutorStateResponse"\x00\x12z\n\x1bget_desired_executor_states\x12\x30.executor_api_pb.GetDesiredExecutorStatesRequest\x1a%.executor_api_pb.DesiredExecutorState"\x00\x30\x01\x62\x06proto3'
|
23
23
|
)
|
24
24
|
|
25
25
|
_globals = globals()
|
@@ -31,20 +31,20 @@ if not _descriptor._USE_C_DESCRIPTORS:
|
|
31
31
|
DESCRIPTOR._loaded_options = None
|
32
32
|
_globals["_EXECUTORSTATE_LABELSENTRY"]._loaded_options = None
|
33
33
|
_globals["_EXECUTORSTATE_LABELSENTRY"]._serialized_options = b"8\001"
|
34
|
-
_globals["_DATAPAYLOADENCODING"]._serialized_start =
|
35
|
-
_globals["_DATAPAYLOADENCODING"]._serialized_end =
|
36
|
-
_globals["_GPUMODEL"]._serialized_start =
|
37
|
-
_globals["_GPUMODEL"]._serialized_end =
|
38
|
-
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start =
|
39
|
-
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end =
|
40
|
-
_globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_start =
|
41
|
-
_globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_end =
|
42
|
-
_globals["_EXECUTORSTATUS"]._serialized_start =
|
43
|
-
_globals["_EXECUTORSTATUS"]._serialized_end =
|
44
|
-
_globals["_TASKOUTCOMECODE"]._serialized_start =
|
45
|
-
_globals["_TASKOUTCOMECODE"]._serialized_end =
|
46
|
-
_globals["_TASKFAILUREREASON"]._serialized_start =
|
47
|
-
_globals["_TASKFAILUREREASON"]._serialized_end =
|
34
|
+
_globals["_DATAPAYLOADENCODING"]._serialized_start = 5394
|
35
|
+
_globals["_DATAPAYLOADENCODING"]._serialized_end = 5603
|
36
|
+
_globals["_GPUMODEL"]._serialized_start = 5606
|
37
|
+
_globals["_GPUMODEL"]._serialized_end = 5820
|
38
|
+
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_start = 5823
|
39
|
+
_globals["_FUNCTIONEXECUTORSTATUS"]._serialized_end = 6002
|
40
|
+
_globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_start = 6005
|
41
|
+
_globals["_FUNCTIONEXECUTORTERMINATIONREASON"]._serialized_end = 6537
|
42
|
+
_globals["_EXECUTORSTATUS"]._serialized_start = 6540
|
43
|
+
_globals["_EXECUTORSTATUS"]._serialized_end = 6705
|
44
|
+
_globals["_TASKOUTCOMECODE"]._serialized_start = 6707
|
45
|
+
_globals["_TASKOUTCOMECODE"]._serialized_end = 6817
|
46
|
+
_globals["_TASKFAILUREREASON"]._serialized_start = 6820
|
47
|
+
_globals["_TASKFAILUREREASON"]._serialized_end = 7130
|
48
48
|
_globals["_DATAPAYLOAD"]._serialized_start = 55
|
49
49
|
_globals["_DATAPAYLOAD"]._serialized_end = 290
|
50
50
|
_globals["_GPURESOURCES"]._serialized_start = 292
|
@@ -82,7 +82,7 @@ if not _descriptor._USE_C_DESCRIPTORS:
|
|
82
82
|
_globals["_DESIREDEXECUTORSTATE"]._serialized_start = 4359
|
83
83
|
_globals["_DESIREDEXECUTORSTATE"]._serialized_end = 4544
|
84
84
|
_globals["_TASKRESULT"]._serialized_start = 4547
|
85
|
-
_globals["_TASKRESULT"]._serialized_end =
|
86
|
-
_globals["_EXECUTORAPI"]._serialized_start =
|
87
|
-
_globals["_EXECUTORAPI"]._serialized_end =
|
85
|
+
_globals["_TASKRESULT"]._serialized_end = 5391
|
86
|
+
_globals["_EXECUTORAPI"]._serialized_start = 7133
|
87
|
+
_globals["_EXECUTORAPI"]._serialized_end = 7388
|
88
88
|
# @@protoc_insertion_point(module_scope)
|
@@ -543,6 +543,7 @@ class TaskResult(_message.Message):
|
|
543
543
|
"stdout",
|
544
544
|
"stderr",
|
545
545
|
"invocation_error_output",
|
546
|
+
"execution_duration_ms",
|
546
547
|
)
|
547
548
|
TASK_ID_FIELD_NUMBER: _ClassVar[int]
|
548
549
|
ALLOCATION_ID_FIELD_NUMBER: _ClassVar[int]
|
@@ -558,6 +559,7 @@ class TaskResult(_message.Message):
|
|
558
559
|
STDOUT_FIELD_NUMBER: _ClassVar[int]
|
559
560
|
STDERR_FIELD_NUMBER: _ClassVar[int]
|
560
561
|
INVOCATION_ERROR_OUTPUT_FIELD_NUMBER: _ClassVar[int]
|
562
|
+
EXECUTION_DURATION_MS_FIELD_NUMBER: _ClassVar[int]
|
561
563
|
task_id: str
|
562
564
|
allocation_id: str
|
563
565
|
namespace: str
|
@@ -572,6 +574,7 @@ class TaskResult(_message.Message):
|
|
572
574
|
stdout: DataPayload
|
573
575
|
stderr: DataPayload
|
574
576
|
invocation_error_output: DataPayload
|
577
|
+
execution_duration_ms: int
|
575
578
|
def __init__(
|
576
579
|
self,
|
577
580
|
task_id: _Optional[str] = ...,
|
@@ -588,4 +591,5 @@ class TaskResult(_message.Message):
|
|
588
591
|
stdout: _Optional[_Union[DataPayload, _Mapping]] = ...,
|
589
592
|
stderr: _Optional[_Union[DataPayload, _Mapping]] = ...,
|
590
593
|
invocation_error_output: _Optional[_Union[DataPayload, _Mapping]] = ...,
|
594
|
+
execution_duration_ms: _Optional[int] = ...,
|
591
595
|
) -> None: ...
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor/function_executor.py
RENAMED
File without changes
|
{indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor/health_checker.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/downloads.py
RENAMED
File without changes
|
{indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/events.py
RENAMED
File without changes
|
File without changes
|
{indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/loggers.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/function_executor_controller/task_info.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/monitoring/health_check_handler.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/monitoring/prometheus_metrics_handler.py
RENAMED
File without changes
|
File without changes
|
{indexify-0.4.20 → indexify-0.4.22}/src/indexify/executor/monitoring/startup_probe_handler.py
RENAMED
File without changes
|
File without changes
|
File without changes
|