indexify 0.4.12__tar.gz → 0.4.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.4.12 → indexify-0.4.13}/PKG-INFO +2 -2
- {indexify-0.4.12 → indexify-0.4.13}/pyproject.toml +2 -2
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/create_function_executor.py +10 -15
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/function_executor_controller.py +22 -7
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/task_output.py +53 -1
- {indexify-0.4.12 → indexify-0.4.13}/README.md +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/cli/__init__.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/cli/build_image.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/cli/deploy.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/cli/executor.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/README.md +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/blob_store/blob_store.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/blob_store/local_fs_blob_store.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/blob_store/metrics/blob_store.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/blob_store/s3_blob_store.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/channel_manager.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/executor.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_allowlist.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/function_executor.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/health_checker.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/invocation_state_client.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/__init__.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/completed_task_metrics.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/debug_event_loop.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/downloads.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/events.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/function_executor_startup_output.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/loggers.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/message_validators.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/downloads.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/function_executor_controller.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/run_task.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/upload_task_output.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/prepare_task.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/run_task.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/task_info.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/terminate_function_executor.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/upload_task_output.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/host_resources/host_resources.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/host_resources/nvidia_gpu.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/metrics/channel_manager.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/metrics/executor.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/metrics/state_reconciler.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/metrics/state_reporter.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/handler.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/metrics.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/server.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/state_reconciler.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/state_reporter.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/proto/executor_api.proto +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/proto/executor_api_pb2.py +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/proto/executor_api_pb2.pyi +0 -0
- {indexify-0.4.12 → indexify-0.4.13}/src/indexify/proto/executor_api_pb2_grpc.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: indexify
|
3
|
-
Version: 0.4.12
|
3
|
+
Version: 0.4.13
|
4
4
|
Summary: Open Source Indexify components and helper tools
|
5
5
|
Home-page: https://github.com/tensorlakeai/indexify
|
6
6
|
License: Apache 2.0
|
@@ -17,7 +17,7 @@ Requires-Dist: aiohttp (>=3.11.0,<4.0.0)
|
|
17
17
|
Requires-Dist: boto3 (>=1.37.30,<2.0.0)
|
18
18
|
Requires-Dist: prometheus-client (>=0.21.1,<0.22.0)
|
19
19
|
Requires-Dist: psutil (>=7.0.0,<8.0.0)
|
20
|
-
Requires-Dist: tensorlake (==0.2.
|
20
|
+
Requires-Dist: tensorlake (==0.2.12)
|
21
21
|
Project-URL: Repository, https://github.com/tensorlakeai/indexify
|
22
22
|
Description-Content-Type: text/markdown
|
23
23
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "indexify"
|
3
3
|
# Incremented if any of the components provided in this packages are updated.
|
4
|
-
version = "0.4.12"
|
4
|
+
version = "0.4.13"
|
5
5
|
description = "Open Source Indexify components and helper tools"
|
6
6
|
authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
|
7
7
|
license = "Apache 2.0"
|
@@ -25,7 +25,7 @@ prometheus-client = "^0.21.1"
|
|
25
25
|
psutil = "^7.0.0"
|
26
26
|
# Adds function-executor binary, utils lib, sdk used in indexify-cli commands.
|
27
27
|
# We need to specify the tensorlake version exactly because pip install doesn't respect poetry.lock files.
|
28
|
-
tensorlake = "0.2.
|
28
|
+
tensorlake = "0.2.12"
|
29
29
|
# Uncomment the next line to use local tensorlake package (only for development!)
|
30
30
|
# tensorlake = { path = "../tensorlake", develop = true }
|
31
31
|
# pydantic is provided by tensorlake
|
@@ -5,7 +5,6 @@ from typing import Any, Optional, Tuple
|
|
5
5
|
from tensorlake.function_executor.proto.function_executor_pb2 import (
|
6
6
|
InitializeRequest,
|
7
7
|
SerializedObject,
|
8
|
-
SerializedObjectEncoding,
|
9
8
|
)
|
10
9
|
|
11
10
|
from indexify.executor.blob_store.blob_store import BLOBStore
|
@@ -73,25 +72,21 @@ async def create_function_executor(
|
|
73
72
|
except BaseException as e:
|
74
73
|
if isinstance(e, asyncio.CancelledError):
|
75
74
|
logger.info("function executor startup was cancelled")
|
76
|
-
return FunctionExecutorCreated(
|
77
|
-
function_executor=None,
|
78
|
-
output=FunctionExecutorStartupOutput(
|
79
|
-
function_executor_description=function_executor_description,
|
80
|
-
termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE,
|
81
|
-
),
|
82
|
-
)
|
83
75
|
else:
|
84
76
|
logger.error(
|
85
77
|
"failed to create function executor due to platform error",
|
86
78
|
exc_info=e,
|
87
79
|
)
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
80
|
+
|
81
|
+
# Cancelled FE startup means that Server removed it from desired state so it doesn't matter what termination_reason we return
|
82
|
+
# in this case cause this FE will be removed from Executor reported state.
|
83
|
+
return FunctionExecutorCreated(
|
84
|
+
function_executor=None,
|
85
|
+
output=FunctionExecutorStartupOutput(
|
86
|
+
function_executor_description=function_executor_description,
|
87
|
+
termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR,
|
88
|
+
),
|
89
|
+
)
|
95
90
|
|
96
91
|
|
97
92
|
async def _initialization_result_to_fe_creation_output(
|
@@ -461,12 +461,25 @@ class FunctionExecutorController:
|
|
461
461
|
if event.function_executor is None:
|
462
462
|
# Server needs to increment attempts counter for all the tasks that were pending while FE was starting up.
|
463
463
|
# This prevents infinite retries if FEs consistently fail to start up.
|
464
|
+
# The allocations we mark here also must not use the FE terminated failure reason in their outputs
|
465
|
+
# because FE terminated means that the allocation wasn't the cause of the FE termination.
|
466
|
+
allocation_ids_caused_termination: List[str] = []
|
467
|
+
for task_info in self._tasks.values():
|
468
|
+
task_logger = task_allocation_logger(task_info.allocation, self._logger)
|
469
|
+
task_logger.info(
|
470
|
+
"marking allocation failed on function executor startup failure"
|
471
|
+
)
|
472
|
+
allocation_ids_caused_termination.append(
|
473
|
+
task_info.allocation.allocation_id
|
474
|
+
)
|
475
|
+
task_info.output = TaskOutput.function_executor_startup_failed(
|
476
|
+
allocation=task_info.allocation,
|
477
|
+
fe_startup_output=event.output,
|
478
|
+
logger=task_logger,
|
479
|
+
)
|
464
480
|
self._start_termination(
|
465
481
|
fe_termination_reason=event.output.termination_reason,
|
466
|
-
allocation_ids_caused_termination=[
|
467
|
-
task_info.allocation.allocation_id
|
468
|
-
for task_info in self._tasks.values()
|
469
|
-
],
|
482
|
+
allocation_ids_caused_termination=allocation_ids_caused_termination,
|
470
483
|
)
|
471
484
|
return
|
472
485
|
|
@@ -593,9 +606,11 @@ class FunctionExecutorController:
|
|
593
606
|
_FE_CONTROLLER_STATE.TERMINATING,
|
594
607
|
_FE_CONTROLLER_STATE.TERMINATED,
|
595
608
|
]:
|
596
|
-
task_info.output
|
597
|
-
|
598
|
-
|
609
|
+
if task_info.output is None:
|
610
|
+
# The output can be set already by FE startup failure handler.
|
611
|
+
task_info.output = TaskOutput.function_executor_terminated(
|
612
|
+
task_info.allocation
|
613
|
+
)
|
599
614
|
self._start_task_output_upload(task_info)
|
600
615
|
elif self._internal_state == _FE_CONTROLLER_STATE.RUNNING:
|
601
616
|
self._running_task = task_info
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Dict, List, Optional
|
1
|
+
from typing import Any, Dict, List, Optional
|
2
2
|
|
3
3
|
from tensorlake.function_executor.proto.function_executor_pb2 import (
|
4
4
|
SerializedObject,
|
@@ -6,11 +6,14 @@ from tensorlake.function_executor.proto.function_executor_pb2 import (
|
|
6
6
|
|
7
7
|
from indexify.proto.executor_api_pb2 import (
|
8
8
|
DataPayload,
|
9
|
+
FunctionExecutorTerminationReason,
|
9
10
|
TaskAllocation,
|
10
11
|
TaskFailureReason,
|
11
12
|
TaskOutcomeCode,
|
12
13
|
)
|
13
14
|
|
15
|
+
from .function_executor_startup_output import FunctionExecutorStartupOutput
|
16
|
+
|
14
17
|
|
15
18
|
class TaskMetrics:
|
16
19
|
"""Metrics for a task."""
|
@@ -105,3 +108,52 @@ class TaskOutput:
|
|
105
108
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
106
109
|
failure_reason=TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED,
|
107
110
|
)
|
111
|
+
|
112
|
+
@classmethod
|
113
|
+
def function_executor_startup_failed(
|
114
|
+
cls,
|
115
|
+
allocation: TaskAllocation,
|
116
|
+
fe_startup_output: FunctionExecutorStartupOutput,
|
117
|
+
logger: Any,
|
118
|
+
) -> "TaskOutput":
|
119
|
+
"""Creates a TaskOutput for the case when we fail a task because its FE startup failed."""
|
120
|
+
output = TaskOutput(
|
121
|
+
allocation=allocation,
|
122
|
+
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
123
|
+
failure_reason=_fe_startup_failure_reason_to_task_failure_reason(
|
124
|
+
fe_startup_output.termination_reason, logger
|
125
|
+
),
|
126
|
+
)
|
127
|
+
# Use FE startup stdout, stderr for allocations that we failed because FE startup failed.
|
128
|
+
output.uploaded_stdout = fe_startup_output.stdout
|
129
|
+
output.uploaded_stderr = fe_startup_output.stderr
|
130
|
+
return output
|
131
|
+
|
132
|
+
|
133
|
+
def _fe_startup_failure_reason_to_task_failure_reason(
|
134
|
+
fe_termination_reason: FunctionExecutorTerminationReason, logger: Any
|
135
|
+
) -> TaskFailureReason:
|
136
|
+
# Only need to check FE termination reasons happening on FE startup.
|
137
|
+
if (
|
138
|
+
fe_termination_reason
|
139
|
+
== FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR
|
140
|
+
):
|
141
|
+
return TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_ERROR
|
142
|
+
elif (
|
143
|
+
fe_termination_reason
|
144
|
+
== FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT
|
145
|
+
):
|
146
|
+
return TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_TIMEOUT
|
147
|
+
elif (
|
148
|
+
fe_termination_reason
|
149
|
+
== FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR
|
150
|
+
):
|
151
|
+
return TaskFailureReason.TASK_FAILURE_REASON_INTERNAL_ERROR
|
152
|
+
else:
|
153
|
+
logger.error(
|
154
|
+
"unexpected function executor startup failure reason",
|
155
|
+
fe_termination_reason=FunctionExecutorTerminationReason.Name(
|
156
|
+
fe_termination_reason
|
157
|
+
),
|
158
|
+
)
|
159
|
+
return TaskFailureReason.TASK_FAILURE_REASON_UNKNOWN
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/function_executor.py
RENAMED
File without changes
|
{indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/health_checker.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/downloads.py
RENAMED
File without changes
|
{indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/events.py
RENAMED
File without changes
|
File without changes
|
{indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/loggers.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/run_task.py
RENAMED
File without changes
|
{indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/task_info.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/health_check_handler.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/prometheus_metrics_handler.py
RENAMED
File without changes
|
File without changes
|
{indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/startup_probe_handler.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|