indexify 0.4.12__tar.gz → 0.4.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. {indexify-0.4.12 → indexify-0.4.13}/PKG-INFO +2 -2
  2. {indexify-0.4.12 → indexify-0.4.13}/pyproject.toml +2 -2
  3. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/create_function_executor.py +10 -15
  4. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/function_executor_controller.py +22 -7
  5. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/task_output.py +53 -1
  6. {indexify-0.4.12 → indexify-0.4.13}/README.md +0 -0
  7. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/cli/__init__.py +0 -0
  8. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/cli/build_image.py +0 -0
  9. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/cli/deploy.py +0 -0
  10. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/cli/executor.py +0 -0
  11. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/README.md +0 -0
  12. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/blob_store/blob_store.py +0 -0
  13. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/blob_store/local_fs_blob_store.py +0 -0
  14. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/blob_store/metrics/blob_store.py +0 -0
  15. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/blob_store/s3_blob_store.py +0 -0
  16. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/channel_manager.py +0 -0
  17. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/executor.py +0 -0
  18. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_allowlist.py +0 -0
  19. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/function_executor.py +0 -0
  20. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/health_checker.py +0 -0
  21. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/invocation_state_client.py +0 -0
  22. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
  23. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
  24. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
  25. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
  26. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
  27. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -0
  28. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
  29. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +0 -0
  30. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/__init__.py +0 -0
  31. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/completed_task_metrics.py +0 -0
  32. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/debug_event_loop.py +0 -0
  33. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/downloads.py +0 -0
  34. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/events.py +0 -0
  35. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/function_executor_startup_output.py +0 -0
  36. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/loggers.py +0 -0
  37. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/message_validators.py +0 -0
  38. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +0 -0
  39. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/downloads.py +0 -0
  40. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/function_executor_controller.py +0 -0
  41. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/run_task.py +0 -0
  42. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/metrics/upload_task_output.py +0 -0
  43. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/prepare_task.py +0 -0
  44. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/run_task.py +0 -0
  45. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/task_info.py +0 -0
  46. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/terminate_function_executor.py +0 -0
  47. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/function_executor_controller/upload_task_output.py +0 -0
  48. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/host_resources/host_resources.py +0 -0
  49. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/host_resources/nvidia_gpu.py +0 -0
  50. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py +0 -0
  51. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/metrics/channel_manager.py +0 -0
  52. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/metrics/executor.py +0 -0
  53. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/metrics/state_reconciler.py +0 -0
  54. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/metrics/state_reporter.py +0 -0
  55. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/handler.py +0 -0
  56. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
  57. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -0
  58. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
  59. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/metrics.py +0 -0
  60. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
  61. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/server.py +0 -0
  62. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
  63. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/state_reconciler.py +0 -0
  64. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/executor/state_reporter.py +0 -0
  65. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/proto/executor_api.proto +0 -0
  66. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/proto/executor_api_pb2.py +0 -0
  67. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/proto/executor_api_pb2.pyi +0 -0
  68. {indexify-0.4.12 → indexify-0.4.13}/src/indexify/proto/executor_api_pb2_grpc.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: indexify
3
- Version: 0.4.12
3
+ Version: 0.4.13
4
4
  Summary: Open Source Indexify components and helper tools
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -17,7 +17,7 @@ Requires-Dist: aiohttp (>=3.11.0,<4.0.0)
17
17
  Requires-Dist: boto3 (>=1.37.30,<2.0.0)
18
18
  Requires-Dist: prometheus-client (>=0.21.1,<0.22.0)
19
19
  Requires-Dist: psutil (>=7.0.0,<8.0.0)
20
- Requires-Dist: tensorlake (==0.2.8)
20
+ Requires-Dist: tensorlake (==0.2.12)
21
21
  Project-URL: Repository, https://github.com/tensorlakeai/indexify
22
22
  Description-Content-Type: text/markdown
23
23
 
@@ -1,7 +1,7 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
3
  # Incremented if any of the components provided in this package are updated.
4
- version = "0.4.12"
4
+ version = "0.4.13"
5
5
  description = "Open Source Indexify components and helper tools"
6
6
  authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
7
7
  license = "Apache 2.0"
@@ -25,7 +25,7 @@ prometheus-client = "^0.21.1"
25
25
  psutil = "^7.0.0"
26
26
  # Adds function-executor binary, utils lib, sdk used in indexify-cli commands.
27
27
  # We need to specify the tensorlake version exactly because pip install doesn't respect poetry.lock files.
28
- tensorlake = "0.2.8"
28
+ tensorlake = "0.2.12"
29
29
  # Uncomment the next line to use local tensorlake package (only for development!)
30
30
  # tensorlake = { path = "../tensorlake", develop = true }
31
31
  # pydantic is provided by tensorlake
@@ -5,7 +5,6 @@ from typing import Any, Optional, Tuple
5
5
  from tensorlake.function_executor.proto.function_executor_pb2 import (
6
6
  InitializeRequest,
7
7
  SerializedObject,
8
- SerializedObjectEncoding,
9
8
  )
10
9
 
11
10
  from indexify.executor.blob_store.blob_store import BLOBStore
@@ -73,25 +72,21 @@ async def create_function_executor(
73
72
  except BaseException as e:
74
73
  if isinstance(e, asyncio.CancelledError):
75
74
  logger.info("function executor startup was cancelled")
76
- return FunctionExecutorCreated(
77
- function_executor=None,
78
- output=FunctionExecutorStartupOutput(
79
- function_executor_description=function_executor_description,
80
- termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE,
81
- ),
82
- )
83
75
  else:
84
76
  logger.error(
85
77
  "failed to create function executor due to platform error",
86
78
  exc_info=e,
87
79
  )
88
- return FunctionExecutorCreated(
89
- function_executor=None,
90
- output=FunctionExecutorStartupOutput(
91
- function_executor_description=function_executor_description,
92
- termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR,
93
- ),
94
- )
80
+
81
+ # Cancelled FE startup means that Server removed it from desired state so it doesn't matter what termination_reason we return
82
+ # in this case because this FE will be removed from Executor reported state.
83
+ return FunctionExecutorCreated(
84
+ function_executor=None,
85
+ output=FunctionExecutorStartupOutput(
86
+ function_executor_description=function_executor_description,
87
+ termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR,
88
+ ),
89
+ )
95
90
 
96
91
 
97
92
  async def _initialization_result_to_fe_creation_output(
@@ -461,12 +461,25 @@ class FunctionExecutorController:
461
461
  if event.function_executor is None:
462
462
  # Server needs to increment attempts counter for all the tasks that were pending while FE was starting up.
463
463
  # This prevents infinite retries if FEs consistently fail to start up.
464
+ # The allocations we mark here must also not use the FE terminated failure reason in their outputs
465
+ # because FE terminated means that the allocation wasn't the cause of the FE termination.
466
+ allocation_ids_caused_termination: List[str] = []
467
+ for task_info in self._tasks.values():
468
+ task_logger = task_allocation_logger(task_info.allocation, self._logger)
469
+ task_logger.info(
470
+ "marking allocation failed on function executor startup failure"
471
+ )
472
+ allocation_ids_caused_termination.append(
473
+ task_info.allocation.allocation_id
474
+ )
475
+ task_info.output = TaskOutput.function_executor_startup_failed(
476
+ allocation=task_info.allocation,
477
+ fe_startup_output=event.output,
478
+ logger=task_logger,
479
+ )
464
480
  self._start_termination(
465
481
  fe_termination_reason=event.output.termination_reason,
466
- allocation_ids_caused_termination=[
467
- task_info.allocation.allocation_id
468
- for task_info in self._tasks.values()
469
- ],
482
+ allocation_ids_caused_termination=allocation_ids_caused_termination,
470
483
  )
471
484
  return
472
485
 
@@ -593,9 +606,11 @@ class FunctionExecutorController:
593
606
  _FE_CONTROLLER_STATE.TERMINATING,
594
607
  _FE_CONTROLLER_STATE.TERMINATED,
595
608
  ]:
596
- task_info.output = TaskOutput.function_executor_terminated(
597
- task_info.allocation
598
- )
609
+ if task_info.output is None:
610
+ # The output can be set already by FE startup failure handler.
611
+ task_info.output = TaskOutput.function_executor_terminated(
612
+ task_info.allocation
613
+ )
599
614
  self._start_task_output_upload(task_info)
600
615
  elif self._internal_state == _FE_CONTROLLER_STATE.RUNNING:
601
616
  self._running_task = task_info
@@ -1,4 +1,4 @@
1
- from typing import Dict, List, Optional
1
+ from typing import Any, Dict, List, Optional
2
2
 
3
3
  from tensorlake.function_executor.proto.function_executor_pb2 import (
4
4
  SerializedObject,
@@ -6,11 +6,14 @@ from tensorlake.function_executor.proto.function_executor_pb2 import (
6
6
 
7
7
  from indexify.proto.executor_api_pb2 import (
8
8
  DataPayload,
9
+ FunctionExecutorTerminationReason,
9
10
  TaskAllocation,
10
11
  TaskFailureReason,
11
12
  TaskOutcomeCode,
12
13
  )
13
14
 
15
+ from .function_executor_startup_output import FunctionExecutorStartupOutput
16
+
14
17
 
15
18
  class TaskMetrics:
16
19
  """Metrics for a task."""
@@ -105,3 +108,52 @@ class TaskOutput:
105
108
  outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
106
109
  failure_reason=TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED,
107
110
  )
111
+
112
+ @classmethod
113
+ def function_executor_startup_failed(
114
+ cls,
115
+ allocation: TaskAllocation,
116
+ fe_startup_output: FunctionExecutorStartupOutput,
117
+ logger: Any,
118
+ ) -> "TaskOutput":
119
+ """Creates a TaskOutput for the case when we fail a task because its FE startup failed."""
120
+ output = TaskOutput(
121
+ allocation=allocation,
122
+ outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
123
+ failure_reason=_fe_startup_failure_reason_to_task_failure_reason(
124
+ fe_startup_output.termination_reason, logger
125
+ ),
126
+ )
127
+ # Use FE startup stdout, stderr for allocations that we failed because FE startup failed.
128
+ output.uploaded_stdout = fe_startup_output.stdout
129
+ output.uploaded_stderr = fe_startup_output.stderr
130
+ return output
131
+
132
+
133
+ def _fe_startup_failure_reason_to_task_failure_reason(
134
+ fe_termination_reason: FunctionExecutorTerminationReason, logger: Any
135
+ ) -> TaskFailureReason:
136
+ # Only need to check FE termination reasons happening on FE startup.
137
+ if (
138
+ fe_termination_reason
139
+ == FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR
140
+ ):
141
+ return TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_ERROR
142
+ elif (
143
+ fe_termination_reason
144
+ == FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT
145
+ ):
146
+ return TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_TIMEOUT
147
+ elif (
148
+ fe_termination_reason
149
+ == FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR
150
+ ):
151
+ return TaskFailureReason.TASK_FAILURE_REASON_INTERNAL_ERROR
152
+ else:
153
+ logger.error(
154
+ "unexpected function executor startup failure reason",
155
+ fe_termination_reason=FunctionExecutorTerminationReason.Name(
156
+ fe_termination_reason
157
+ ),
158
+ )
159
+ return TaskFailureReason.TASK_FAILURE_REASON_UNKNOWN
File without changes