indexify 0.4.12__py3-none-any.whl → 0.4.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,6 @@ from typing import Any, Optional, Tuple
5
5
  from tensorlake.function_executor.proto.function_executor_pb2 import (
6
6
  InitializeRequest,
7
7
  SerializedObject,
8
- SerializedObjectEncoding,
9
8
  )
10
9
 
11
10
  from indexify.executor.blob_store.blob_store import BLOBStore
@@ -73,25 +72,21 @@ async def create_function_executor(
73
72
  except BaseException as e:
74
73
  if isinstance(e, asyncio.CancelledError):
75
74
  logger.info("function executor startup was cancelled")
76
- return FunctionExecutorCreated(
77
- function_executor=None,
78
- output=FunctionExecutorStartupOutput(
79
- function_executor_description=function_executor_description,
80
- termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_REMOVED_FROM_DESIRED_STATE,
81
- ),
82
- )
83
75
  else:
84
76
  logger.error(
85
77
  "failed to create function executor due to platform error",
86
78
  exc_info=e,
87
79
  )
88
- return FunctionExecutorCreated(
89
- function_executor=None,
90
- output=FunctionExecutorStartupOutput(
91
- function_executor_description=function_executor_description,
92
- termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR,
93
- ),
94
- )
80
+
81
+ # Cancelled FE startup means that Server removed it from desired state so it doesn't matter what termination_reason we return
82
+ # in this case cause this FE will be removed from Executor reported state.
83
+ return FunctionExecutorCreated(
84
+ function_executor=None,
85
+ output=FunctionExecutorStartupOutput(
86
+ function_executor_description=function_executor_description,
87
+ termination_reason=FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR,
88
+ ),
89
+ )
95
90
 
96
91
 
97
92
  async def _initialization_result_to_fe_creation_output(
@@ -461,12 +461,25 @@ class FunctionExecutorController:
461
461
  if event.function_executor is None:
462
462
  # Server needs to increment attempts counter for all the tasks that were pending while FE was starting up.
463
463
  # This prevents infinite retries if FEs consistently fail to start up.
464
+ # The allocations we marked here also need to not used FE terminated failure reason in their outputs
465
+ # because FE terminated means that the allocation wasn't the cause of the FE termination.
466
+ allocation_ids_caused_termination: List[str] = []
467
+ for task_info in self._tasks.values():
468
+ task_logger = task_allocation_logger(task_info.allocation, self._logger)
469
+ task_logger.info(
470
+ "marking allocation failed on function executor startup failure"
471
+ )
472
+ allocation_ids_caused_termination.append(
473
+ task_info.allocation.allocation_id
474
+ )
475
+ task_info.output = TaskOutput.function_executor_startup_failed(
476
+ allocation=task_info.allocation,
477
+ fe_startup_output=event.output,
478
+ logger=task_logger,
479
+ )
464
480
  self._start_termination(
465
481
  fe_termination_reason=event.output.termination_reason,
466
- allocation_ids_caused_termination=[
467
- task_info.allocation.allocation_id
468
- for task_info in self._tasks.values()
469
- ],
482
+ allocation_ids_caused_termination=allocation_ids_caused_termination,
470
483
  )
471
484
  return
472
485
 
@@ -593,9 +606,11 @@ class FunctionExecutorController:
593
606
  _FE_CONTROLLER_STATE.TERMINATING,
594
607
  _FE_CONTROLLER_STATE.TERMINATED,
595
608
  ]:
596
- task_info.output = TaskOutput.function_executor_terminated(
597
- task_info.allocation
598
- )
609
+ if task_info.output is None:
610
+ # The output can be set already by FE startup failure handler.
611
+ task_info.output = TaskOutput.function_executor_terminated(
612
+ task_info.allocation
613
+ )
599
614
  self._start_task_output_upload(task_info)
600
615
  elif self._internal_state == _FE_CONTROLLER_STATE.RUNNING:
601
616
  self._running_task = task_info
@@ -1,4 +1,4 @@
1
- from typing import Dict, List, Optional
1
+ from typing import Any, Dict, List, Optional
2
2
 
3
3
  from tensorlake.function_executor.proto.function_executor_pb2 import (
4
4
  SerializedObject,
@@ -6,11 +6,14 @@ from tensorlake.function_executor.proto.function_executor_pb2 import (
6
6
 
7
7
  from indexify.proto.executor_api_pb2 import (
8
8
  DataPayload,
9
+ FunctionExecutorTerminationReason,
9
10
  TaskAllocation,
10
11
  TaskFailureReason,
11
12
  TaskOutcomeCode,
12
13
  )
13
14
 
15
+ from .function_executor_startup_output import FunctionExecutorStartupOutput
16
+
14
17
 
15
18
  class TaskMetrics:
16
19
  """Metrics for a task."""
@@ -105,3 +108,52 @@ class TaskOutput:
105
108
  outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
106
109
  failure_reason=TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED,
107
110
  )
111
+
112
+ @classmethod
113
+ def function_executor_startup_failed(
114
+ cls,
115
+ allocation: TaskAllocation,
116
+ fe_startup_output: FunctionExecutorStartupOutput,
117
+ logger: Any,
118
+ ) -> "TaskOutput":
119
+ """Creates a TaskOutput for the case when we fail a task because its FE startup failed."""
120
+ output = TaskOutput(
121
+ allocation=allocation,
122
+ outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
123
+ failure_reason=_fe_startup_failure_reason_to_task_failure_reason(
124
+ fe_startup_output.termination_reason, logger
125
+ ),
126
+ )
127
+ # Use FE startup stdout, stderr for allocations that we failed because FE startup failed.
128
+ output.uploaded_stdout = fe_startup_output.stdout
129
+ output.uploaded_stderr = fe_startup_output.stderr
130
+ return output
131
+
132
+
133
+ def _fe_startup_failure_reason_to_task_failure_reason(
134
+ fe_termination_reason: FunctionExecutorTerminationReason, logger: Any
135
+ ) -> TaskFailureReason:
136
+ # Only need to check FE termination reasons happening on FE startup.
137
+ if (
138
+ fe_termination_reason
139
+ == FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_ERROR
140
+ ):
141
+ return TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_ERROR
142
+ elif (
143
+ fe_termination_reason
144
+ == FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_FUNCTION_TIMEOUT
145
+ ):
146
+ return TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_TIMEOUT
147
+ elif (
148
+ fe_termination_reason
149
+ == FunctionExecutorTerminationReason.FUNCTION_EXECUTOR_TERMINATION_REASON_STARTUP_FAILED_INTERNAL_ERROR
150
+ ):
151
+ return TaskFailureReason.TASK_FAILURE_REASON_INTERNAL_ERROR
152
+ else:
153
+ logger.error(
154
+ "unexpected function executor startup failure reason",
155
+ fe_termination_reason=FunctionExecutorTerminationReason.Name(
156
+ fe_termination_reason
157
+ ),
158
+ )
159
+ return TaskFailureReason.TASK_FAILURE_REASON_UNKNOWN
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: indexify
3
- Version: 0.4.12
3
+ Version: 0.4.13
4
4
  Summary: Open Source Indexify components and helper tools
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -17,7 +17,7 @@ Requires-Dist: aiohttp (>=3.11.0,<4.0.0)
17
17
  Requires-Dist: boto3 (>=1.37.30,<2.0.0)
18
18
  Requires-Dist: prometheus-client (>=0.21.1,<0.22.0)
19
19
  Requires-Dist: psutil (>=7.0.0,<8.0.0)
20
- Requires-Dist: tensorlake (==0.2.8)
20
+ Requires-Dist: tensorlake (==0.2.12)
21
21
  Project-URL: Repository, https://github.com/tensorlakeai/indexify
22
22
  Description-Content-Type: text/markdown
23
23
 
@@ -23,11 +23,11 @@ indexify/executor/function_executor/server/subprocess_function_executor_server.p
23
23
  indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py,sha256=w5aGQPHWLpixlP9-BbZu6oL_muMA95-hr7WKVxiEL7Q,4303
24
24
  indexify/executor/function_executor_controller/__init__.py,sha256=VPuuBEYOKf7OWyPPjy-jGOv-d5xJqHvkJfFT_oj-AsE,492
25
25
  indexify/executor/function_executor_controller/completed_task_metrics.py,sha256=MhnC-ddgmTK4yTsuZxgTKnqZ-YSVeWn2EhbbiggsSKk,3664
26
- indexify/executor/function_executor_controller/create_function_executor.py,sha256=qQ_xBQRYUwz8SbNH7-H6D9GP-NJM3OlxPaIuLnPpbRM,9996
26
+ indexify/executor/function_executor_controller/create_function_executor.py,sha256=DA8niVgftxSE_OYARw7nLSJujVu8HDzHh1EW_cXquC0,9766
27
27
  indexify/executor/function_executor_controller/debug_event_loop.py,sha256=VJOKe_c9HjIDVCjhMY3Yqyeq1tAM1eVa2chZa6CMf-U,1016
28
28
  indexify/executor/function_executor_controller/downloads.py,sha256=XjCUmLY_jrI3AlnXC7aDwwKWTvsQjV7I9AXzrbIeY6c,7063
29
29
  indexify/executor/function_executor_controller/events.py,sha256=r2K3k9Nnkzh0j6HHZC0DxOdQ3HtCmzt4eN2DIwTa7NM,5456
30
- indexify/executor/function_executor_controller/function_executor_controller.py,sha256=OgPlcYELVehuDVd1mzo2-LMzvIKBsYMZDGUaGDwkigc,35454
30
+ indexify/executor/function_executor_controller/function_executor_controller.py,sha256=CpIpZ0l8s5O22RLq_rQOBXRYrDq3YAWFXGQF1BfCbxQ,36415
31
31
  indexify/executor/function_executor_controller/function_executor_startup_output.py,sha256=PXg2r440kqHI3oHGZbb58ehuAuW_fmEdxLTAa-0V3p4,715
32
32
  indexify/executor/function_executor_controller/loggers.py,sha256=zEY2nt15gboX3SX6Kh1xjeCljZJZSE4lp27qNrg8yPY,3637
33
33
  indexify/executor/function_executor_controller/message_validators.py,sha256=aNiZhYA87pnxUJtZKvKGDt40rfox-TYH2J6mW7o-Pkw,2981
@@ -39,7 +39,7 @@ indexify/executor/function_executor_controller/metrics/upload_task_output.py,sha
39
39
  indexify/executor/function_executor_controller/prepare_task.py,sha256=AKbo_H_5pOKdxFKKkzdOb1WhQ0XT-4Qm9D3iIsukyMU,1247
40
40
  indexify/executor/function_executor_controller/run_task.py,sha256=ARa-OgqgJ1o_LmO1FIhkXBvqLWXCzYLiSiG2jGw_NR8,10448
41
41
  indexify/executor/function_executor_controller/task_info.py,sha256=ZEdypd8QVmYbrLt1186Ed9YEQwrO0Sx_hKH0QLg1DVY,1181
42
- indexify/executor/function_executor_controller/task_output.py,sha256=A5KJh60OI1FdHw6ABX3KxL9OMrC6ZADJHeo0W5iZpz8,3764
42
+ indexify/executor/function_executor_controller/task_output.py,sha256=xj2QhYsw2QEX13B43i3HJS6kJyR9WreoQth4hIVXIqk,5938
43
43
  indexify/executor/function_executor_controller/terminate_function_executor.py,sha256=YLDlKoanfUBcy7A9ydCYdUsDwApjcTTn1o4tjNVN_QA,1281
44
44
  indexify/executor/function_executor_controller/upload_task_output.py,sha256=fEZm5eodx5rNLQYFhmdkMDD9qjX3_wKo64x4aUKTu34,10403
45
45
  indexify/executor/host_resources/host_resources.py,sha256=ZSfox24jaz1IIaQWUmciOoev0l35rk8LHbnb_koJWno,3810
@@ -63,7 +63,7 @@ indexify/proto/executor_api.proto,sha256=MEsBzpL5tlBd6Yc3ZfheQ60DbXAZWQWGM1dlldh
63
63
  indexify/proto/executor_api_pb2.py,sha256=w2Wj1MEtTihNZ4TvPKPYDjMryO52fPa7A4dKv7vb__s,16236
64
64
  indexify/proto/executor_api_pb2.pyi,sha256=AEHW4YFm5FT8f66qKGkPH36szlWgCgXrH3CQ8rgxSps,22754
65
65
  indexify/proto/executor_api_pb2_grpc.py,sha256=JpT5K6jiS0NJVNyTt1mAPpyJMXuEGeNN2V6R3KmLHZ4,7607
66
- indexify-0.4.12.dist-info/METADATA,sha256=r21ToO5MZ3_RK7w-iSaXuwXjHSCwx_8-JdTF8_a9lTc,1115
67
- indexify-0.4.12.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
68
- indexify-0.4.12.dist-info/entry_points.txt,sha256=rMJqbE5KPZIXTPIfAtVIM4zpUElqYVgEYd6i7N23zzg,49
69
- indexify-0.4.12.dist-info/RECORD,,
66
+ indexify-0.4.13.dist-info/METADATA,sha256=ZN16aCArggFFTIlaWelbhaF36t2CEtTAm6dbJxVbFU0,1116
67
+ indexify-0.4.13.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
68
+ indexify-0.4.13.dist-info/entry_points.txt,sha256=rMJqbE5KPZIXTPIfAtVIM4zpUElqYVgEYd6i7N23zzg,49
69
+ indexify-0.4.13.dist-info/RECORD,,