indexify 0.4.29__py3-none-any.whl → 0.4.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/cli/build_image.py +56 -16
- indexify/cli/deploy.py +1 -1
- indexify/executor/function_executor_controller/__init__.py +2 -2
- indexify/executor/function_executor_controller/completed_task_allocation_metrics.py +87 -0
- indexify/executor/function_executor_controller/events.py +29 -33
- indexify/executor/function_executor_controller/{finalize_task.py → finalize_task_allocation.py} +45 -37
- indexify/executor/function_executor_controller/function_executor_controller.py +194 -180
- indexify/executor/function_executor_controller/loggers.py +15 -17
- indexify/executor/function_executor_controller/message_validators.py +4 -12
- indexify/executor/function_executor_controller/metrics/completed_task_allocation_metrics.py +70 -0
- indexify/executor/function_executor_controller/metrics/finalize_task_allocation.py +26 -0
- indexify/executor/function_executor_controller/metrics/function_executor_controller.py +12 -11
- indexify/executor/function_executor_controller/metrics/prepare_task_allocation.py +27 -0
- indexify/executor/function_executor_controller/{prepare_task.py → prepare_task_allocation.py} +33 -29
- indexify/executor/function_executor_controller/{run_task.py → run_task_allocation.py} +54 -51
- indexify/executor/function_executor_controller/{task_info.py → task_allocation_info.py} +6 -6
- indexify/executor/function_executor_controller/{task_input.py → task_allocation_input.py} +2 -2
- indexify/executor/function_executor_controller/{task_output.py → task_allocation_output.py} +24 -24
- indexify/executor/state_reconciler.py +23 -19
- {indexify-0.4.29.dist-info → indexify-0.4.31.dist-info}/METADATA +2 -2
- {indexify-0.4.29.dist-info → indexify-0.4.31.dist-info}/RECORD +24 -24
- indexify/executor/function_executor_controller/completed_task_metrics.py +0 -83
- indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +0 -68
- indexify/executor/function_executor_controller/metrics/finalize_task.py +0 -20
- indexify/executor/function_executor_controller/metrics/prepare_task.py +0 -18
- /indexify/executor/function_executor_controller/metrics/{run_task.py → run_task_allocation.py} +0 -0
- {indexify-0.4.29.dist-info → indexify-0.4.31.dist-info}/WHEEL +0 -0
- {indexify-0.4.29.dist-info → indexify-0.4.31.dist-info}/entry_points.txt +0 -0
@@ -13,16 +13,16 @@ from indexify.proto.executor_api_pb2 import (
|
|
13
13
|
)
|
14
14
|
|
15
15
|
|
16
|
-
class
|
17
|
-
"""Metrics for a task."""
|
16
|
+
class TaskAllocationMetrics:
|
17
|
+
"""Metrics for a task allocation."""
|
18
18
|
|
19
19
|
def __init__(self, counters: Dict[str, int], timers: Dict[str, float]):
|
20
20
|
self.counters = counters
|
21
21
|
self.timers = timers
|
22
22
|
|
23
23
|
|
24
|
-
class
|
25
|
-
"""Result of running a task."""
|
24
|
+
class TaskAllocationOutput:
|
25
|
+
"""Result of running a task allocation."""
|
26
26
|
|
27
27
|
def __init__(
|
28
28
|
self,
|
@@ -34,7 +34,7 @@ class TaskOutput:
|
|
34
34
|
invocation_error_output: Optional[SerializedObjectInsideBLOB] = None,
|
35
35
|
uploaded_invocation_error_blob: Optional[BLOB] = None,
|
36
36
|
next_functions: List[str] = [],
|
37
|
-
metrics: Optional[
|
37
|
+
metrics: Optional[TaskAllocationMetrics] = None,
|
38
38
|
execution_start_time: Optional[float] = None,
|
39
39
|
execution_end_time: Optional[float] = None,
|
40
40
|
):
|
@@ -56,9 +56,9 @@ class TaskOutput:
|
|
56
56
|
allocation: TaskAllocation,
|
57
57
|
execution_start_time: Optional[float],
|
58
58
|
execution_end_time: Optional[float],
|
59
|
-
) -> "
|
60
|
-
"""Creates a
|
61
|
-
return
|
59
|
+
) -> "TaskAllocationOutput":
|
60
|
+
"""Creates a TaskAllocationOutput for an internal error."""
|
61
|
+
return TaskAllocationOutput(
|
62
62
|
allocation=allocation,
|
63
63
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
64
64
|
failure_reason=TaskFailureReason.TASK_FAILURE_REASON_INTERNAL_ERROR,
|
@@ -72,9 +72,9 @@ class TaskOutput:
|
|
72
72
|
allocation: TaskAllocation,
|
73
73
|
execution_start_time: Optional[float],
|
74
74
|
execution_end_time: Optional[float],
|
75
|
-
) -> "
|
76
|
-
"""Creates a
|
77
|
-
return
|
75
|
+
) -> "TaskAllocationOutput":
|
76
|
+
"""Creates a TaskAllocationOutput for a function timeout error."""
|
77
|
+
return TaskAllocationOutput(
|
78
78
|
allocation=allocation,
|
79
79
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
80
80
|
failure_reason=TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_TIMEOUT,
|
@@ -88,10 +88,10 @@ class TaskOutput:
|
|
88
88
|
allocation: TaskAllocation,
|
89
89
|
execution_start_time: Optional[float],
|
90
90
|
execution_end_time: Optional[float],
|
91
|
-
) -> "
|
92
|
-
"""Creates a
|
91
|
+
) -> "TaskAllocationOutput":
|
92
|
+
"""Creates a TaskAllocationOutput for an unresponsive FE aka grey failure."""
|
93
93
|
# When FE is unresponsive we don't know exact cause of the failure.
|
94
|
-
return
|
94
|
+
return TaskAllocationOutput(
|
95
95
|
allocation=allocation,
|
96
96
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
97
97
|
# Treat the grey failure as a function error and thus charge the customer.
|
@@ -102,14 +102,14 @@ class TaskOutput:
|
|
102
102
|
)
|
103
103
|
|
104
104
|
@classmethod
|
105
|
-
def
|
105
|
+
def task_allocation_cancelled(
|
106
106
|
cls,
|
107
107
|
allocation: TaskAllocation,
|
108
108
|
execution_start_time: Optional[float],
|
109
109
|
execution_end_time: Optional[float],
|
110
|
-
) -> "
|
111
|
-
"""Creates a
|
112
|
-
return
|
110
|
+
) -> "TaskAllocationOutput":
|
111
|
+
"""Creates a TaskAllocationOutput for the case when task allocation didn't finish because its allocation was removed by Server."""
|
112
|
+
return TaskAllocationOutput(
|
113
113
|
allocation=allocation,
|
114
114
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
115
115
|
failure_reason=TaskFailureReason.TASK_FAILURE_REASON_TASK_CANCELLED,
|
@@ -121,9 +121,9 @@ class TaskOutput:
|
|
121
121
|
def function_executor_terminated(
|
122
122
|
cls,
|
123
123
|
allocation: TaskAllocation,
|
124
|
-
) -> "
|
125
|
-
"""Creates a
|
126
|
-
return
|
124
|
+
) -> "TaskAllocationOutput":
|
125
|
+
"""Creates a TaskAllocationOutput for the case when task allocation didn't run because its FE terminated."""
|
126
|
+
return TaskAllocationOutput(
|
127
127
|
allocation=allocation,
|
128
128
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
129
129
|
failure_reason=TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED,
|
@@ -135,9 +135,9 @@ class TaskOutput:
|
|
135
135
|
allocation: TaskAllocation,
|
136
136
|
fe_termination_reason: FunctionExecutorTerminationReason,
|
137
137
|
logger: Any,
|
138
|
-
) -> "
|
139
|
-
"""Creates a
|
140
|
-
return
|
138
|
+
) -> "TaskAllocationOutput":
|
139
|
+
"""Creates a TaskAllocationOutput for the case when we fail a task allocation that didn't run because its FE startup failed."""
|
140
|
+
return TaskAllocationOutput(
|
141
141
|
allocation=allocation,
|
142
142
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
143
143
|
failure_reason=_fe_startup_failure_reason_to_task_failure_reason(
|
@@ -247,6 +247,8 @@ class ExecutorStateReconciler:
|
|
247
247
|
with metric_state_reconciliation_latency.time():
|
248
248
|
metric_state_reconciliations.inc()
|
249
249
|
await self._reconcile_state(last_reconciled_state)
|
250
|
+
# Update the clock regardless of success or failure.
|
251
|
+
# This is to show Server that we actually processed the message.
|
250
252
|
self._state_reporter.update_last_server_clock(
|
251
253
|
last_reconciled_state.clock
|
252
254
|
)
|
@@ -261,7 +263,7 @@ class ExecutorStateReconciler:
|
|
261
263
|
try:
|
262
264
|
# Reconcile FEs first because Tasks depend on them.
|
263
265
|
self._reconcile_function_executors(desired_state.function_executors)
|
264
|
-
self.
|
266
|
+
self._reconcile_task_allocations(desired_state.task_allocations)
|
265
267
|
return
|
266
268
|
except Exception as e:
|
267
269
|
self._logger.error(
|
@@ -383,38 +385,38 @@ class ExecutorStateReconciler:
|
|
383
385
|
self._function_executor_controllers.pop(function_executor_id, None)
|
384
386
|
self._shutting_down_fe_ids.discard(function_executor_id)
|
385
387
|
|
386
|
-
def
|
388
|
+
def _reconcile_task_allocations(self, task_allocations: Iterable[TaskAllocation]):
|
387
389
|
valid_task_allocations: List[TaskAllocation] = self._valid_task_allocations(
|
388
390
|
task_allocations
|
389
391
|
)
|
390
392
|
for task_allocation in valid_task_allocations:
|
391
|
-
self.
|
393
|
+
self._reconcile_task_allocation(task_allocation)
|
392
394
|
|
393
395
|
# Cancel tasks that are no longer in the desired state.
|
394
|
-
# FE ID => [
|
395
|
-
|
396
|
+
# FE ID => [Allocation ID]
|
397
|
+
desired_alloc_ids_per_fe: Dict[str, List[str]] = {}
|
396
398
|
for task_allocation in valid_task_allocations:
|
397
|
-
if task_allocation.function_executor_id not in
|
398
|
-
|
399
|
-
|
400
|
-
task_allocation.
|
399
|
+
if task_allocation.function_executor_id not in desired_alloc_ids_per_fe:
|
400
|
+
desired_alloc_ids_per_fe[task_allocation.function_executor_id] = []
|
401
|
+
desired_alloc_ids_per_fe[task_allocation.function_executor_id].append(
|
402
|
+
task_allocation.allocation_id
|
401
403
|
)
|
402
404
|
|
403
405
|
for fe_controller in self._function_executor_controllers.values():
|
404
406
|
fe_controller: FunctionExecutorController
|
405
|
-
if fe_controller.function_executor_id() in
|
406
|
-
|
407
|
-
|
407
|
+
if fe_controller.function_executor_id() in desired_alloc_ids_per_fe:
|
408
|
+
desired_fe_alloc_ids: Set[str] = set(
|
409
|
+
desired_alloc_ids_per_fe[fe_controller.function_executor_id()]
|
408
410
|
)
|
409
411
|
else:
|
410
412
|
# No tasks desired for this FE, so cancel all its tasks.
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
for
|
415
|
-
fe_controller.
|
413
|
+
desired_fe_alloc_ids: Set[str] = set()
|
414
|
+
actual_fe_alloc_ids: Set[str] = set(fe_controller.task_allocation_ids())
|
415
|
+
alloc_ids_to_remove: Set[str] = actual_fe_alloc_ids - desired_fe_alloc_ids
|
416
|
+
for alloc_id in alloc_ids_to_remove:
|
417
|
+
fe_controller.remove_task_allocation(alloc_id)
|
416
418
|
|
417
|
-
def
|
419
|
+
def _reconcile_task_allocation(self, task_allocation: TaskAllocation):
|
418
420
|
"""Reconciles a single TaskAllocation with the desired state.
|
419
421
|
|
420
422
|
Doesn't raise any exceptions.
|
@@ -422,7 +424,9 @@ class ExecutorStateReconciler:
|
|
422
424
|
function_executor_controller: FunctionExecutorController = (
|
423
425
|
self._function_executor_controllers[task_allocation.function_executor_id]
|
424
426
|
)
|
425
|
-
if function_executor_controller.
|
427
|
+
if function_executor_controller.has_task_allocation(
|
428
|
+
task_allocation.allocation_id
|
429
|
+
):
|
426
430
|
# Nothing to do, task already exists and it's immutable.
|
427
431
|
return
|
428
432
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: indexify
|
3
|
-
Version: 0.4.
|
3
|
+
Version: 0.4.31
|
4
4
|
Summary: Open Source Indexify components and helper tools
|
5
5
|
Home-page: https://github.com/tensorlakeai/indexify
|
6
6
|
License: Apache 2.0
|
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
16
16
|
Requires-Dist: aiohttp (>=3.12.15,<4.0.0)
|
17
|
-
Requires-Dist: boto3 (>=1.40.
|
17
|
+
Requires-Dist: boto3 (>=1.40.15,<2.0.0)
|
18
18
|
Requires-Dist: docker (>=7.1.0,<8.0.0)
|
19
19
|
Requires-Dist: httpx[http2] (==0.27.2)
|
20
20
|
Requires-Dist: nanoid (>=2.0.0,<3.0.0)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
indexify/cli/__init__.py,sha256=ELFLx_Z_oWm30jwOpYjbD6Ori3Nzz4ldkvmGVK7QMgw,426
|
2
|
-
indexify/cli/build_image.py,sha256=
|
3
|
-
indexify/cli/deploy.py,sha256=
|
2
|
+
indexify/cli/build_image.py,sha256=FPP8hdj0E5IsEKokS4IBWKSx9PWZ66ZY5GqEI9oHj7k,4139
|
3
|
+
indexify/cli/deploy.py,sha256=f3CX1PhnkrbNzrqv4BY_C6YZHYxSJKr0sAjDsw1rKBs,1850
|
4
4
|
indexify/cli/executor.py,sha256=0go8YUPFCwg77pYbCaoWuPraqW7KBgZ6Fyx6sQzT4aM,6286
|
5
5
|
indexify/executor/README.md,sha256=ozC6_hMkhQQNVCMEpBxwiUALz6lwErPQxNxQfQDqnG4,2029
|
6
6
|
indexify/executor/blob_store/blob_store.py,sha256=lrSGTZa_H4Cs1BFwADp-aluvD3LpmE1XO76ZJMX5alU,5798
|
@@ -21,28 +21,28 @@ indexify/executor/function_executor/server/function_executor_server.py,sha256=_D
|
|
21
21
|
indexify/executor/function_executor/server/function_executor_server_factory.py,sha256=pZ3tQoaeWP2NDaR-A0PUYmzrBz768U2b9ENBFQG1INg,1814
|
22
22
|
indexify/executor/function_executor/server/subprocess_function_executor_server.py,sha256=JekDOqF7oFD4J6zcN3xB0Dxd1cgpEXMOsb_rKZOeBlI,668
|
23
23
|
indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py,sha256=w5aGQPHWLpixlP9-BbZu6oL_muMA95-hr7WKVxiEL7Q,4303
|
24
|
-
indexify/executor/function_executor_controller/__init__.py,sha256=
|
24
|
+
indexify/executor/function_executor_controller/__init__.py,sha256=jJQa09kQw05ycJhIWGv5-RoWe8WIymoCki5mo5P3Nyo,523
|
25
25
|
indexify/executor/function_executor_controller/aio_utils.py,sha256=nohPk9k38FpZ87y5jgbb-UhUNvf-GRETkyyRBp7WnVw,804
|
26
|
-
indexify/executor/function_executor_controller/
|
26
|
+
indexify/executor/function_executor_controller/completed_task_allocation_metrics.py,sha256=eYtV0--vwd_xwdRBTr_FRsbQ3D4bjOuIQ94TjEy5kR8,4137
|
27
27
|
indexify/executor/function_executor_controller/create_function_executor.py,sha256=_VLmT9zmo0Hvt4K4WkC8PCB9qNgTv8k9QkwTSAOQRDU,11158
|
28
28
|
indexify/executor/function_executor_controller/debug_event_loop.py,sha256=VJOKe_c9HjIDVCjhMY3Yqyeq1tAM1eVa2chZa6CMf-U,1016
|
29
29
|
indexify/executor/function_executor_controller/downloads.py,sha256=B2dbaa6osp1_vCQ6WY_9znAca3Z2qqVzQAF2av3v8Pg,5304
|
30
|
-
indexify/executor/function_executor_controller/events.py,sha256=
|
31
|
-
indexify/executor/function_executor_controller/
|
32
|
-
indexify/executor/function_executor_controller/function_executor_controller.py,sha256=
|
33
|
-
indexify/executor/function_executor_controller/loggers.py,sha256=
|
34
|
-
indexify/executor/function_executor_controller/message_validators.py,sha256=
|
35
|
-
indexify/executor/function_executor_controller/metrics/
|
30
|
+
indexify/executor/function_executor_controller/events.py,sha256=Ly8ypjgyPdOThyj9TXN15RZTJ6ipP_nDNiYND9Ghyr4,5795
|
31
|
+
indexify/executor/function_executor_controller/finalize_task_allocation.py,sha256=hYCULPN0LWpPJXwHtyLUYSUQOGC3Lp47SQuLLJAO90A,7135
|
32
|
+
indexify/executor/function_executor_controller/function_executor_controller.py,sha256=jq4UV_6op9GbjmcjEmSDxVbb_DtUj1gVKF24rmZU4dU,41698
|
33
|
+
indexify/executor/function_executor_controller/loggers.py,sha256=KNXlb7n3CssMbYmt5DrH-cUgfYEf38JM1W82uzkYsZY,3556
|
34
|
+
indexify/executor/function_executor_controller/message_validators.py,sha256=fxF-sNC1Pf84NmbDb0-Yg6I6OccgjQeWjSkaPkIcip8,3055
|
35
|
+
indexify/executor/function_executor_controller/metrics/completed_task_allocation_metrics.py,sha256=iW-is4V4mv9umIQQar_2k_rlwnaTLs5uzCo-TuCylpg,3155
|
36
36
|
indexify/executor/function_executor_controller/metrics/downloads.py,sha256=G8UUDfnzmiK_26OvZYTqH0KgNb3kI-0TgzGLFEuSEFc,892
|
37
|
-
indexify/executor/function_executor_controller/metrics/
|
38
|
-
indexify/executor/function_executor_controller/metrics/function_executor_controller.py,sha256=
|
39
|
-
indexify/executor/function_executor_controller/metrics/
|
40
|
-
indexify/executor/function_executor_controller/metrics/
|
41
|
-
indexify/executor/function_executor_controller/
|
42
|
-
indexify/executor/function_executor_controller/
|
43
|
-
indexify/executor/function_executor_controller/
|
44
|
-
indexify/executor/function_executor_controller/
|
45
|
-
indexify/executor/function_executor_controller/
|
37
|
+
indexify/executor/function_executor_controller/metrics/finalize_task_allocation.py,sha256=o2T3j9mTJ1Zjfje1Zuiw2BMOTLoSLyF-wSVhmNEUcbQ,940
|
38
|
+
indexify/executor/function_executor_controller/metrics/function_executor_controller.py,sha256=3nMsgtRbPHIleU8FQZqrdcraJd-7rAjqF7i1PcQRyq8,2803
|
39
|
+
indexify/executor/function_executor_controller/metrics/prepare_task_allocation.py,sha256=re07otwicIbPBFN43kihKEzuxN-4ZNt74LyrXKtI68I,971
|
40
|
+
indexify/executor/function_executor_controller/metrics/run_task_allocation.py,sha256=ZFv_nw5_pKUJoTaavSyzdglQKW4uvC2XyK8S6xi9xLQ,1064
|
41
|
+
indexify/executor/function_executor_controller/prepare_task_allocation.py,sha256=uv6_4Qd8DhJUhmp6eaCDrua4j6LLFqOjr6IwYmiOIHQ,9503
|
42
|
+
indexify/executor/function_executor_controller/run_task_allocation.py,sha256=5J4BhxBhmvqm9nd1i5YNN6Z_49BGdPafXZw4VWoX71Q,15842
|
43
|
+
indexify/executor/function_executor_controller/task_allocation_info.py,sha256=7gqSp90SuoGxpV5K80YrK2YhrLteeh5xSH--eStkX8s,1098
|
44
|
+
indexify/executor/function_executor_controller/task_allocation_input.py,sha256=lST2UgjzsDbMAh6G9vsdnb_lRq9s0BObbz4RX3ggXag,897
|
45
|
+
indexify/executor/function_executor_controller/task_allocation_output.py,sha256=pJC3FYxFyVMnbv2ci8KR8ONAWtjO48cE39dpM9sdSCA,7403
|
46
46
|
indexify/executor/function_executor_controller/terminate_function_executor.py,sha256=GHkMEidd4zbkulFWAeLGX1HsXtZvPJXh4dEusgy2ioA,1731
|
47
47
|
indexify/executor/host_resources/host_resources.py,sha256=eUyP05EX7QdOtQ5vbX_KCpvnBS2B7fl06UWeF9Oigns,3813
|
48
48
|
indexify/executor/host_resources/nvidia_gpu.py,sha256=uTCkLXnozZSpax8VApt0QMMM9YcBUK9eggYpwmLz09I,3308
|
@@ -63,13 +63,13 @@ indexify/executor/monitoring/prometheus_metrics_handler.py,sha256=KiGqSf7rkXTfbD
|
|
63
63
|
indexify/executor/monitoring/reported_state_handler.py,sha256=R1C3tk8CF2xh7pbBgKzM1ADReDMEV9CyIRlAZ9NFado,697
|
64
64
|
indexify/executor/monitoring/server.py,sha256=aAKzL9J243Q9_41JY-4tSBdFKXR_ZOMz-DEJNtxfYC4,1483
|
65
65
|
indexify/executor/monitoring/startup_probe_handler.py,sha256=zXXsBU15SMlBx1bSFpxWDfed1VHtKKnwvLQ8-frpG98,425
|
66
|
-
indexify/executor/state_reconciler.py,sha256=
|
66
|
+
indexify/executor/state_reconciler.py,sha256=fA-2hgPR2YFI8wOwLXzURn-OhsZEqNhMzFlksDESclc,20638
|
67
67
|
indexify/executor/state_reporter.py,sha256=zXb6SvD1yA4tMDWxT_p995y8l490hifXRHX4LjN6WOA,15505
|
68
68
|
indexify/proto/executor_api.proto,sha256=YwLeLjyLHhs5qoWLA50uHY2KdKRGfBQBKZwE8VXmzeo,12871
|
69
69
|
indexify/proto/executor_api_pb2.py,sha256=vTG1-2Pp4OnTWFD4GYphgJ3cUbTbDjCOKstKrLBXB-E,16472
|
70
70
|
indexify/proto/executor_api_pb2.pyi,sha256=-6P-ef-fBJF0CTc4UucIzrDLCBVZpIEhEz2qhexvwjk,23175
|
71
71
|
indexify/proto/executor_api_pb2_grpc.py,sha256=u9GEQV4nm_GvApRxjVo806CkgBMBVReb5IVrcaDaliY,7520
|
72
|
-
indexify-0.4.
|
73
|
-
indexify-0.4.
|
74
|
-
indexify-0.4.
|
75
|
-
indexify-0.4.
|
72
|
+
indexify-0.4.31.dist-info/METADATA,sha256=XQZTmN_xg_dU5OBebaGknra6fs4JPBwKlotK5211Hxw,1390
|
73
|
+
indexify-0.4.31.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
|
74
|
+
indexify-0.4.31.dist-info/entry_points.txt,sha256=rMJqbE5KPZIXTPIfAtVIM4zpUElqYVgEYd6i7N23zzg,49
|
75
|
+
indexify-0.4.31.dist-info/RECORD,,
|
@@ -1,83 +0,0 @@
|
|
1
|
-
import time
|
2
|
-
from typing import Any
|
3
|
-
|
4
|
-
from indexify.proto.executor_api_pb2 import (
|
5
|
-
TaskFailureReason,
|
6
|
-
TaskOutcomeCode,
|
7
|
-
)
|
8
|
-
|
9
|
-
from .metrics.completed_task_metrics import (
|
10
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_ALL,
|
11
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_ERROR,
|
12
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED,
|
13
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_INTERNAL_ERROR,
|
14
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_NONE,
|
15
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_TASK_CANCELLED,
|
16
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_UNKNOWN,
|
17
|
-
METRIC_TASKS_COMPLETED_OUTCOME_CODE_ALL,
|
18
|
-
METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
19
|
-
METRIC_TASKS_COMPLETED_OUTCOME_CODE_SUCCESS,
|
20
|
-
metric_task_completion_latency,
|
21
|
-
metric_tasks_completed,
|
22
|
-
)
|
23
|
-
from .task_info import TaskInfo
|
24
|
-
|
25
|
-
|
26
|
-
def emit_completed_task_metrics(task_info: TaskInfo, logger: Any) -> None:
|
27
|
-
"""Emits Prometheus metrics for a completed task.
|
28
|
-
|
29
|
-
Doesn't raise any exceptions.
|
30
|
-
"""
|
31
|
-
logger = logger.bind(module=__name__)
|
32
|
-
metric_task_completion_latency.observe(time.monotonic() - task_info.start_time)
|
33
|
-
|
34
|
-
task_outcome_code: TaskOutcomeCode = task_info.output.outcome_code
|
35
|
-
task_failure_reason: TaskFailureReason = task_info.output.failure_reason
|
36
|
-
metric_tasks_completed.labels(
|
37
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_ALL,
|
38
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_ALL,
|
39
|
-
).inc()
|
40
|
-
if task_outcome_code == TaskOutcomeCode.TASK_OUTCOME_CODE_SUCCESS:
|
41
|
-
metric_tasks_completed.labels(
|
42
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_SUCCESS,
|
43
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_NONE,
|
44
|
-
).inc()
|
45
|
-
elif task_outcome_code == TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE:
|
46
|
-
if task_failure_reason == TaskFailureReason.TASK_FAILURE_REASON_INTERNAL_ERROR:
|
47
|
-
metric_tasks_completed.labels(
|
48
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
49
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_INTERNAL_ERROR,
|
50
|
-
).inc()
|
51
|
-
elif (
|
52
|
-
task_failure_reason
|
53
|
-
== TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED
|
54
|
-
):
|
55
|
-
metric_tasks_completed.labels(
|
56
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
57
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED,
|
58
|
-
).inc()
|
59
|
-
elif (
|
60
|
-
task_failure_reason == TaskFailureReason.TASK_FAILURE_REASON_TASK_CANCELLED
|
61
|
-
):
|
62
|
-
metric_tasks_completed.labels(
|
63
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
64
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_TASK_CANCELLED,
|
65
|
-
).inc()
|
66
|
-
elif task_failure_reason in [
|
67
|
-
TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_ERROR,
|
68
|
-
TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_TIMEOUT,
|
69
|
-
TaskFailureReason.TASK_FAILURE_REASON_INVOCATION_ERROR,
|
70
|
-
]:
|
71
|
-
metric_tasks_completed.labels(
|
72
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
73
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_ERROR,
|
74
|
-
).inc()
|
75
|
-
else:
|
76
|
-
metric_tasks_completed.labels(
|
77
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
78
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_UNKNOWN,
|
79
|
-
).inc()
|
80
|
-
logger.warning(
|
81
|
-
"unexpected task failure reason",
|
82
|
-
failure_reason=TaskFailureReason.Name(task_failure_reason),
|
83
|
-
)
|
@@ -1,68 +0,0 @@
|
|
1
|
-
import prometheus_client
|
2
|
-
|
3
|
-
from indexify.executor.monitoring.metrics import (
|
4
|
-
latency_metric_for_customer_controlled_operation,
|
5
|
-
)
|
6
|
-
|
7
|
-
metric_tasks_completed: prometheus_client.Counter = prometheus_client.Counter(
|
8
|
-
"tasks_completed",
|
9
|
-
"Number of tasks that were completed",
|
10
|
-
["outcome_code", "failure_reason"],
|
11
|
-
)
|
12
|
-
METRIC_TASKS_COMPLETED_OUTCOME_CODE_ALL = "all"
|
13
|
-
METRIC_TASKS_COMPLETED_OUTCOME_CODE_SUCCESS = "success"
|
14
|
-
METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE = "failure"
|
15
|
-
|
16
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_ALL = "all"
|
17
|
-
# Used when the task is successfull.
|
18
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_NONE = "none"
|
19
|
-
# Matches TASK_FAILURE_REASON_UNKNOWN
|
20
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_UNKNOWN = "unknown"
|
21
|
-
# Includes all function errors including timeouts to reduce cardinality.
|
22
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_ERROR = "function_error"
|
23
|
-
# Includes all internal errors to reduce cardinality.
|
24
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_INTERNAL_ERROR = "internal_error"
|
25
|
-
# Matches TASK_FAILURE_REASON_TASK_CANCELLED
|
26
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_TASK_CANCELLED = "task_cancelled"
|
27
|
-
# Matches TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED
|
28
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED = (
|
29
|
-
"function_executor_terminated"
|
30
|
-
)
|
31
|
-
|
32
|
-
# Valid combinations of the labels:
|
33
|
-
metric_tasks_completed.labels(
|
34
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_ALL,
|
35
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_ALL,
|
36
|
-
)
|
37
|
-
metric_tasks_completed.labels(
|
38
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_SUCCESS,
|
39
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_NONE,
|
40
|
-
)
|
41
|
-
|
42
|
-
metric_tasks_completed.labels(
|
43
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
44
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_UNKNOWN,
|
45
|
-
)
|
46
|
-
metric_tasks_completed.labels(
|
47
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
48
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_ERROR,
|
49
|
-
)
|
50
|
-
metric_tasks_completed.labels(
|
51
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
52
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_INTERNAL_ERROR,
|
53
|
-
)
|
54
|
-
metric_tasks_completed.labels(
|
55
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
56
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_TASK_CANCELLED,
|
57
|
-
)
|
58
|
-
metric_tasks_completed.labels(
|
59
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
60
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED,
|
61
|
-
)
|
62
|
-
|
63
|
-
metric_task_completion_latency: prometheus_client.Histogram = (
|
64
|
-
latency_metric_for_customer_controlled_operation(
|
65
|
-
"task_completion",
|
66
|
-
"task completion from the moment it got fetched until its output got uploaded to blob store",
|
67
|
-
)
|
68
|
-
)
|
@@ -1,20 +0,0 @@
|
|
1
|
-
import prometheus_client
|
2
|
-
|
3
|
-
from indexify.executor.monitoring.metrics import latency_metric_for_fast_operation
|
4
|
-
|
5
|
-
# Task finalization metrics.
|
6
|
-
metric_task_finalizations: prometheus_client.Counter = prometheus_client.Counter(
|
7
|
-
"task_finalizations",
|
8
|
-
"Number of task finalizations",
|
9
|
-
)
|
10
|
-
metric_task_finalization_errors: prometheus_client.Counter = prometheus_client.Counter(
|
11
|
-
"task_finalization_errors",
|
12
|
-
"Number of task finalization errors",
|
13
|
-
)
|
14
|
-
metric_tasks_finalizing: prometheus_client.Gauge = prometheus_client.Gauge(
|
15
|
-
"tasks_finalizing",
|
16
|
-
"Number of tasks currently finalizing",
|
17
|
-
)
|
18
|
-
metric_task_finalization_latency: prometheus_client.Histogram = (
|
19
|
-
latency_metric_for_fast_operation("task_finalization", "task finalization")
|
20
|
-
)
|
@@ -1,18 +0,0 @@
|
|
1
|
-
import prometheus_client
|
2
|
-
|
3
|
-
from indexify.executor.monitoring.metrics import latency_metric_for_fast_operation
|
4
|
-
|
5
|
-
metric_task_preparations: prometheus_client.Counter = prometheus_client.Counter(
|
6
|
-
"task_preparations", "Number of task preparations for execution"
|
7
|
-
)
|
8
|
-
metric_task_preparation_errors: prometheus_client.Counter = prometheus_client.Counter(
|
9
|
-
"task_preparation_errors", "Number of task preparation errors"
|
10
|
-
)
|
11
|
-
metric_task_preparation_latency: prometheus_client.Histogram = (
|
12
|
-
latency_metric_for_fast_operation(
|
13
|
-
"task_preparation", "task preparation for execution"
|
14
|
-
)
|
15
|
-
)
|
16
|
-
metric_tasks_getting_prepared: prometheus_client.Gauge = prometheus_client.Gauge(
|
17
|
-
"tasks_getting_prepared", "Number of tasks currently getting prepared for execution"
|
18
|
-
)
|
/indexify/executor/function_executor_controller/metrics/{run_task.py → run_task_allocation.py}
RENAMED
File without changes
|
File without changes
|
File without changes
|