indexify 0.4.28__py3-none-any.whl → 0.4.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/executor/executor.py +11 -7
- indexify/executor/function_executor_controller/__init__.py +2 -2
- indexify/executor/function_executor_controller/completed_task_allocation_metrics.py +87 -0
- indexify/executor/function_executor_controller/events.py +29 -33
- indexify/executor/function_executor_controller/{finalize_task.py → finalize_task_allocation.py} +45 -37
- indexify/executor/function_executor_controller/function_executor_controller.py +194 -180
- indexify/executor/function_executor_controller/loggers.py +15 -17
- indexify/executor/function_executor_controller/message_validators.py +4 -12
- indexify/executor/function_executor_controller/metrics/completed_task_allocation_metrics.py +70 -0
- indexify/executor/function_executor_controller/metrics/finalize_task_allocation.py +26 -0
- indexify/executor/function_executor_controller/metrics/function_executor_controller.py +12 -11
- indexify/executor/function_executor_controller/metrics/prepare_task_allocation.py +27 -0
- indexify/executor/function_executor_controller/{prepare_task.py → prepare_task_allocation.py} +33 -29
- indexify/executor/function_executor_controller/{run_task.py → run_task_allocation.py} +54 -51
- indexify/executor/function_executor_controller/{task_info.py → task_allocation_info.py} +6 -6
- indexify/executor/function_executor_controller/{task_input.py → task_allocation_input.py} +2 -2
- indexify/executor/function_executor_controller/{task_output.py → task_allocation_output.py} +24 -24
- indexify/executor/monitoring/desired_state_handler.py +24 -0
- indexify/executor/monitoring/reported_state_handler.py +22 -0
- indexify/executor/monitoring/server.py +4 -0
- indexify/executor/state_reconciler.py +26 -19
- indexify/executor/state_reporter.py +9 -4
- {indexify-0.4.28.dist-info → indexify-0.4.30.dist-info}/METADATA +2 -2
- {indexify-0.4.28.dist-info → indexify-0.4.30.dist-info}/RECORD +27 -25
- indexify/executor/function_executor_controller/completed_task_metrics.py +0 -83
- indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +0 -68
- indexify/executor/function_executor_controller/metrics/finalize_task.py +0 -20
- indexify/executor/function_executor_controller/metrics/prepare_task.py +0 -18
- /indexify/executor/function_executor_controller/metrics/{run_task.py → run_task_allocation.py} +0 -0
- {indexify-0.4.28.dist-info → indexify-0.4.30.dist-info}/WHEEL +0 -0
- {indexify-0.4.28.dist-info → indexify-0.4.30.dist-info}/entry_points.txt +0 -0
@@ -13,16 +13,16 @@ from indexify.proto.executor_api_pb2 import (
|
|
13
13
|
)
|
14
14
|
|
15
15
|
|
16
|
-
class TaskMetrics:
|
17
|
-
"""Metrics for a task."""
|
16
|
+
class TaskAllocationMetrics:
|
17
|
+
"""Metrics for a task allocation."""
|
18
18
|
|
19
19
|
def __init__(self, counters: Dict[str, int], timers: Dict[str, float]):
|
20
20
|
self.counters = counters
|
21
21
|
self.timers = timers
|
22
22
|
|
23
23
|
|
24
|
-
class TaskOutput:
|
25
|
-
"""Result of running a task."""
|
24
|
+
class TaskAllocationOutput:
|
25
|
+
"""Result of running a task allocation."""
|
26
26
|
|
27
27
|
def __init__(
|
28
28
|
self,
|
@@ -34,7 +34,7 @@ class TaskOutput:
|
|
34
34
|
invocation_error_output: Optional[SerializedObjectInsideBLOB] = None,
|
35
35
|
uploaded_invocation_error_blob: Optional[BLOB] = None,
|
36
36
|
next_functions: List[str] = [],
|
37
|
-
metrics: Optional[TaskMetrics] = None,
|
37
|
+
metrics: Optional[TaskAllocationMetrics] = None,
|
38
38
|
execution_start_time: Optional[float] = None,
|
39
39
|
execution_end_time: Optional[float] = None,
|
40
40
|
):
|
@@ -56,9 +56,9 @@ class TaskOutput:
|
|
56
56
|
allocation: TaskAllocation,
|
57
57
|
execution_start_time: Optional[float],
|
58
58
|
execution_end_time: Optional[float],
|
59
|
-
) -> "TaskOutput":
|
60
|
-
"""Creates a
|
61
|
-
return
|
59
|
+
) -> "TaskAllocationOutput":
|
60
|
+
"""Creates a TaskAllocationOutput for an internal error."""
|
61
|
+
return TaskAllocationOutput(
|
62
62
|
allocation=allocation,
|
63
63
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
64
64
|
failure_reason=TaskFailureReason.TASK_FAILURE_REASON_INTERNAL_ERROR,
|
@@ -72,9 +72,9 @@ class TaskOutput:
|
|
72
72
|
allocation: TaskAllocation,
|
73
73
|
execution_start_time: Optional[float],
|
74
74
|
execution_end_time: Optional[float],
|
75
|
-
) -> "
|
76
|
-
"""Creates a
|
77
|
-
return
|
75
|
+
) -> "TaskAllocationOutput":
|
76
|
+
"""Creates a TaskAllocationOutput for a function timeout error."""
|
77
|
+
return TaskAllocationOutput(
|
78
78
|
allocation=allocation,
|
79
79
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
80
80
|
failure_reason=TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_TIMEOUT,
|
@@ -88,10 +88,10 @@ class TaskOutput:
|
|
88
88
|
allocation: TaskAllocation,
|
89
89
|
execution_start_time: Optional[float],
|
90
90
|
execution_end_time: Optional[float],
|
91
|
-
) -> "
|
92
|
-
"""Creates a
|
91
|
+
) -> "TaskAllocationOutput":
|
92
|
+
"""Creates a TaskAllocationOutput for an unresponsive FE aka grey failure."""
|
93
93
|
# When FE is unresponsive we don't know exact cause of the failure.
|
94
|
-
return
|
94
|
+
return TaskAllocationOutput(
|
95
95
|
allocation=allocation,
|
96
96
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
97
97
|
# Treat the grey failure as a function error and thus charge the customer.
|
@@ -102,14 +102,14 @@ class TaskOutput:
|
|
102
102
|
)
|
103
103
|
|
104
104
|
@classmethod
|
105
|
-
def
|
105
|
+
def task_allocation_cancelled(
|
106
106
|
cls,
|
107
107
|
allocation: TaskAllocation,
|
108
108
|
execution_start_time: Optional[float],
|
109
109
|
execution_end_time: Optional[float],
|
110
|
-
) -> "
|
111
|
-
"""Creates a
|
112
|
-
return
|
110
|
+
) -> "TaskAllocationOutput":
|
111
|
+
"""Creates a TaskAllocationOutput for the case when task allocation didn't finish because its allocation was removed by Server."""
|
112
|
+
return TaskAllocationOutput(
|
113
113
|
allocation=allocation,
|
114
114
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
115
115
|
failure_reason=TaskFailureReason.TASK_FAILURE_REASON_TASK_CANCELLED,
|
@@ -121,9 +121,9 @@ class TaskOutput:
|
|
121
121
|
def function_executor_terminated(
|
122
122
|
cls,
|
123
123
|
allocation: TaskAllocation,
|
124
|
-
) -> "
|
125
|
-
"""Creates a
|
126
|
-
return
|
124
|
+
) -> "TaskAllocationOutput":
|
125
|
+
"""Creates a TaskAllocationOutput for the case when task allocation didn't run because its FE terminated."""
|
126
|
+
return TaskAllocationOutput(
|
127
127
|
allocation=allocation,
|
128
128
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
129
129
|
failure_reason=TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED,
|
@@ -135,9 +135,9 @@ class TaskOutput:
|
|
135
135
|
allocation: TaskAllocation,
|
136
136
|
fe_termination_reason: FunctionExecutorTerminationReason,
|
137
137
|
logger: Any,
|
138
|
-
) -> "
|
139
|
-
"""Creates a
|
140
|
-
return
|
138
|
+
) -> "TaskAllocationOutput":
|
139
|
+
"""Creates a TaskAllocationOutput for the case when we fail a task allocation that didn't run because its FE startup failed."""
|
140
|
+
return TaskAllocationOutput(
|
141
141
|
allocation=allocation,
|
142
142
|
outcome_code=TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE,
|
143
143
|
failure_reason=_fe_startup_failure_reason_to_task_failure_reason(
|
@@ -0,0 +1,24 @@
|
|
1
|
+
from aiohttp import web
|
2
|
+
|
3
|
+
from indexify.proto.executor_api_pb2 import (
|
4
|
+
DesiredExecutorState,
|
5
|
+
)
|
6
|
+
|
7
|
+
from ..state_reconciler import ExecutorStateReconciler
|
8
|
+
from .handler import Handler
|
9
|
+
|
10
|
+
|
11
|
+
class DesiredStateHandler(Handler):
|
12
|
+
def __init__(self, state_reconciler: ExecutorStateReconciler):
|
13
|
+
self._state_reconciler = state_reconciler
|
14
|
+
|
15
|
+
async def handle(self, request: web.Request) -> web.Response:
|
16
|
+
desired_state: DesiredExecutorState | None = (
|
17
|
+
self._state_reconciler.get_desired_state()
|
18
|
+
)
|
19
|
+
if desired_state is None:
|
20
|
+
return web.Response(
|
21
|
+
status=200, text="No desired state received from Server yet"
|
22
|
+
)
|
23
|
+
else:
|
24
|
+
return web.Response(text=str(desired_state))
|
@@ -0,0 +1,22 @@
|
|
1
|
+
from aiohttp import web
|
2
|
+
|
3
|
+
from indexify.proto.executor_api_pb2 import (
|
4
|
+
ReportExecutorStateRequest,
|
5
|
+
)
|
6
|
+
|
7
|
+
from ..state_reporter import ExecutorStateReporter
|
8
|
+
from .handler import Handler
|
9
|
+
|
10
|
+
|
11
|
+
class ReportedStateHandler(Handler):
|
12
|
+
def __init__(self, state_reporter: ExecutorStateReporter):
|
13
|
+
self._state_reporter = state_reporter
|
14
|
+
|
15
|
+
async def handle(self, request: web.Request) -> web.Response:
|
16
|
+
request: ReportExecutorStateRequest | None = (
|
17
|
+
self._state_reporter.last_state_report_request()
|
18
|
+
)
|
19
|
+
if request is None:
|
20
|
+
return web.Response(status=200, text="No state reported so far")
|
21
|
+
else:
|
22
|
+
return web.Response(text=str(request))
|
@@ -11,6 +11,8 @@ class MonitoringServer:
|
|
11
11
|
startup_probe_handler: Handler,
|
12
12
|
health_probe_handler: Handler,
|
13
13
|
metrics_handler: Handler,
|
14
|
+
reported_state_handler: Handler,
|
15
|
+
desired_state_handler: Handler,
|
14
16
|
):
|
15
17
|
self._host = host
|
16
18
|
self._port = port
|
@@ -20,6 +22,8 @@ class MonitoringServer:
|
|
20
22
|
web.get("/monitoring/startup", startup_probe_handler.handle),
|
21
23
|
web.get("/monitoring/health", health_probe_handler.handle),
|
22
24
|
web.get("/monitoring/metrics", metrics_handler.handle),
|
25
|
+
web.get("/state/reported", reported_state_handler.handle),
|
26
|
+
web.get("/state/desired", desired_state_handler.handle),
|
23
27
|
]
|
24
28
|
)
|
25
29
|
self._app_runner: web.AppRunner = web.AppRunner(self._app)
|
@@ -88,6 +88,9 @@ class ExecutorStateReconciler:
|
|
88
88
|
)
|
89
89
|
self._last_desired_state: Optional[DesiredExecutorState] = None
|
90
90
|
|
91
|
+
def get_desired_state(self) -> Optional[DesiredExecutorState]:
|
92
|
+
return self._last_desired_state
|
93
|
+
|
91
94
|
def run(self):
|
92
95
|
"""Runs the state reconciler.
|
93
96
|
|
@@ -244,6 +247,8 @@ class ExecutorStateReconciler:
|
|
244
247
|
with metric_state_reconciliation_latency.time():
|
245
248
|
metric_state_reconciliations.inc()
|
246
249
|
await self._reconcile_state(last_reconciled_state)
|
250
|
+
# Update the clock regardless of success or failure.
|
251
|
+
# This is to show Server that we actually processed the message.
|
247
252
|
self._state_reporter.update_last_server_clock(
|
248
253
|
last_reconciled_state.clock
|
249
254
|
)
|
@@ -258,7 +263,7 @@ class ExecutorStateReconciler:
|
|
258
263
|
try:
|
259
264
|
# Reconcile FEs first because Tasks depend on them.
|
260
265
|
self._reconcile_function_executors(desired_state.function_executors)
|
261
|
-
self.
|
266
|
+
self._reconcile_task_allocations(desired_state.task_allocations)
|
262
267
|
return
|
263
268
|
except Exception as e:
|
264
269
|
self._logger.error(
|
@@ -380,38 +385,38 @@ class ExecutorStateReconciler:
|
|
380
385
|
self._function_executor_controllers.pop(function_executor_id, None)
|
381
386
|
self._shutting_down_fe_ids.discard(function_executor_id)
|
382
387
|
|
383
|
-
def
|
388
|
+
def _reconcile_task_allocations(self, task_allocations: Iterable[TaskAllocation]):
|
384
389
|
valid_task_allocations: List[TaskAllocation] = self._valid_task_allocations(
|
385
390
|
task_allocations
|
386
391
|
)
|
387
392
|
for task_allocation in valid_task_allocations:
|
388
|
-
self.
|
393
|
+
self._reconcile_task_allocation(task_allocation)
|
389
394
|
|
390
395
|
# Cancel tasks that are no longer in the desired state.
|
391
|
-
# FE ID => [
|
392
|
-
|
396
|
+
# FE ID => [Allocation ID]
|
397
|
+
desired_alloc_ids_per_fe: Dict[str, List[str]] = {}
|
393
398
|
for task_allocation in valid_task_allocations:
|
394
|
-
if task_allocation.function_executor_id not in
|
395
|
-
|
396
|
-
|
397
|
-
task_allocation.
|
399
|
+
if task_allocation.function_executor_id not in desired_alloc_ids_per_fe:
|
400
|
+
desired_alloc_ids_per_fe[task_allocation.function_executor_id] = []
|
401
|
+
desired_alloc_ids_per_fe[task_allocation.function_executor_id].append(
|
402
|
+
task_allocation.allocation_id
|
398
403
|
)
|
399
404
|
|
400
405
|
for fe_controller in self._function_executor_controllers.values():
|
401
406
|
fe_controller: FunctionExecutorController
|
402
|
-
if fe_controller.function_executor_id() in
|
403
|
-
|
404
|
-
|
407
|
+
if fe_controller.function_executor_id() in desired_alloc_ids_per_fe:
|
408
|
+
desired_fe_alloc_ids: Set[str] = set(
|
409
|
+
desired_alloc_ids_per_fe[fe_controller.function_executor_id()]
|
405
410
|
)
|
406
411
|
else:
|
407
412
|
# No tasks desired for this FE, so cancel all its tasks.
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
for
|
412
|
-
fe_controller.
|
413
|
+
desired_fe_alloc_ids: Set[str] = set()
|
414
|
+
actual_fe_alloc_ids: Set[str] = set(fe_controller.task_allocation_ids())
|
415
|
+
alloc_ids_to_remove: Set[str] = actual_fe_alloc_ids - desired_fe_alloc_ids
|
416
|
+
for alloc_id in alloc_ids_to_remove:
|
417
|
+
fe_controller.remove_task_allocation(alloc_id)
|
413
418
|
|
414
|
-
def
|
419
|
+
def _reconcile_task_allocation(self, task_allocation: TaskAllocation):
|
415
420
|
"""Reconciles a single TaskAllocation with the desired state.
|
416
421
|
|
417
422
|
Doesn't raise any exceptions.
|
@@ -419,7 +424,9 @@ class ExecutorStateReconciler:
|
|
419
424
|
function_executor_controller: FunctionExecutorController = (
|
420
425
|
self._function_executor_controllers[task_allocation.function_executor_id]
|
421
426
|
)
|
422
|
-
if function_executor_controller.
|
427
|
+
if function_executor_controller.has_task_allocation(
|
428
|
+
task_allocation.allocation_id
|
429
|
+
):
|
423
430
|
# Nothing to do, task already exists and it's immutable.
|
424
431
|
return
|
425
432
|
|
@@ -89,6 +89,10 @@ class ExecutorStateReporter:
|
|
89
89
|
self._pending_task_results: List[TaskResult] = []
|
90
90
|
self._pending_fe_updates: List[FunctionExecutorUpdate] = []
|
91
91
|
self._function_executor_states: Dict[str, FunctionExecutorState] = {}
|
92
|
+
self._last_state_report_request: Optional[ReportExecutorStateRequest] = None
|
93
|
+
|
94
|
+
def last_state_report_request(self) -> Optional[ReportExecutorStateRequest]:
|
95
|
+
return self._last_state_report_request
|
92
96
|
|
93
97
|
def update_executor_status(self, value: ExecutorStatus) -> None:
|
94
98
|
self._executor_status = value
|
@@ -203,7 +207,11 @@ class ExecutorStateReporter:
|
|
203
207
|
try:
|
204
208
|
state: ExecutorState = self._current_executor_state()
|
205
209
|
update: ExecutorUpdate = self._remove_pending_update()
|
210
|
+
request: ReportExecutorStateRequest = ReportExecutorStateRequest(
|
211
|
+
executor_state=state, executor_update=update
|
212
|
+
)
|
206
213
|
_log_reported_executor_update(update, self._logger)
|
214
|
+
self._last_state_report_request = request
|
207
215
|
|
208
216
|
with (
|
209
217
|
metric_state_report_rpc_errors.count_exceptions(),
|
@@ -211,10 +219,7 @@ class ExecutorStateReporter:
|
|
211
219
|
):
|
212
220
|
metric_state_report_rpcs.inc()
|
213
221
|
await stub.report_executor_state(
|
214
|
-
|
215
|
-
executor_state=state, executor_update=update
|
216
|
-
),
|
217
|
-
timeout=_REPORT_RPC_TIMEOUT_SEC,
|
222
|
+
request, timeout=_REPORT_RPC_TIMEOUT_SEC
|
218
223
|
)
|
219
224
|
self._state_reported_event.set()
|
220
225
|
self._health_checker.server_connection_state_changed(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: indexify
|
3
|
-
Version: 0.4.28
|
3
|
+
Version: 0.4.30
|
4
4
|
Summary: Open Source Indexify components and helper tools
|
5
5
|
Home-page: https://github.com/tensorlakeai/indexify
|
6
6
|
License: Apache 2.0
|
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
16
16
|
Requires-Dist: aiohttp (>=3.12.15,<4.0.0)
|
17
|
-
Requires-Dist: boto3 (>=1.40.
|
17
|
+
Requires-Dist: boto3 (>=1.40.15,<2.0.0)
|
18
18
|
Requires-Dist: docker (>=7.1.0,<8.0.0)
|
19
19
|
Requires-Dist: httpx[http2] (==0.27.2)
|
20
20
|
Requires-Dist: nanoid (>=2.0.0,<3.0.0)
|
@@ -8,7 +8,7 @@ indexify/executor/blob_store/local_fs_blob_store.py,sha256=nRFawLMbOCCFlCIx2ccmh
|
|
8
8
|
indexify/executor/blob_store/metrics/blob_store.py,sha256=3lmLU8q4Yx87RIYcy56nmFiNQTPY94pB12ht7X6MyhA,3811
|
9
9
|
indexify/executor/blob_store/s3_blob_store.py,sha256=wJlDBTTaq48Vp1I0LvP2958b1Xe8esvarkr5PVRawU0,7609
|
10
10
|
indexify/executor/channel_manager.py,sha256=ihKfWJmUqQvh4UKXewZLzyJWW_f50P4fnwPqPonrozw,6651
|
11
|
-
indexify/executor/executor.py,sha256
|
11
|
+
indexify/executor/executor.py,sha256=--qzHfQnbP70hsV7Y1L6dlgsORAi-Ugwu0W795NgYyc,6645
|
12
12
|
indexify/executor/function_allowlist.py,sha256=PCelCW6qIe_2sH11BCKr7LDqarRV5kwNsrfB2EV7Zwo,1772
|
13
13
|
indexify/executor/function_executor/function_executor.py,sha256=dTZ8y15ifu7GKmNLU-SQH5M3COa1_8ec_2439h67Pd8,12381
|
14
14
|
indexify/executor/function_executor/health_checker.py,sha256=IxE0jnC99K_lvnizFLjXqS1942H8-FNAN4AlhLIjg2Y,6373
|
@@ -21,28 +21,28 @@ indexify/executor/function_executor/server/function_executor_server.py,sha256=_D
|
|
21
21
|
indexify/executor/function_executor/server/function_executor_server_factory.py,sha256=pZ3tQoaeWP2NDaR-A0PUYmzrBz768U2b9ENBFQG1INg,1814
|
22
22
|
indexify/executor/function_executor/server/subprocess_function_executor_server.py,sha256=JekDOqF7oFD4J6zcN3xB0Dxd1cgpEXMOsb_rKZOeBlI,668
|
23
23
|
indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py,sha256=w5aGQPHWLpixlP9-BbZu6oL_muMA95-hr7WKVxiEL7Q,4303
|
24
|
-
indexify/executor/function_executor_controller/__init__.py,sha256=
|
24
|
+
indexify/executor/function_executor_controller/__init__.py,sha256=jJQa09kQw05ycJhIWGv5-RoWe8WIymoCki5mo5P3Nyo,523
|
25
25
|
indexify/executor/function_executor_controller/aio_utils.py,sha256=nohPk9k38FpZ87y5jgbb-UhUNvf-GRETkyyRBp7WnVw,804
|
26
|
-
indexify/executor/function_executor_controller/
|
26
|
+
indexify/executor/function_executor_controller/completed_task_allocation_metrics.py,sha256=eYtV0--vwd_xwdRBTr_FRsbQ3D4bjOuIQ94TjEy5kR8,4137
|
27
27
|
indexify/executor/function_executor_controller/create_function_executor.py,sha256=_VLmT9zmo0Hvt4K4WkC8PCB9qNgTv8k9QkwTSAOQRDU,11158
|
28
28
|
indexify/executor/function_executor_controller/debug_event_loop.py,sha256=VJOKe_c9HjIDVCjhMY3Yqyeq1tAM1eVa2chZa6CMf-U,1016
|
29
29
|
indexify/executor/function_executor_controller/downloads.py,sha256=B2dbaa6osp1_vCQ6WY_9znAca3Z2qqVzQAF2av3v8Pg,5304
|
30
|
-
indexify/executor/function_executor_controller/events.py,sha256=
|
31
|
-
indexify/executor/function_executor_controller/
|
32
|
-
indexify/executor/function_executor_controller/function_executor_controller.py,sha256=
|
33
|
-
indexify/executor/function_executor_controller/loggers.py,sha256=
|
34
|
-
indexify/executor/function_executor_controller/message_validators.py,sha256=
|
35
|
-
indexify/executor/function_executor_controller/metrics/
|
30
|
+
indexify/executor/function_executor_controller/events.py,sha256=Ly8ypjgyPdOThyj9TXN15RZTJ6ipP_nDNiYND9Ghyr4,5795
|
31
|
+
indexify/executor/function_executor_controller/finalize_task_allocation.py,sha256=hYCULPN0LWpPJXwHtyLUYSUQOGC3Lp47SQuLLJAO90A,7135
|
32
|
+
indexify/executor/function_executor_controller/function_executor_controller.py,sha256=jq4UV_6op9GbjmcjEmSDxVbb_DtUj1gVKF24rmZU4dU,41698
|
33
|
+
indexify/executor/function_executor_controller/loggers.py,sha256=KNXlb7n3CssMbYmt5DrH-cUgfYEf38JM1W82uzkYsZY,3556
|
34
|
+
indexify/executor/function_executor_controller/message_validators.py,sha256=fxF-sNC1Pf84NmbDb0-Yg6I6OccgjQeWjSkaPkIcip8,3055
|
35
|
+
indexify/executor/function_executor_controller/metrics/completed_task_allocation_metrics.py,sha256=iW-is4V4mv9umIQQar_2k_rlwnaTLs5uzCo-TuCylpg,3155
|
36
36
|
indexify/executor/function_executor_controller/metrics/downloads.py,sha256=G8UUDfnzmiK_26OvZYTqH0KgNb3kI-0TgzGLFEuSEFc,892
|
37
|
-
indexify/executor/function_executor_controller/metrics/
|
38
|
-
indexify/executor/function_executor_controller/metrics/function_executor_controller.py,sha256=
|
39
|
-
indexify/executor/function_executor_controller/metrics/
|
40
|
-
indexify/executor/function_executor_controller/metrics/
|
41
|
-
indexify/executor/function_executor_controller/
|
42
|
-
indexify/executor/function_executor_controller/
|
43
|
-
indexify/executor/function_executor_controller/
|
44
|
-
indexify/executor/function_executor_controller/
|
45
|
-
indexify/executor/function_executor_controller/
|
37
|
+
indexify/executor/function_executor_controller/metrics/finalize_task_allocation.py,sha256=o2T3j9mTJ1Zjfje1Zuiw2BMOTLoSLyF-wSVhmNEUcbQ,940
|
38
|
+
indexify/executor/function_executor_controller/metrics/function_executor_controller.py,sha256=3nMsgtRbPHIleU8FQZqrdcraJd-7rAjqF7i1PcQRyq8,2803
|
39
|
+
indexify/executor/function_executor_controller/metrics/prepare_task_allocation.py,sha256=re07otwicIbPBFN43kihKEzuxN-4ZNt74LyrXKtI68I,971
|
40
|
+
indexify/executor/function_executor_controller/metrics/run_task_allocation.py,sha256=ZFv_nw5_pKUJoTaavSyzdglQKW4uvC2XyK8S6xi9xLQ,1064
|
41
|
+
indexify/executor/function_executor_controller/prepare_task_allocation.py,sha256=uv6_4Qd8DhJUhmp6eaCDrua4j6LLFqOjr6IwYmiOIHQ,9503
|
42
|
+
indexify/executor/function_executor_controller/run_task_allocation.py,sha256=5J4BhxBhmvqm9nd1i5YNN6Z_49BGdPafXZw4VWoX71Q,15842
|
43
|
+
indexify/executor/function_executor_controller/task_allocation_info.py,sha256=7gqSp90SuoGxpV5K80YrK2YhrLteeh5xSH--eStkX8s,1098
|
44
|
+
indexify/executor/function_executor_controller/task_allocation_input.py,sha256=lST2UgjzsDbMAh6G9vsdnb_lRq9s0BObbz4RX3ggXag,897
|
45
|
+
indexify/executor/function_executor_controller/task_allocation_output.py,sha256=pJC3FYxFyVMnbv2ci8KR8ONAWtjO48cE39dpM9sdSCA,7403
|
46
46
|
indexify/executor/function_executor_controller/terminate_function_executor.py,sha256=GHkMEidd4zbkulFWAeLGX1HsXtZvPJXh4dEusgy2ioA,1731
|
47
47
|
indexify/executor/host_resources/host_resources.py,sha256=eUyP05EX7QdOtQ5vbX_KCpvnBS2B7fl06UWeF9Oigns,3813
|
48
48
|
indexify/executor/host_resources/nvidia_gpu.py,sha256=uTCkLXnozZSpax8VApt0QMMM9YcBUK9eggYpwmLz09I,3308
|
@@ -52,6 +52,7 @@ indexify/executor/metrics/channel_manager.py,sha256=1dU9bzF3xqBy1nY9Sc66GfQQWnWZ
|
|
52
52
|
indexify/executor/metrics/executor.py,sha256=8dJXmyGqKlBSrPuyWXW7O2I21uxQ687l-2dYTvz4fmk,398
|
53
53
|
indexify/executor/metrics/state_reconciler.py,sha256=BSlRgvgtwih6QcYrsFU5P2ylaXAsC_X70DbzDuv9NsU,584
|
54
54
|
indexify/executor/metrics/state_reporter.py,sha256=JvyP_IUfJQetEjzmoWe9q6HCA4Ao1GLocaa7Od_jl2g,550
|
55
|
+
indexify/executor/monitoring/desired_state_handler.py,sha256=jmpTSQY6VyDbPFGKMpj-dAv0un5dtQmYc1tonQDBWL8,755
|
55
56
|
indexify/executor/monitoring/handler.py,sha256=Cj1cu_LcsAP0tdviqNhoEtGm4h0OJAxxzW9C2YdNXYU,240
|
56
57
|
indexify/executor/monitoring/health_check_handler.py,sha256=e1pEtWFKaVs6H57Z4YLejNECrJtC38PweZc7xTJeqVw,695
|
57
58
|
indexify/executor/monitoring/health_checker/generic_health_checker.py,sha256=vJRV879GrdZFqwTnM9pRLA97LRMutGz2sWRy-KS-tfg,1493
|
@@ -59,15 +60,16 @@ indexify/executor/monitoring/health_checker/health_checker.py,sha256=B-Q4KM1iEUS
|
|
59
60
|
indexify/executor/monitoring/health_checker/metrics/health_checker.py,sha256=50JS4JaOdAgSk7iYaBV4J3tGXkRTzmIVR_jVOV66YOc,129
|
60
61
|
indexify/executor/monitoring/metrics.py,sha256=5BpNqDBDQiL2K962WDPQU2eSo5zD6I9vF2flGyBejts,7388
|
61
62
|
indexify/executor/monitoring/prometheus_metrics_handler.py,sha256=KiGqSf7rkXTfbDwThyXFpFe2jnuZD5q-5SBP_0GDo8Y,591
|
62
|
-
indexify/executor/monitoring/
|
63
|
+
indexify/executor/monitoring/reported_state_handler.py,sha256=R1C3tk8CF2xh7pbBgKzM1ADReDMEV9CyIRlAZ9NFado,697
|
64
|
+
indexify/executor/monitoring/server.py,sha256=aAKzL9J243Q9_41JY-4tSBdFKXR_ZOMz-DEJNtxfYC4,1483
|
63
65
|
indexify/executor/monitoring/startup_probe_handler.py,sha256=zXXsBU15SMlBx1bSFpxWDfed1VHtKKnwvLQ8-frpG98,425
|
64
|
-
indexify/executor/state_reconciler.py,sha256=
|
65
|
-
indexify/executor/state_reporter.py,sha256=
|
66
|
+
indexify/executor/state_reconciler.py,sha256=fA-2hgPR2YFI8wOwLXzURn-OhsZEqNhMzFlksDESclc,20638
|
67
|
+
indexify/executor/state_reporter.py,sha256=zXb6SvD1yA4tMDWxT_p995y8l490hifXRHX4LjN6WOA,15505
|
66
68
|
indexify/proto/executor_api.proto,sha256=YwLeLjyLHhs5qoWLA50uHY2KdKRGfBQBKZwE8VXmzeo,12871
|
67
69
|
indexify/proto/executor_api_pb2.py,sha256=vTG1-2Pp4OnTWFD4GYphgJ3cUbTbDjCOKstKrLBXB-E,16472
|
68
70
|
indexify/proto/executor_api_pb2.pyi,sha256=-6P-ef-fBJF0CTc4UucIzrDLCBVZpIEhEz2qhexvwjk,23175
|
69
71
|
indexify/proto/executor_api_pb2_grpc.py,sha256=u9GEQV4nm_GvApRxjVo806CkgBMBVReb5IVrcaDaliY,7520
|
70
|
-
indexify-0.4.
|
71
|
-
indexify-0.4.
|
72
|
-
indexify-0.4.
|
73
|
-
indexify-0.4.
|
72
|
+
indexify-0.4.30.dist-info/METADATA,sha256=rtJwA6hJc1w8kSC0HTsesyum276C4gH1iXKg_jK45rY,1390
|
73
|
+
indexify-0.4.30.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
|
74
|
+
indexify-0.4.30.dist-info/entry_points.txt,sha256=rMJqbE5KPZIXTPIfAtVIM4zpUElqYVgEYd6i7N23zzg,49
|
75
|
+
indexify-0.4.30.dist-info/RECORD,,
|
@@ -1,83 +0,0 @@
|
|
1
|
-
import time
|
2
|
-
from typing import Any
|
3
|
-
|
4
|
-
from indexify.proto.executor_api_pb2 import (
|
5
|
-
TaskFailureReason,
|
6
|
-
TaskOutcomeCode,
|
7
|
-
)
|
8
|
-
|
9
|
-
from .metrics.completed_task_metrics import (
|
10
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_ALL,
|
11
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_ERROR,
|
12
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED,
|
13
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_INTERNAL_ERROR,
|
14
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_NONE,
|
15
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_TASK_CANCELLED,
|
16
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_UNKNOWN,
|
17
|
-
METRIC_TASKS_COMPLETED_OUTCOME_CODE_ALL,
|
18
|
-
METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
19
|
-
METRIC_TASKS_COMPLETED_OUTCOME_CODE_SUCCESS,
|
20
|
-
metric_task_completion_latency,
|
21
|
-
metric_tasks_completed,
|
22
|
-
)
|
23
|
-
from .task_info import TaskInfo
|
24
|
-
|
25
|
-
|
26
|
-
def emit_completed_task_metrics(task_info: TaskInfo, logger: Any) -> None:
|
27
|
-
"""Emits Prometheus metrics for a completed task.
|
28
|
-
|
29
|
-
Doesn't raise any exceptions.
|
30
|
-
"""
|
31
|
-
logger = logger.bind(module=__name__)
|
32
|
-
metric_task_completion_latency.observe(time.monotonic() - task_info.start_time)
|
33
|
-
|
34
|
-
task_outcome_code: TaskOutcomeCode = task_info.output.outcome_code
|
35
|
-
task_failure_reason: TaskFailureReason = task_info.output.failure_reason
|
36
|
-
metric_tasks_completed.labels(
|
37
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_ALL,
|
38
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_ALL,
|
39
|
-
).inc()
|
40
|
-
if task_outcome_code == TaskOutcomeCode.TASK_OUTCOME_CODE_SUCCESS:
|
41
|
-
metric_tasks_completed.labels(
|
42
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_SUCCESS,
|
43
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_NONE,
|
44
|
-
).inc()
|
45
|
-
elif task_outcome_code == TaskOutcomeCode.TASK_OUTCOME_CODE_FAILURE:
|
46
|
-
if task_failure_reason == TaskFailureReason.TASK_FAILURE_REASON_INTERNAL_ERROR:
|
47
|
-
metric_tasks_completed.labels(
|
48
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
49
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_INTERNAL_ERROR,
|
50
|
-
).inc()
|
51
|
-
elif (
|
52
|
-
task_failure_reason
|
53
|
-
== TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED
|
54
|
-
):
|
55
|
-
metric_tasks_completed.labels(
|
56
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
57
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED,
|
58
|
-
).inc()
|
59
|
-
elif (
|
60
|
-
task_failure_reason == TaskFailureReason.TASK_FAILURE_REASON_TASK_CANCELLED
|
61
|
-
):
|
62
|
-
metric_tasks_completed.labels(
|
63
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
64
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_TASK_CANCELLED,
|
65
|
-
).inc()
|
66
|
-
elif task_failure_reason in [
|
67
|
-
TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_ERROR,
|
68
|
-
TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_TIMEOUT,
|
69
|
-
TaskFailureReason.TASK_FAILURE_REASON_INVOCATION_ERROR,
|
70
|
-
]:
|
71
|
-
metric_tasks_completed.labels(
|
72
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
73
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_ERROR,
|
74
|
-
).inc()
|
75
|
-
else:
|
76
|
-
metric_tasks_completed.labels(
|
77
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
78
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_UNKNOWN,
|
79
|
-
).inc()
|
80
|
-
logger.warning(
|
81
|
-
"unexpected task failure reason",
|
82
|
-
failure_reason=TaskFailureReason.Name(task_failure_reason),
|
83
|
-
)
|
@@ -1,68 +0,0 @@
|
|
1
|
-
import prometheus_client
|
2
|
-
|
3
|
-
from indexify.executor.monitoring.metrics import (
|
4
|
-
latency_metric_for_customer_controlled_operation,
|
5
|
-
)
|
6
|
-
|
7
|
-
metric_tasks_completed: prometheus_client.Counter = prometheus_client.Counter(
|
8
|
-
"tasks_completed",
|
9
|
-
"Number of tasks that were completed",
|
10
|
-
["outcome_code", "failure_reason"],
|
11
|
-
)
|
12
|
-
METRIC_TASKS_COMPLETED_OUTCOME_CODE_ALL = "all"
|
13
|
-
METRIC_TASKS_COMPLETED_OUTCOME_CODE_SUCCESS = "success"
|
14
|
-
METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE = "failure"
|
15
|
-
|
16
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_ALL = "all"
|
17
|
-
# Used when the task is successfull.
|
18
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_NONE = "none"
|
19
|
-
# Matches TASK_FAILURE_REASON_UNKNOWN
|
20
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_UNKNOWN = "unknown"
|
21
|
-
# Includes all function errors including timeouts to reduce cardinality.
|
22
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_ERROR = "function_error"
|
23
|
-
# Includes all internal errors to reduce cardinality.
|
24
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_INTERNAL_ERROR = "internal_error"
|
25
|
-
# Matches TASK_FAILURE_REASON_TASK_CANCELLED
|
26
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_TASK_CANCELLED = "task_cancelled"
|
27
|
-
# Matches TASK_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED
|
28
|
-
METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED = (
|
29
|
-
"function_executor_terminated"
|
30
|
-
)
|
31
|
-
|
32
|
-
# Valid combinations of the labels:
|
33
|
-
metric_tasks_completed.labels(
|
34
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_ALL,
|
35
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_ALL,
|
36
|
-
)
|
37
|
-
metric_tasks_completed.labels(
|
38
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_SUCCESS,
|
39
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_NONE,
|
40
|
-
)
|
41
|
-
|
42
|
-
metric_tasks_completed.labels(
|
43
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
44
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_UNKNOWN,
|
45
|
-
)
|
46
|
-
metric_tasks_completed.labels(
|
47
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
48
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_ERROR,
|
49
|
-
)
|
50
|
-
metric_tasks_completed.labels(
|
51
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
52
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_INTERNAL_ERROR,
|
53
|
-
)
|
54
|
-
metric_tasks_completed.labels(
|
55
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
56
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_TASK_CANCELLED,
|
57
|
-
)
|
58
|
-
metric_tasks_completed.labels(
|
59
|
-
outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
|
60
|
-
failure_reason=METRIC_TASKS_COMPLETED_FAILURE_REASON_FUNCTION_EXECUTOR_TERMINATED,
|
61
|
-
)
|
62
|
-
|
63
|
-
metric_task_completion_latency: prometheus_client.Histogram = (
|
64
|
-
latency_metric_for_customer_controlled_operation(
|
65
|
-
"task_completion",
|
66
|
-
"task completion from the moment it got fetched until its output got uploaded to blob store",
|
67
|
-
)
|
68
|
-
)
|
@@ -1,20 +0,0 @@
|
|
1
|
-
import prometheus_client
|
2
|
-
|
3
|
-
from indexify.executor.monitoring.metrics import latency_metric_for_fast_operation
|
4
|
-
|
5
|
-
# Task finalization metrics.
|
6
|
-
metric_task_finalizations: prometheus_client.Counter = prometheus_client.Counter(
|
7
|
-
"task_finalizations",
|
8
|
-
"Number of task finalizations",
|
9
|
-
)
|
10
|
-
metric_task_finalization_errors: prometheus_client.Counter = prometheus_client.Counter(
|
11
|
-
"task_finalization_errors",
|
12
|
-
"Number of task finalization errors",
|
13
|
-
)
|
14
|
-
metric_tasks_finalizing: prometheus_client.Gauge = prometheus_client.Gauge(
|
15
|
-
"tasks_finalizing",
|
16
|
-
"Number of tasks currently finalizing",
|
17
|
-
)
|
18
|
-
metric_task_finalization_latency: prometheus_client.Histogram = (
|
19
|
-
latency_metric_for_fast_operation("task_finalization", "task finalization")
|
20
|
-
)
|
@@ -1,18 +0,0 @@
|
|
1
|
-
import prometheus_client
|
2
|
-
|
3
|
-
from indexify.executor.monitoring.metrics import latency_metric_for_fast_operation
|
4
|
-
|
5
|
-
metric_task_preparations: prometheus_client.Counter = prometheus_client.Counter(
|
6
|
-
"task_preparations", "Number of task preparations for execution"
|
7
|
-
)
|
8
|
-
metric_task_preparation_errors: prometheus_client.Counter = prometheus_client.Counter(
|
9
|
-
"task_preparation_errors", "Number of task preparation errors"
|
10
|
-
)
|
11
|
-
metric_task_preparation_latency: prometheus_client.Histogram = (
|
12
|
-
latency_metric_for_fast_operation(
|
13
|
-
"task_preparation", "task preparation for execution"
|
14
|
-
)
|
15
|
-
)
|
16
|
-
metric_tasks_getting_prepared: prometheus_client.Gauge = prometheus_client.Gauge(
|
17
|
-
"tasks_getting_prepared", "Number of tasks currently getting prepared for execution"
|
18
|
-
)
|
/indexify/executor/function_executor_controller/metrics/{run_task.py → run_task_allocation.py}
RENAMED
File without changes
|
File without changes
|
File without changes
|