indexify 0.3.19__py3-none-any.whl → 0.3.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/cli/cli.py +12 -0
- indexify/executor/api_objects.py +11 -6
- indexify/executor/blob_store/blob_store.py +69 -0
- indexify/executor/blob_store/local_fs_blob_store.py +48 -0
- indexify/executor/blob_store/metrics/blob_store.py +33 -0
- indexify/executor/blob_store/s3_blob_store.py +88 -0
- indexify/executor/downloader.py +192 -27
- indexify/executor/executor.py +29 -13
- indexify/executor/function_executor/function_executor.py +1 -1
- indexify/executor/function_executor/function_executor_states_container.py +5 -0
- indexify/executor/function_executor/function_executor_status.py +2 -0
- indexify/executor/function_executor/health_checker.py +7 -2
- indexify/executor/function_executor/invocation_state_client.py +4 -2
- indexify/executor/function_executor/single_task_runner.py +2 -0
- indexify/executor/function_executor/task_output.py +8 -1
- indexify/executor/grpc/channel_manager.py +4 -3
- indexify/executor/grpc/function_executor_controller.py +163 -193
- indexify/executor/grpc/metrics/state_reconciler.py +17 -0
- indexify/executor/grpc/metrics/task_controller.py +8 -0
- indexify/executor/grpc/state_reconciler.py +305 -188
- indexify/executor/grpc/state_reporter.py +18 -10
- indexify/executor/grpc/task_controller.py +247 -189
- indexify/executor/metrics/task_reporter.py +17 -0
- indexify/executor/task_reporter.py +217 -94
- indexify/executor/task_runner.py +1 -0
- indexify/proto/executor_api.proto +37 -11
- indexify/proto/executor_api_pb2.py +49 -47
- indexify/proto/executor_api_pb2.pyi +55 -15
- {indexify-0.3.19.dist-info → indexify-0.3.21.dist-info}/METADATA +2 -1
- {indexify-0.3.19.dist-info → indexify-0.3.21.dist-info}/RECORD +32 -27
- indexify/executor/grpc/completed_tasks_container.py +0 -26
- {indexify-0.3.19.dist-info → indexify-0.3.21.dist-info}/WHEEL +0 -0
- {indexify-0.3.19.dist-info → indexify-0.3.21.dist-info}/entry_points.txt +0 -0
@@ -25,6 +25,60 @@ from ..function_executor.server.function_executor_server_factory import (
|
|
25
25
|
)
|
26
26
|
|
27
27
|
|
28
|
+
def validate_function_executor_description(
|
29
|
+
function_executor_description: FunctionExecutorDescription,
|
30
|
+
) -> None:
|
31
|
+
"""Validates the supplied FE description.
|
32
|
+
|
33
|
+
Raises ValueError if the description is not valid.
|
34
|
+
"""
|
35
|
+
validator = MessageValidator(function_executor_description)
|
36
|
+
validator.required_field("id")
|
37
|
+
validator.required_field("namespace")
|
38
|
+
validator.required_field("graph_name")
|
39
|
+
validator.required_field("graph_version")
|
40
|
+
validator.required_field("function_name")
|
41
|
+
# TODO: Make graph required after we migrate to direct S3 downloads.
|
42
|
+
# image_uri is optional.
|
43
|
+
# secret_names can be empty.
|
44
|
+
# resource_limits is optional.
|
45
|
+
|
46
|
+
|
47
|
+
def function_executor_logger(
|
48
|
+
function_executor_description: FunctionExecutorDescription, logger: Any
|
49
|
+
) -> Any:
|
50
|
+
"""Returns a logger bound with the FE's metadata.
|
51
|
+
|
52
|
+
The function assumes that the FE might be invalid."""
|
53
|
+
return logger.bind(
|
54
|
+
function_executor_id=(
|
55
|
+
function_executor_description.id
|
56
|
+
if function_executor_description.HasField("id")
|
57
|
+
else None
|
58
|
+
),
|
59
|
+
namespace=(
|
60
|
+
function_executor_description.namespace
|
61
|
+
if function_executor_description.HasField("namespace")
|
62
|
+
else None
|
63
|
+
),
|
64
|
+
graph_name=(
|
65
|
+
function_executor_description.graph_name
|
66
|
+
if function_executor_description.HasField("graph_name")
|
67
|
+
else None
|
68
|
+
),
|
69
|
+
graph_version=(
|
70
|
+
function_executor_description.graph_version
|
71
|
+
if function_executor_description.HasField("graph_version")
|
72
|
+
else None
|
73
|
+
),
|
74
|
+
function_name=(
|
75
|
+
function_executor_description.function_name
|
76
|
+
if function_executor_description.HasField("function_name")
|
77
|
+
else None
|
78
|
+
),
|
79
|
+
)
|
80
|
+
|
81
|
+
|
28
82
|
class FunctionExecutorController:
|
29
83
|
def __init__(
|
30
84
|
self,
|
@@ -39,9 +93,9 @@ class FunctionExecutorController:
|
|
39
93
|
):
|
40
94
|
"""Initializes the FunctionExecutorController.
|
41
95
|
|
42
|
-
|
96
|
+
The supplied FunctionExecutorDescription must be already validated by the caller
|
97
|
+
using validate_function_executor_description().
|
43
98
|
"""
|
44
|
-
_validate_function_executor_description(function_executor_description)
|
45
99
|
self._executor_id: str = executor_id
|
46
100
|
self._function_executor_state: FunctionExecutorState = function_executor_state
|
47
101
|
self._function_executor_description: FunctionExecutorDescription = (
|
@@ -53,17 +107,10 @@ class FunctionExecutorController:
|
|
53
107
|
self._downloader: Downloader = downloader
|
54
108
|
self._base_url: str = base_url
|
55
109
|
self._config_path: str = config_path
|
56
|
-
self._logger: Any =
|
110
|
+
self._logger: Any = function_executor_logger(
|
111
|
+
function_executor_description, logger
|
112
|
+
).bind(
|
57
113
|
module=__name__,
|
58
|
-
function_executor_id=function_executor_description.id,
|
59
|
-
namespace=function_executor_description.namespace,
|
60
|
-
graph_name=function_executor_description.graph_name,
|
61
|
-
graph_version=function_executor_description.graph_version,
|
62
|
-
function_name=function_executor_description.function_name,
|
63
|
-
image_uri=function_executor_description.image_uri,
|
64
|
-
)
|
65
|
-
self._reconciliation_loop_task: asyncio.Task = asyncio.create_task(
|
66
|
-
self._reconciliation_loop()
|
67
114
|
)
|
68
115
|
# The lock protects the desired status.
|
69
116
|
self._lock: asyncio.Lock = asyncio.Lock()
|
@@ -74,13 +121,31 @@ class FunctionExecutorController:
|
|
74
121
|
self._desired_status_change_notifier: asyncio.Condition = asyncio.Condition(
|
75
122
|
lock=self._lock
|
76
123
|
)
|
124
|
+
# Automatically start the controller on creation.
|
125
|
+
self._reconciliation_loop_task: asyncio.Task = asyncio.create_task(
|
126
|
+
self._reconciliation_loop(),
|
127
|
+
name="function executor controller reconciliation loop",
|
128
|
+
)
|
129
|
+
|
130
|
+
def function_executor_description(self) -> FunctionExecutorDescription:
|
131
|
+
return self._function_executor_description
|
132
|
+
|
133
|
+
async def startup(self) -> None:
|
134
|
+
await self._set_desired_status(
|
135
|
+
FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_IDLE
|
136
|
+
)
|
77
137
|
|
78
|
-
async def
|
138
|
+
async def shutdown(self) -> None:
|
139
|
+
await self._set_desired_status(
|
140
|
+
FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_SHUTDOWN
|
141
|
+
)
|
142
|
+
|
143
|
+
async def _set_desired_status(
|
79
144
|
self, desired_status: FunctionExecutorStatusProto
|
80
145
|
) -> None:
|
81
146
|
"""Updates the desired Function Executor status.
|
82
147
|
|
83
|
-
Reconciliation is done asynchronously.
|
148
|
+
Reconciliation is done asynchronously. Doesn't raise any exceptions.
|
84
149
|
"""
|
85
150
|
async with self._lock:
|
86
151
|
if self._desired_status == desired_status:
|
@@ -105,146 +170,73 @@ class FunctionExecutorController:
|
|
105
170
|
await self._reconcile(last_seen_desired_status)
|
106
171
|
|
107
172
|
async def _reconcile(self, desired_status: FunctionExecutorStatusProto) -> None:
|
108
|
-
|
109
|
-
current_status: FunctionExecutorStatus = (
|
110
|
-
self._function_executor_state.status
|
111
|
-
)
|
112
|
-
# We have to process all possible combination of current and desired statuses.
|
113
|
-
if current_status == FunctionExecutorStatus.STARTUP_FAILED_CUSTOMER_ERROR:
|
114
|
-
if (
|
115
|
-
desired_status
|
116
|
-
== FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_CUSTOMER_ERROR
|
117
|
-
):
|
118
|
-
return # Same status, nothing to do.
|
119
|
-
|
120
|
-
# All we can do from the current status is to destroy the FE to possibly recreate it later
|
121
|
-
# if Server requests to do this. This is why we don't accept any other desired statuses.
|
122
|
-
return await self._destroy_or_shutdown_fe_if_desired(desired_status)
|
123
|
-
|
124
|
-
if current_status == FunctionExecutorStatus.STARTUP_FAILED_PLATFORM_ERROR:
|
125
|
-
if (
|
126
|
-
desired_status
|
127
|
-
== FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_STARTUP_FAILED_PLATFORM_ERROR
|
128
|
-
):
|
129
|
-
return # Same status, nothing to do.
|
130
|
-
|
131
|
-
# All we can do from the current status is to destroy the FE to possibly recreate it later
|
132
|
-
# if Server requests to do this. This is why we don't accept any other desired statuses.
|
133
|
-
return await self._destroy_or_shutdown_fe_if_desired(desired_status)
|
134
|
-
|
135
|
-
if current_status == FunctionExecutorStatus.IDLE:
|
136
|
-
if (
|
137
|
-
desired_status
|
138
|
-
== FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_IDLE
|
139
|
-
):
|
140
|
-
return # Same status, nothing to do.
|
141
|
-
|
142
|
-
# Server can only request FE destroy or shutdown when FE has IDLE status.
|
143
|
-
# Transition from IDLE to RUNNING_TASK can only be done by Task controller.
|
144
|
-
# Transition from IDLE to UNHEALTHY can only be done by FE controller.
|
145
|
-
return await self._destroy_or_shutdown_fe_if_desired(desired_status)
|
146
|
-
|
147
|
-
if current_status == FunctionExecutorStatus.RUNNING_TASK:
|
148
|
-
if (
|
149
|
-
desired_status
|
150
|
-
== FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_RUNNING_TASK
|
151
|
-
):
|
152
|
-
return # Same status, nothing to do.
|
173
|
+
"""Reconciles the FE status with the desired status.
|
153
174
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
#
|
166
|
-
return await self.
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
)
|
173
|
-
return # Same status, nothing to do.
|
174
|
-
|
175
|
-
return await self._reconcile_from_destroyed(desired_status)
|
176
|
-
|
177
|
-
# _reconcile() can't be called when current FE status is one of "long running" states
|
178
|
-
# handled by FE controller like STARTING_UP and DESTROYING. This is because _reconcile()
|
179
|
-
# is called with concurrency of 1 and _reconcile() waits until these long running states
|
180
|
-
# (operations) are finished before returning.
|
181
|
-
#
|
182
|
-
# It's not possible to have SHUTDOWN current status because when FE controller transitions to SHUTDOWN
|
183
|
-
# status, it cancels the reconciliation loop task.
|
184
|
-
self._logger.error(
|
185
|
-
"unexpected current function executor status, skipping state reconciliation",
|
186
|
-
current_status=current_status.name,
|
187
|
-
desired_status=FunctionExecutorStatusProto.Name(desired_status),
|
188
|
-
)
|
175
|
+
Doesn't raise any exceptions."""
|
176
|
+
async with self._function_executor_state.lock:
|
177
|
+
if (
|
178
|
+
desired_status
|
179
|
+
== FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_IDLE
|
180
|
+
):
|
181
|
+
return await self._startup()
|
182
|
+
elif (
|
183
|
+
desired_status
|
184
|
+
== FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_SHUTDOWN
|
185
|
+
):
|
186
|
+
# Shutdown can be requested with any current status.
|
187
|
+
return await self._shutdown()
|
188
|
+
else:
|
189
|
+
self._logger.error(
|
190
|
+
"unexpected desired function executor status received from server, skipping state reconciliation",
|
191
|
+
current_status=self._function_executor_state.status.name,
|
192
|
+
desired_status=FunctionExecutorStatusProto.Name(desired_status),
|
193
|
+
)
|
189
194
|
|
190
|
-
async def
|
191
|
-
|
192
|
-
) -> None:
|
193
|
-
"""Destroys the Function Executor if desired status asks for it.
|
195
|
+
async def _shutdown(self) -> None:
|
196
|
+
"""Shutsdown the Function Executor and frees all of its resources.
|
194
197
|
|
195
|
-
|
196
|
-
Caller holds the FE state lock.
|
198
|
+
Caller holds the FE state lock. Doesn't raise any exceptions.
|
197
199
|
"""
|
198
|
-
if
|
199
|
-
|
200
|
-
|
201
|
-
|
200
|
+
# Run destroy sequence if current FE status requires it (see allowed FE status transitions).
|
201
|
+
# We won't see DESTROYING and STARTING_UP statuses here because FE reconciliation is done
|
202
|
+
# with concurrency of 1.
|
203
|
+
if self._function_executor_state.status in [
|
204
|
+
FunctionExecutorStatus.STARTUP_FAILED_PLATFORM_ERROR,
|
205
|
+
FunctionExecutorStatus.STARTUP_FAILED_CUSTOMER_ERROR,
|
206
|
+
FunctionExecutorStatus.IDLE,
|
207
|
+
FunctionExecutorStatus.RUNNING_TASK,
|
208
|
+
FunctionExecutorStatus.UNHEALTHY,
|
202
209
|
]:
|
203
|
-
self.
|
204
|
-
|
205
|
-
current_status=self._function_executor_state.status.name,
|
206
|
-
desired_status=FunctionExecutorStatusProto.Name(desired_status),
|
210
|
+
await self._function_executor_state.set_status(
|
211
|
+
FunctionExecutorStatus.DESTROYING
|
207
212
|
)
|
208
|
-
|
213
|
+
if self._function_executor_state.function_executor is not None:
|
214
|
+
async with _UnlockedLockContextManager(
|
215
|
+
self._function_executor_state.lock
|
216
|
+
):
|
217
|
+
await self._function_executor_state.function_executor.destroy()
|
218
|
+
await self._function_executor_state.set_status(
|
219
|
+
FunctionExecutorStatus.DESTROYED
|
220
|
+
)
|
221
|
+
self._function_executor_state.function_executor = None
|
209
222
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
== FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_SHUTDOWN
|
215
|
-
):
|
216
|
-
await self._shutdown()
|
217
|
-
# No code is executed after this point because reconciliation loop aio task is cancelled.
|
223
|
+
self._logger.info("shutting down function executor controller")
|
224
|
+
await self._function_executor_state.set_status(FunctionExecutorStatus.SHUTDOWN)
|
225
|
+
self._reconciliation_loop_task.cancel()
|
226
|
+
# No code is executed after this point because reconciliation loop aio task is cancelled.
|
218
227
|
|
219
|
-
async def
|
220
|
-
|
221
|
-
) -> None:
|
222
|
-
"""Reconciles the FE state when it has DESTROYED status.
|
228
|
+
async def _startup(self) -> None:
|
229
|
+
"""Startups the FE if possible.
|
223
230
|
|
224
|
-
Caller holds the FE state lock.
|
231
|
+
Caller holds the FE state lock. Doesn't raise any exceptions.
|
225
232
|
"""
|
226
|
-
if
|
227
|
-
FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_STARTING_UP,
|
228
|
-
FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_IDLE,
|
229
|
-
FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_RUNNING_TASK,
|
230
|
-
FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_SHUTDOWN,
|
231
|
-
]:
|
233
|
+
if self._function_executor_state.status != FunctionExecutorStatus.DESTROYED:
|
232
234
|
self._logger.error(
|
233
|
-
"
|
235
|
+
"Can't startup Function Executor from its current state, skipping startup",
|
234
236
|
current_status=self._function_executor_state.status.name,
|
235
|
-
desired_status=FunctionExecutorStatusProto.Name(desired_status),
|
236
237
|
)
|
237
238
|
return
|
238
239
|
|
239
|
-
if (
|
240
|
-
desired_status
|
241
|
-
== FunctionExecutorStatusProto.FUNCTION_EXECUTOR_STATUS_SHUTDOWN
|
242
|
-
):
|
243
|
-
await self._shutdown()
|
244
|
-
# No code is executed after this point because reconciliation loop aio task is cancelled.
|
245
|
-
return
|
246
|
-
|
247
|
-
# All the rest of the allowed desired statuses ask to create the FE.
|
248
240
|
await self._function_executor_state.set_status(
|
249
241
|
FunctionExecutorStatus.STARTING_UP
|
250
242
|
)
|
@@ -267,6 +259,7 @@ class FunctionExecutorController:
|
|
267
259
|
next_status_message = str(e)
|
268
260
|
except Exception as e:
|
269
261
|
next_status = FunctionExecutorStatus.STARTUP_FAILED_PLATFORM_ERROR
|
262
|
+
self._logger.error("failed to create function executor", exc_info=e)
|
270
263
|
|
271
264
|
# FE state lock is acquired again at this point.
|
272
265
|
await self._function_executor_state.set_status(next_status, next_status_message)
|
@@ -279,47 +272,35 @@ class FunctionExecutorController:
|
|
279
272
|
self._health_check_failed_callback
|
280
273
|
)
|
281
274
|
|
282
|
-
async def _destroy_function_executor(self) -> None:
|
283
|
-
"""Destroys the Function Executor if it exists.
|
284
|
-
|
285
|
-
Caller holds the FE state lock.
|
286
|
-
"""
|
287
|
-
await self._function_executor_state.set_status(
|
288
|
-
FunctionExecutorStatus.DESTROYING
|
289
|
-
)
|
290
|
-
async with _UnlockedLockContextManager(self._function_executor_state.lock):
|
291
|
-
await self._function_executor_state.function_executor.destroy()
|
292
|
-
await self._function_executor_state.set_status(FunctionExecutorStatus.DESTROYED)
|
293
|
-
self._function_executor_state.function_executor = None
|
294
|
-
|
295
|
-
async def _shutdown(self) -> None:
|
296
|
-
"""Shuts down the controller.
|
297
|
-
|
298
|
-
Caller holds the FE state lock.
|
299
|
-
Raises asyncio.CancelledError on return when called from reconciliation loop.
|
300
|
-
"""
|
301
|
-
self._logger.info("shutting down function executor controller")
|
302
|
-
await self._function_executor_state.set_status(FunctionExecutorStatus.SHUTDOWN)
|
303
|
-
self._reconciliation_loop_task.cancel()
|
304
|
-
await self._reconciliation_loop_task
|
305
|
-
|
306
275
|
async def _health_check_failed_callback(self, result: HealthCheckResult):
|
307
276
|
async with self._function_executor_state.lock:
|
308
277
|
if self._function_executor_state.status == FunctionExecutorStatus.UNHEALTHY:
|
309
278
|
return
|
310
279
|
|
311
|
-
|
280
|
+
# There can be false positive health check failures when we're creating
|
281
|
+
# or destroying FEs so we only react to health check failures when we expect
|
282
|
+
# the FE to be healthy.
|
283
|
+
if self._function_executor_state.status not in (
|
312
284
|
FunctionExecutorStatus.IDLE,
|
313
285
|
FunctionExecutorStatus.RUNNING_TASK,
|
314
286
|
):
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
287
|
+
return
|
288
|
+
|
289
|
+
await self._function_executor_state.set_status(
|
290
|
+
FunctionExecutorStatus.UNHEALTHY
|
291
|
+
)
|
292
|
+
function_executor: FunctionExecutor = (
|
293
|
+
self._function_executor_state.function_executor
|
294
|
+
)
|
295
|
+
self._function_executor_state.function_executor = None
|
296
|
+
|
297
|
+
self._logger.error(
|
298
|
+
"Function Executor health check failed, destroying Function Executor",
|
299
|
+
health_check_fail_reason=result.reason,
|
300
|
+
)
|
301
|
+
# Destroy the unhealthy FE asap so it doesn't consume resources.
|
302
|
+
# Do it with unlocked state lock to not stop other work on this FE state.
|
303
|
+
await function_executor.destroy()
|
323
304
|
|
324
305
|
|
325
306
|
async def _create_function_executor(
|
@@ -341,12 +322,18 @@ async def _create_function_executor(
|
|
341
322
|
graph_name=function_executor_description.graph_name,
|
342
323
|
graph_version=function_executor_description.graph_version,
|
343
324
|
logger=logger,
|
325
|
+
data_payload=(
|
326
|
+
function_executor_description.graph
|
327
|
+
if function_executor_description.HasField("graph")
|
328
|
+
else None
|
329
|
+
),
|
344
330
|
)
|
345
331
|
|
346
332
|
config: FunctionExecutorServerConfiguration = FunctionExecutorServerConfiguration(
|
347
333
|
executor_id=executor_id,
|
348
334
|
function_executor_id=function_executor_description.id,
|
349
335
|
namespace=function_executor_description.namespace,
|
336
|
+
image_uri=None,
|
350
337
|
secret_names=list(function_executor_description.secret_names),
|
351
338
|
)
|
352
339
|
if function_executor_description.HasField("image_uri"):
|
@@ -361,8 +348,6 @@ async def _create_function_executor(
|
|
361
348
|
)
|
362
349
|
customer_code_timeout_sec: Optional[float] = None
|
363
350
|
if function_executor_description.HasField("customer_code_timeout_ms"):
|
364
|
-
# TODO: Add integration tests with FE customer code initialization timeout
|
365
|
-
# when end-to-end implementation is done.
|
366
351
|
customer_code_timeout_sec = (
|
367
352
|
function_executor_description.customer_code_timeout_ms / 1000.0
|
368
353
|
)
|
@@ -381,29 +366,14 @@ async def _create_function_executor(
|
|
381
366
|
customer_code_timeout_sec=customer_code_timeout_sec,
|
382
367
|
)
|
383
368
|
return function_executor
|
384
|
-
except Exception:
|
369
|
+
except (Exception, asyncio.CancelledError):
|
370
|
+
# Destroy the failed to startup FE asap so it doesn't consume resources.
|
371
|
+
# Destroy the FE also if the FE initialization got cancelled to not leak
|
372
|
+
# allocated resources.
|
385
373
|
await function_executor.destroy()
|
386
374
|
raise
|
387
375
|
|
388
376
|
|
389
|
-
def _validate_function_executor_description(
|
390
|
-
function_executor_description: FunctionExecutorDescription,
|
391
|
-
) -> None:
|
392
|
-
"""Validates the supplied FE description.
|
393
|
-
|
394
|
-
Raises ValueError if the description is not valid.
|
395
|
-
"""
|
396
|
-
validator = MessageValidator(function_executor_description)
|
397
|
-
validator.required_field("id")
|
398
|
-
validator.required_field("namespace")
|
399
|
-
validator.required_field("graph_name")
|
400
|
-
validator.required_field("graph_version")
|
401
|
-
validator.required_field("function_name")
|
402
|
-
# image_uri is optional.
|
403
|
-
# secret_names can be empty.
|
404
|
-
# resource_limits is optional.
|
405
|
-
|
406
|
-
|
407
377
|
class _UnlockedLockContextManager:
|
408
378
|
"""Unlocks its lock on enter to the scope and locks it back on exit."""
|
409
379
|
|
@@ -0,0 +1,17 @@
|
|
1
|
+
import prometheus_client
|
2
|
+
|
3
|
+
from ...monitoring.metrics import latency_metric_for_fast_operation
|
4
|
+
|
5
|
+
metric_state_reconciliations = prometheus_client.Counter(
|
6
|
+
"state_reconciliations",
|
7
|
+
"Number of Executor state reconciliations",
|
8
|
+
)
|
9
|
+
metric_state_reconciliation_errors = prometheus_client.Counter(
|
10
|
+
"state_reconciliation_errors",
|
11
|
+
"Number of Executor state reconciliation errors after all retries",
|
12
|
+
)
|
13
|
+
metric_state_reconciliation_latency: prometheus_client.Histogram = (
|
14
|
+
latency_metric_for_fast_operation(
|
15
|
+
"state_reconciliation", "Executor state reconciliation"
|
16
|
+
)
|
17
|
+
)
|