prefect-client 3.0.0rc9__py3-none-any.whl → 3.0.0rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prefect/_internal/compatibility/migration.py +48 -8
- prefect/_internal/concurrency/api.py +1 -1
- prefect/_internal/retries.py +61 -0
- prefect/agent.py +6 -0
- prefect/client/cloud.py +1 -1
- prefect/client/schemas/objects.py +3 -4
- prefect/concurrency/asyncio.py +3 -3
- prefect/concurrency/events.py +1 -1
- prefect/concurrency/services.py +3 -2
- prefect/concurrency/sync.py +19 -5
- prefect/context.py +14 -2
- prefect/deployments/__init__.py +28 -15
- prefect/deployments/schedules.py +5 -2
- prefect/deployments/steps/pull.py +7 -0
- prefect/events/schemas/automations.py +3 -3
- prefect/exceptions.py +4 -1
- prefect/filesystems.py +4 -3
- prefect/flow_engine.py +76 -14
- prefect/flows.py +222 -64
- prefect/futures.py +53 -7
- prefect/infrastructure/__init__.py +6 -0
- prefect/infrastructure/base.py +6 -0
- prefect/logging/loggers.py +1 -1
- prefect/results.py +50 -67
- prefect/runner/runner.py +93 -20
- prefect/runner/server.py +20 -22
- prefect/runner/submit.py +0 -8
- prefect/runtime/flow_run.py +38 -3
- prefect/serializers.py +3 -3
- prefect/settings.py +15 -45
- prefect/task_engine.py +77 -21
- prefect/task_runners.py +28 -16
- prefect/task_worker.py +6 -4
- prefect/tasks.py +30 -5
- prefect/transactions.py +18 -2
- prefect/utilities/asyncutils.py +9 -3
- prefect/utilities/engine.py +34 -1
- prefect/utilities/importtools.py +1 -1
- prefect/utilities/timeout.py +20 -5
- prefect/workers/base.py +98 -208
- prefect/workers/block.py +6 -0
- prefect/workers/cloud.py +6 -0
- prefect/workers/process.py +262 -4
- prefect/workers/server.py +27 -9
- {prefect_client-3.0.0rc9.dist-info → prefect_client-3.0.0rc11.dist-info}/METADATA +4 -4
- {prefect_client-3.0.0rc9.dist-info → prefect_client-3.0.0rc11.dist-info}/RECORD +49 -44
- {prefect_client-3.0.0rc9.dist-info → prefect_client-3.0.0rc11.dist-info}/LICENSE +0 -0
- {prefect_client-3.0.0rc9.dist-info → prefect_client-3.0.0rc11.dist-info}/WHEEL +0 -0
- {prefect_client-3.0.0rc9.dist-info → prefect_client-3.0.0rc11.dist-info}/top_level.txt +0 -0
prefect/transactions.py
CHANGED
@@ -22,7 +22,7 @@ from prefect.records.result_store import ResultFactoryStore
|
|
22
22
|
from prefect.results import (
|
23
23
|
BaseResult,
|
24
24
|
ResultFactory,
|
25
|
-
|
25
|
+
get_default_result_storage,
|
26
26
|
)
|
27
27
|
from prefect.utilities.asyncutils import run_coro_as_sync
|
28
28
|
from prefect.utilities.collections import AutoEnum
|
@@ -187,8 +187,16 @@ class Transaction(ContextModel):
|
|
187
187
|
|
188
188
|
for hook in self.on_commit_hooks:
|
189
189
|
hook_name = _get_hook_name(hook)
|
190
|
+
if self.logger:
|
191
|
+
self.logger.info(f"Running commit hook {hook_name!r}")
|
192
|
+
|
190
193
|
hook(self)
|
191
194
|
|
195
|
+
if self.logger:
|
196
|
+
self.logger.info(
|
197
|
+
f"Commit hook {hook_name!r} finished running successfully"
|
198
|
+
)
|
199
|
+
|
192
200
|
if self.store and self.key:
|
193
201
|
self.store.write(key=self.key, value=self._staged_value)
|
194
202
|
self.state = TransactionState.COMMITTED
|
@@ -235,8 +243,16 @@ class Transaction(ContextModel):
|
|
235
243
|
try:
|
236
244
|
for hook in reversed(self.on_rollback_hooks):
|
237
245
|
hook_name = _get_hook_name(hook)
|
246
|
+
if self.logger:
|
247
|
+
self.logger.info(f"Running rollback hook {hook_name!r}")
|
248
|
+
|
238
249
|
hook(self)
|
239
250
|
|
251
|
+
if self.logger:
|
252
|
+
self.logger.info(
|
253
|
+
f"Rollback hook {hook_name!r} finished running successfully"
|
254
|
+
)
|
255
|
+
|
240
256
|
self.state = TransactionState.ROLLED_BACK
|
241
257
|
|
242
258
|
for child in reversed(self.children):
|
@@ -297,7 +313,7 @@ def transaction(
|
|
297
313
|
}
|
298
314
|
)
|
299
315
|
else:
|
300
|
-
default_storage =
|
316
|
+
default_storage = get_default_result_storage(_sync=True)
|
301
317
|
if existing_factory:
|
302
318
|
new_factory = existing_factory.model_copy(
|
303
319
|
update={
|
prefect/utilities/asyncutils.py
CHANGED
@@ -184,7 +184,7 @@ def run_coro_as_sync(
|
|
184
184
|
coroutine: Awaitable[R],
|
185
185
|
force_new_thread: bool = False,
|
186
186
|
wait_for_result: bool = True,
|
187
|
-
) -> R:
|
187
|
+
) -> Union[R, None]:
|
188
188
|
"""
|
189
189
|
Runs a coroutine from a synchronous context, as if it were a synchronous
|
190
190
|
function.
|
@@ -216,7 +216,7 @@ def run_coro_as_sync(
|
|
216
216
|
else:
|
217
217
|
raise TypeError("`coroutine` must be a coroutine object")
|
218
218
|
|
219
|
-
async def coroutine_wrapper():
|
219
|
+
async def coroutine_wrapper() -> Union[R, None]:
|
220
220
|
"""
|
221
221
|
Set flags so that children (and grandchildren...) of this task know they are running in a new
|
222
222
|
thread and do not try to run on the run_sync thread, which would cause a
|
@@ -245,7 +245,13 @@ def run_coro_as_sync(
|
|
245
245
|
call = _cast_to_call(coroutine_wrapper)
|
246
246
|
runner = get_run_sync_loop()
|
247
247
|
runner.submit(call)
|
248
|
-
|
248
|
+
try:
|
249
|
+
return call.result()
|
250
|
+
except KeyboardInterrupt:
|
251
|
+
call.cancel()
|
252
|
+
|
253
|
+
logger.debug("Coroutine cancelled due to KeyboardInterrupt.")
|
254
|
+
raise
|
249
255
|
|
250
256
|
|
251
257
|
async def run_sync_in_worker_thread(
|
prefect/utilities/engine.py
CHANGED
@@ -51,6 +51,7 @@ from prefect.logging.loggers import (
|
|
51
51
|
)
|
52
52
|
from prefect.results import BaseResult
|
53
53
|
from prefect.settings import (
|
54
|
+
PREFECT_EXPERIMENTAL_ENABLE_CLIENT_SIDE_TASK_ORCHESTRATION,
|
54
55
|
PREFECT_LOGGING_LOG_PRINTS,
|
55
56
|
)
|
56
57
|
from prefect.states import (
|
@@ -558,7 +559,7 @@ def propose_state_sync(
|
|
558
559
|
)
|
559
560
|
|
560
561
|
|
561
|
-
def _dynamic_key_for_task_run(context: FlowRunContext, task: Task) -> int:
|
562
|
+
def _dynamic_key_for_task_run(context: FlowRunContext, task: Task) -> Union[int, str]:
|
562
563
|
if context.detached: # this task is running on remote infrastructure
|
563
564
|
return str(uuid4())
|
564
565
|
elif context.flow_run is None: # this is an autonomous task run
|
@@ -744,6 +745,12 @@ def emit_task_run_state_change_event(
|
|
744
745
|
"message": truncated_to(
|
745
746
|
state_message_truncation_length, initial_state.message
|
746
747
|
),
|
748
|
+
"state_details": initial_state.state_details.model_dump(
|
749
|
+
mode="json",
|
750
|
+
exclude_none=True,
|
751
|
+
exclude_unset=True,
|
752
|
+
exclude={"flow_run_id", "task_run_id"},
|
753
|
+
),
|
747
754
|
}
|
748
755
|
if initial_state
|
749
756
|
else None
|
@@ -754,7 +761,30 @@ def emit_task_run_state_change_event(
|
|
754
761
|
"message": truncated_to(
|
755
762
|
state_message_truncation_length, validated_state.message
|
756
763
|
),
|
764
|
+
"state_details": validated_state.state_details.model_dump(
|
765
|
+
mode="json",
|
766
|
+
exclude_none=True,
|
767
|
+
exclude_unset=True,
|
768
|
+
exclude={"flow_run_id", "task_run_id"},
|
769
|
+
),
|
770
|
+
"data": validated_state.data.model_dump(mode="json")
|
771
|
+
if isinstance(validated_state.data, BaseResult)
|
772
|
+
else None,
|
757
773
|
},
|
774
|
+
"task_run": task_run.model_dump(
|
775
|
+
mode="json",
|
776
|
+
exclude_none=True,
|
777
|
+
exclude={
|
778
|
+
"id",
|
779
|
+
"created",
|
780
|
+
"updated",
|
781
|
+
"flow_run_id",
|
782
|
+
"state_id",
|
783
|
+
"state_type",
|
784
|
+
"state_name",
|
785
|
+
"state",
|
786
|
+
},
|
787
|
+
),
|
758
788
|
},
|
759
789
|
resource={
|
760
790
|
"prefect.resource.id": f"prefect.task-run.{task_run.id}",
|
@@ -769,6 +799,9 @@ def emit_task_run_state_change_event(
|
|
769
799
|
else ""
|
770
800
|
),
|
771
801
|
"prefect.state-type": str(validated_state.type.value),
|
802
|
+
"prefect.orchestration": "client"
|
803
|
+
if PREFECT_EXPERIMENTAL_ENABLE_CLIENT_SIDE_TASK_ORCHESTRATION
|
804
|
+
else "server",
|
772
805
|
},
|
773
806
|
follows=follows,
|
774
807
|
)
|
prefect/utilities/importtools.py
CHANGED
@@ -422,7 +422,7 @@ def safe_load_namespace(source_code: str):
|
|
422
422
|
logger.debug("Failed to import from %s: %s", node.module, e)
|
423
423
|
|
424
424
|
# Handle local definitions
|
425
|
-
for node in
|
425
|
+
for node in parsed_code.body:
|
426
426
|
if isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.Assign)):
|
427
427
|
try:
|
428
428
|
# Compile and execute each class and function definition and assignment
|
prefect/utilities/timeout.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
from asyncio import CancelledError
|
2
2
|
from contextlib import contextmanager
|
3
|
-
from typing import Optional
|
3
|
+
from typing import Optional, Type
|
4
4
|
|
5
5
|
from prefect._internal.concurrency.cancellation import (
|
6
6
|
cancel_async_after,
|
@@ -8,8 +8,19 @@ from prefect._internal.concurrency.cancellation import (
|
|
8
8
|
)
|
9
9
|
|
10
10
|
|
11
|
+
def fail_if_not_timeout_error(timeout_exc_type: Type[Exception]) -> None:
|
12
|
+
if not issubclass(timeout_exc_type, TimeoutError):
|
13
|
+
raise ValueError(
|
14
|
+
"The `timeout_exc_type` argument must be a subclass of `TimeoutError`."
|
15
|
+
)
|
16
|
+
|
17
|
+
|
11
18
|
@contextmanager
|
12
|
-
def timeout_async(
|
19
|
+
def timeout_async(
|
20
|
+
seconds: Optional[float] = None, timeout_exc_type: Type[TimeoutError] = TimeoutError
|
21
|
+
):
|
22
|
+
fail_if_not_timeout_error(timeout_exc_type)
|
23
|
+
|
13
24
|
if seconds is None:
|
14
25
|
yield
|
15
26
|
return
|
@@ -18,11 +29,15 @@ def timeout_async(seconds: Optional[float] = None):
|
|
18
29
|
with cancel_async_after(timeout=seconds):
|
19
30
|
yield
|
20
31
|
except CancelledError:
|
21
|
-
raise
|
32
|
+
raise timeout_exc_type(f"Scope timed out after {seconds} second(s).")
|
22
33
|
|
23
34
|
|
24
35
|
@contextmanager
|
25
|
-
def timeout(
|
36
|
+
def timeout(
|
37
|
+
seconds: Optional[float] = None, timeout_exc_type: Type[TimeoutError] = TimeoutError
|
38
|
+
):
|
39
|
+
fail_if_not_timeout_error(timeout_exc_type)
|
40
|
+
|
26
41
|
if seconds is None:
|
27
42
|
yield
|
28
43
|
return
|
@@ -31,4 +46,4 @@ def timeout(seconds: Optional[float] = None):
|
|
31
46
|
with cancel_sync_after(timeout=seconds):
|
32
47
|
yield
|
33
48
|
except CancelledError:
|
34
|
-
raise
|
49
|
+
raise timeout_exc_type(f"Scope timed out after {seconds} second(s).")
|
prefect/workers/base.py
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
import abc
|
2
2
|
import inspect
|
3
|
-
import
|
4
|
-
from
|
3
|
+
import threading
|
4
|
+
from contextlib import AsyncExitStack
|
5
|
+
from functools import partial
|
6
|
+
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set, Type, Union
|
5
7
|
from uuid import uuid4
|
6
8
|
|
7
9
|
import anyio
|
@@ -12,49 +14,31 @@ from pydantic.json_schema import GenerateJsonSchema
|
|
12
14
|
from typing_extensions import Literal
|
13
15
|
|
14
16
|
import prefect
|
15
|
-
from prefect._internal.compatibility.experimental import (
|
16
|
-
EXPERIMENTAL_WARNING,
|
17
|
-
ExperimentalFeature,
|
18
|
-
experiment_enabled,
|
19
|
-
)
|
20
17
|
from prefect._internal.schemas.validators import return_v_or_none
|
21
18
|
from prefect.client.orchestration import PrefectClient, get_client
|
22
19
|
from prefect.client.schemas.actions import WorkPoolCreate, WorkPoolUpdate
|
23
|
-
from prefect.client.schemas.filters import (
|
24
|
-
FlowRunFilter,
|
25
|
-
FlowRunFilterId,
|
26
|
-
FlowRunFilterState,
|
27
|
-
FlowRunFilterStateName,
|
28
|
-
FlowRunFilterStateType,
|
29
|
-
WorkPoolFilter,
|
30
|
-
WorkPoolFilterName,
|
31
|
-
WorkQueueFilter,
|
32
|
-
WorkQueueFilterName,
|
33
|
-
)
|
34
20
|
from prefect.client.schemas.objects import StateType, WorkPool
|
35
21
|
from prefect.client.utilities import inject_client
|
36
22
|
from prefect.events import Event, RelatedResource, emit_event
|
37
23
|
from prefect.events.related import object_as_related_resource, tags_as_related_resources
|
38
24
|
from prefect.exceptions import (
|
39
25
|
Abort,
|
40
|
-
InfrastructureNotAvailable,
|
41
|
-
InfrastructureNotFound,
|
42
26
|
ObjectNotFound,
|
43
27
|
)
|
44
28
|
from prefect.logging.loggers import PrefectLogAdapter, flow_run_logger, get_logger
|
45
29
|
from prefect.plugins import load_prefect_collections
|
46
30
|
from prefect.settings import (
|
47
31
|
PREFECT_API_URL,
|
48
|
-
PREFECT_EXPERIMENTAL_WARN,
|
49
|
-
PREFECT_EXPERIMENTAL_WARN_ENHANCED_CANCELLATION,
|
50
32
|
PREFECT_TEST_MODE,
|
51
33
|
PREFECT_WORKER_HEARTBEAT_SECONDS,
|
52
34
|
PREFECT_WORKER_PREFETCH_SECONDS,
|
35
|
+
PREFECT_WORKER_QUERY_SECONDS,
|
53
36
|
get_current_settings,
|
54
37
|
)
|
55
38
|
from prefect.states import Crashed, Pending, exception_to_failed_state
|
56
39
|
from prefect.utilities.dispatch import get_registry_for_type, register_base_type
|
57
40
|
from prefect.utilities.engine import propose_state
|
41
|
+
from prefect.utilities.services import critical_service_loop
|
58
42
|
from prefect.utilities.slugify import slugify
|
59
43
|
from prefect.utilities.templating import (
|
60
44
|
apply_values,
|
@@ -237,22 +221,7 @@ class BaseJobConfiguration(BaseModel):
|
|
237
221
|
"""
|
238
222
|
Generate a command for a flow run job.
|
239
223
|
"""
|
240
|
-
|
241
|
-
if (
|
242
|
-
PREFECT_EXPERIMENTAL_WARN
|
243
|
-
and PREFECT_EXPERIMENTAL_WARN_ENHANCED_CANCELLATION
|
244
|
-
):
|
245
|
-
warnings.warn(
|
246
|
-
EXPERIMENTAL_WARNING.format(
|
247
|
-
feature="Enhanced flow run cancellation",
|
248
|
-
group="enhanced_cancellation",
|
249
|
-
help="",
|
250
|
-
),
|
251
|
-
ExperimentalFeature,
|
252
|
-
stacklevel=3,
|
253
|
-
)
|
254
|
-
return "prefect flow-run execute"
|
255
|
-
return "python -m prefect.engine"
|
224
|
+
return "prefect flow-run execute"
|
256
225
|
|
257
226
|
@staticmethod
|
258
227
|
def _base_flow_run_labels(flow_run: "FlowRun") -> Dict[str, str]:
|
@@ -413,12 +382,14 @@ class BaseWorker(abc.ABC):
|
|
413
382
|
ensure that work pools are not created accidentally.
|
414
383
|
limit: The maximum number of flow runs this worker should be running at
|
415
384
|
a given time.
|
385
|
+
heartbeat_interval_seconds: The number of seconds between worker heartbeats.
|
416
386
|
base_job_template: If creating the work pool, provide the base job
|
417
387
|
template to use. Logs a warning if the pool already exists.
|
418
388
|
"""
|
419
389
|
if name and ("/" in name or "%" in name):
|
420
390
|
raise ValueError("Worker name cannot contain '/' or '%'")
|
421
391
|
self.name = name or f"{self.__class__.__name__} {uuid4()}"
|
392
|
+
self._started_event: Optional[Event] = None
|
422
393
|
self._logger = get_logger(f"worker.{self.__class__.type}.{self.name.lower()}")
|
423
394
|
|
424
395
|
self.is_setup = False
|
@@ -435,6 +406,7 @@ class BaseWorker(abc.ABC):
|
|
435
406
|
)
|
436
407
|
|
437
408
|
self._work_pool: Optional[WorkPool] = None
|
409
|
+
self._exit_stack: AsyncExitStack = AsyncExitStack()
|
438
410
|
self._runs_task_group: Optional[anyio.abc.TaskGroup] = None
|
439
411
|
self._client: Optional[PrefectClient] = None
|
440
412
|
self._last_polled_time: pendulum.DateTime = pendulum.now("utc")
|
@@ -511,6 +483,86 @@ class BaseWorker(abc.ABC):
|
|
511
483
|
},
|
512
484
|
)
|
513
485
|
|
486
|
+
async def start(
|
487
|
+
self,
|
488
|
+
run_once: bool = False,
|
489
|
+
with_healthcheck: bool = False,
|
490
|
+
printer: Callable[..., None] = print,
|
491
|
+
):
|
492
|
+
"""
|
493
|
+
Starts the worker and runs the main worker loops.
|
494
|
+
|
495
|
+
By default, the worker will run loops to poll for scheduled/cancelled flow
|
496
|
+
runs and sync with the Prefect API server.
|
497
|
+
|
498
|
+
If `run_once` is set, the worker will only run each loop once and then return.
|
499
|
+
|
500
|
+
If `with_healthcheck` is set, the worker will start a healthcheck server which
|
501
|
+
can be used to determine if the worker is still polling for flow runs and restart
|
502
|
+
the worker if necessary.
|
503
|
+
|
504
|
+
Args:
|
505
|
+
run_once: If set, the worker will only run each loop once then return.
|
506
|
+
with_healthcheck: If set, the worker will start a healthcheck server.
|
507
|
+
printer: A `print`-like function where logs will be reported.
|
508
|
+
"""
|
509
|
+
healthcheck_server = None
|
510
|
+
healthcheck_thread = None
|
511
|
+
try:
|
512
|
+
async with self as worker:
|
513
|
+
# wait for an initial heartbeat to configure the worker
|
514
|
+
await worker.sync_with_backend()
|
515
|
+
# schedule the scheduled flow run polling loop
|
516
|
+
async with anyio.create_task_group() as loops_task_group:
|
517
|
+
loops_task_group.start_soon(
|
518
|
+
partial(
|
519
|
+
critical_service_loop,
|
520
|
+
workload=self.get_and_submit_flow_runs,
|
521
|
+
interval=PREFECT_WORKER_QUERY_SECONDS.value(),
|
522
|
+
run_once=run_once,
|
523
|
+
jitter_range=0.3,
|
524
|
+
backoff=4, # Up to ~1 minute interval during backoff
|
525
|
+
)
|
526
|
+
)
|
527
|
+
# schedule the sync loop
|
528
|
+
loops_task_group.start_soon(
|
529
|
+
partial(
|
530
|
+
critical_service_loop,
|
531
|
+
workload=self.sync_with_backend,
|
532
|
+
interval=self.heartbeat_interval_seconds,
|
533
|
+
run_once=run_once,
|
534
|
+
jitter_range=0.3,
|
535
|
+
backoff=4,
|
536
|
+
)
|
537
|
+
)
|
538
|
+
|
539
|
+
self._started_event = await self._emit_worker_started_event()
|
540
|
+
|
541
|
+
if with_healthcheck:
|
542
|
+
from prefect.workers.server import build_healthcheck_server
|
543
|
+
|
544
|
+
# we'll start the ASGI server in a separate thread so that
|
545
|
+
# uvicorn does not block the main thread
|
546
|
+
healthcheck_server = build_healthcheck_server(
|
547
|
+
worker=worker,
|
548
|
+
query_interval_seconds=PREFECT_WORKER_QUERY_SECONDS.value(),
|
549
|
+
)
|
550
|
+
healthcheck_thread = threading.Thread(
|
551
|
+
name="healthcheck-server-thread",
|
552
|
+
target=healthcheck_server.run,
|
553
|
+
daemon=True,
|
554
|
+
)
|
555
|
+
healthcheck_thread.start()
|
556
|
+
printer(f"Worker {worker.name!r} started!")
|
557
|
+
finally:
|
558
|
+
if healthcheck_server and healthcheck_thread:
|
559
|
+
self._logger.debug("Stopping healthcheck server...")
|
560
|
+
healthcheck_server.should_exit = True
|
561
|
+
healthcheck_thread.join()
|
562
|
+
self._logger.debug("Healthcheck server stopped.")
|
563
|
+
|
564
|
+
printer(f"Worker {worker.name!r} stopped!")
|
565
|
+
|
514
566
|
@abc.abstractmethod
|
515
567
|
async def run(
|
516
568
|
self,
|
@@ -525,20 +577,6 @@ class BaseWorker(abc.ABC):
|
|
525
577
|
"Workers must implement a method for running submitted flow runs"
|
526
578
|
)
|
527
579
|
|
528
|
-
async def kill_infrastructure(
|
529
|
-
self,
|
530
|
-
infrastructure_pid: str,
|
531
|
-
configuration: BaseJobConfiguration,
|
532
|
-
grace_seconds: int = 30,
|
533
|
-
):
|
534
|
-
"""
|
535
|
-
Method for killing infrastructure created by a worker. Should be implemented by
|
536
|
-
individual workers if they support killing infrastructure.
|
537
|
-
"""
|
538
|
-
raise NotImplementedError(
|
539
|
-
"This worker does not support killing infrastructure."
|
540
|
-
)
|
541
|
-
|
542
580
|
@classmethod
|
543
581
|
def __dispatch_key__(cls):
|
544
582
|
if cls.__name__ == "BaseWorker":
|
@@ -557,8 +595,8 @@ class BaseWorker(abc.ABC):
|
|
557
595
|
raise ValueError("`PREFECT_API_URL` must be set to start a Worker.")
|
558
596
|
|
559
597
|
self._client = get_client()
|
560
|
-
await self.
|
561
|
-
await self.
|
598
|
+
await self._exit_stack.enter_async_context(self._client)
|
599
|
+
await self._exit_stack.enter_async_context(self._runs_task_group)
|
562
600
|
|
563
601
|
self.is_setup = True
|
564
602
|
|
@@ -568,14 +606,14 @@ class BaseWorker(abc.ABC):
|
|
568
606
|
self.is_setup = False
|
569
607
|
for scope in self._scheduled_task_scopes:
|
570
608
|
scope.cancel()
|
571
|
-
|
572
|
-
|
573
|
-
if self.
|
574
|
-
await self.
|
609
|
+
|
610
|
+
await self._exit_stack.__aexit__(*exc_info)
|
611
|
+
if self._started_event:
|
612
|
+
await self._emit_worker_stopped_event(self._started_event)
|
575
613
|
self._runs_task_group = None
|
576
614
|
self._client = None
|
577
615
|
|
578
|
-
def is_worker_still_polling(self, query_interval_seconds:
|
616
|
+
def is_worker_still_polling(self, query_interval_seconds: float) -> bool:
|
579
617
|
"""
|
580
618
|
This method is invoked by a webserver healthcheck handler
|
581
619
|
and returns a boolean indicating if the worker has recorded a
|
@@ -611,138 +649,6 @@ class BaseWorker(abc.ABC):
|
|
611
649
|
|
612
650
|
return await self._submit_scheduled_flow_runs(flow_run_response=runs_response)
|
613
651
|
|
614
|
-
async def check_for_cancelled_flow_runs(self):
|
615
|
-
if not self.is_setup:
|
616
|
-
raise RuntimeError(
|
617
|
-
"Worker is not set up. Please make sure you are running this worker "
|
618
|
-
"as an async context manager."
|
619
|
-
)
|
620
|
-
|
621
|
-
self._logger.debug("Checking for cancelled flow runs...")
|
622
|
-
|
623
|
-
work_queue_filter = (
|
624
|
-
WorkQueueFilter(name=WorkQueueFilterName(any_=list(self._work_queues)))
|
625
|
-
if self._work_queues
|
626
|
-
else None
|
627
|
-
)
|
628
|
-
|
629
|
-
named_cancelling_flow_runs = await self._client.read_flow_runs(
|
630
|
-
flow_run_filter=FlowRunFilter(
|
631
|
-
state=FlowRunFilterState(
|
632
|
-
type=FlowRunFilterStateType(any_=[StateType.CANCELLED]),
|
633
|
-
name=FlowRunFilterStateName(any_=["Cancelling"]),
|
634
|
-
),
|
635
|
-
# Avoid duplicate cancellation calls
|
636
|
-
id=FlowRunFilterId(not_any_=list(self._cancelling_flow_run_ids)),
|
637
|
-
),
|
638
|
-
work_pool_filter=WorkPoolFilter(
|
639
|
-
name=WorkPoolFilterName(any_=[self._work_pool_name])
|
640
|
-
),
|
641
|
-
work_queue_filter=work_queue_filter,
|
642
|
-
)
|
643
|
-
|
644
|
-
typed_cancelling_flow_runs = await self._client.read_flow_runs(
|
645
|
-
flow_run_filter=FlowRunFilter(
|
646
|
-
state=FlowRunFilterState(
|
647
|
-
type=FlowRunFilterStateType(any_=[StateType.CANCELLING]),
|
648
|
-
),
|
649
|
-
# Avoid duplicate cancellation calls
|
650
|
-
id=FlowRunFilterId(not_any_=list(self._cancelling_flow_run_ids)),
|
651
|
-
),
|
652
|
-
work_pool_filter=WorkPoolFilter(
|
653
|
-
name=WorkPoolFilterName(any_=[self._work_pool_name])
|
654
|
-
),
|
655
|
-
work_queue_filter=work_queue_filter,
|
656
|
-
)
|
657
|
-
|
658
|
-
cancelling_flow_runs = named_cancelling_flow_runs + typed_cancelling_flow_runs
|
659
|
-
|
660
|
-
if cancelling_flow_runs:
|
661
|
-
self._logger.info(
|
662
|
-
f"Found {len(cancelling_flow_runs)} flow runs awaiting cancellation."
|
663
|
-
)
|
664
|
-
|
665
|
-
for flow_run in cancelling_flow_runs:
|
666
|
-
self._cancelling_flow_run_ids.add(flow_run.id)
|
667
|
-
self._runs_task_group.start_soon(self.cancel_run, flow_run)
|
668
|
-
|
669
|
-
return cancelling_flow_runs
|
670
|
-
|
671
|
-
async def cancel_run(self, flow_run: "FlowRun"):
|
672
|
-
run_logger = self.get_flow_run_logger(flow_run)
|
673
|
-
|
674
|
-
try:
|
675
|
-
configuration = await self._get_configuration(flow_run)
|
676
|
-
except ObjectNotFound:
|
677
|
-
self._logger.warning(
|
678
|
-
f"Flow run {flow_run.id!r} cannot be cancelled by this worker:"
|
679
|
-
f" associated deployment {flow_run.deployment_id!r} does not exist."
|
680
|
-
)
|
681
|
-
await self._mark_flow_run_as_cancelled(
|
682
|
-
flow_run,
|
683
|
-
state_updates={
|
684
|
-
"message": (
|
685
|
-
"This flow run is missing infrastructure configuration information"
|
686
|
-
" and cancellation cannot be guaranteed."
|
687
|
-
)
|
688
|
-
},
|
689
|
-
)
|
690
|
-
return
|
691
|
-
else:
|
692
|
-
if configuration.is_using_a_runner:
|
693
|
-
self._logger.info(
|
694
|
-
f"Skipping cancellation because flow run {str(flow_run.id)!r} is"
|
695
|
-
" using enhanced cancellation. A dedicated runner will handle"
|
696
|
-
" cancellation."
|
697
|
-
)
|
698
|
-
return
|
699
|
-
|
700
|
-
if not flow_run.infrastructure_pid:
|
701
|
-
run_logger.error(
|
702
|
-
f"Flow run '{flow_run.id}' does not have an infrastructure pid"
|
703
|
-
" attached. Cancellation cannot be guaranteed."
|
704
|
-
)
|
705
|
-
await self._mark_flow_run_as_cancelled(
|
706
|
-
flow_run,
|
707
|
-
state_updates={
|
708
|
-
"message": (
|
709
|
-
"This flow run is missing infrastructure tracking information"
|
710
|
-
" and cancellation cannot be guaranteed."
|
711
|
-
)
|
712
|
-
},
|
713
|
-
)
|
714
|
-
return
|
715
|
-
|
716
|
-
try:
|
717
|
-
await self.kill_infrastructure(
|
718
|
-
infrastructure_pid=flow_run.infrastructure_pid,
|
719
|
-
configuration=configuration,
|
720
|
-
)
|
721
|
-
except NotImplementedError:
|
722
|
-
self._logger.error(
|
723
|
-
f"Worker type {self.type!r} does not support killing created "
|
724
|
-
"infrastructure. Cancellation cannot be guaranteed."
|
725
|
-
)
|
726
|
-
except InfrastructureNotFound as exc:
|
727
|
-
self._logger.warning(f"{exc} Marking flow run as cancelled.")
|
728
|
-
await self._mark_flow_run_as_cancelled(flow_run)
|
729
|
-
except InfrastructureNotAvailable as exc:
|
730
|
-
self._logger.warning(f"{exc} Flow run cannot be cancelled by this worker.")
|
731
|
-
except Exception:
|
732
|
-
run_logger.exception(
|
733
|
-
"Encountered exception while killing infrastructure for flow run "
|
734
|
-
f"'{flow_run.id}'. Flow run may not be cancelled."
|
735
|
-
)
|
736
|
-
# We will try again on generic exceptions
|
737
|
-
self._cancelling_flow_run_ids.remove(flow_run.id)
|
738
|
-
return
|
739
|
-
else:
|
740
|
-
self._emit_flow_run_cancelled_event(
|
741
|
-
flow_run=flow_run, configuration=configuration
|
742
|
-
)
|
743
|
-
await self._mark_flow_run_as_cancelled(flow_run)
|
744
|
-
run_logger.info(f"Cancelled flow run '{flow_run.id}'!")
|
745
|
-
|
746
652
|
async def _update_local_work_pool_info(self):
|
747
653
|
try:
|
748
654
|
work_pool = await self._client.read_work_pool(
|
@@ -1162,6 +1068,7 @@ class BaseWorker(abc.ABC):
|
|
1162
1068
|
async def __aenter__(self):
|
1163
1069
|
self._logger.debug("Entering worker context...")
|
1164
1070
|
await self.setup()
|
1071
|
+
|
1165
1072
|
return self
|
1166
1073
|
|
1167
1074
|
async def __aexit__(self, *exc_info):
|
@@ -1245,20 +1152,3 @@ class BaseWorker(abc.ABC):
|
|
1245
1152
|
related=self._event_related_resources(),
|
1246
1153
|
follows=started_event,
|
1247
1154
|
)
|
1248
|
-
|
1249
|
-
def _emit_flow_run_cancelled_event(
|
1250
|
-
self, flow_run: "FlowRun", configuration: BaseJobConfiguration
|
1251
|
-
):
|
1252
|
-
related = self._event_related_resources(configuration=configuration)
|
1253
|
-
|
1254
|
-
for resource in related:
|
1255
|
-
if resource.role == "flow-run":
|
1256
|
-
resource["prefect.infrastructure.identifier"] = str(
|
1257
|
-
flow_run.infrastructure_pid
|
1258
|
-
)
|
1259
|
-
|
1260
|
-
emit_event(
|
1261
|
-
event="prefect.worker.cancelled-flow-run",
|
1262
|
-
resource=self._event_resource(),
|
1263
|
-
related=related,
|
1264
|
-
)
|
prefect/workers/block.py
ADDED