prefect-client 3.0.0rc10__py3-none-any.whl → 3.0.0rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prefect/_internal/concurrency/api.py +1 -1
- prefect/_internal/retries.py +61 -0
- prefect/client/cloud.py +1 -1
- prefect/client/schemas/objects.py +1 -1
- prefect/concurrency/asyncio.py +3 -3
- prefect/concurrency/events.py +1 -1
- prefect/concurrency/services.py +3 -2
- prefect/concurrency/sync.py +19 -5
- prefect/context.py +8 -2
- prefect/deployments/__init__.py +28 -15
- prefect/deployments/steps/pull.py +7 -0
- prefect/flow_engine.py +5 -7
- prefect/flows.py +179 -65
- prefect/futures.py +53 -7
- prefect/logging/loggers.py +1 -1
- prefect/runner/runner.py +93 -20
- prefect/runner/server.py +20 -22
- prefect/runner/submit.py +0 -8
- prefect/runtime/flow_run.py +38 -3
- prefect/settings.py +9 -13
- prefect/task_worker.py +1 -1
- prefect/transactions.py +16 -0
- prefect/utilities/asyncutils.py +1 -0
- prefect/utilities/engine.py +34 -1
- prefect/workers/base.py +98 -208
- prefect/workers/process.py +262 -4
- prefect/workers/server.py +27 -9
- {prefect_client-3.0.0rc10.dist-info → prefect_client-3.0.0rc11.dist-info}/METADATA +3 -3
- {prefect_client-3.0.0rc10.dist-info → prefect_client-3.0.0rc11.dist-info}/RECORD +32 -31
- {prefect_client-3.0.0rc10.dist-info → prefect_client-3.0.0rc11.dist-info}/LICENSE +0 -0
- {prefect_client-3.0.0rc10.dist-info → prefect_client-3.0.0rc11.dist-info}/WHEEL +0 -0
- {prefect_client-3.0.0rc10.dist-info → prefect_client-3.0.0rc11.dist-info}/top_level.txt +0 -0
prefect/workers/base.py
CHANGED
```diff
@@ -1,7 +1,9 @@
 import abc
 import inspect
-import
-from
+import threading
+from contextlib import AsyncExitStack
+from functools import partial
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set, Type, Union
 from uuid import uuid4
 
 import anyio
@@ -12,49 +14,31 @@ from pydantic.json_schema import GenerateJsonSchema
 from typing_extensions import Literal
 
 import prefect
-from prefect._internal.compatibility.experimental import (
-    EXPERIMENTAL_WARNING,
-    ExperimentalFeature,
-    experiment_enabled,
-)
 from prefect._internal.schemas.validators import return_v_or_none
 from prefect.client.orchestration import PrefectClient, get_client
 from prefect.client.schemas.actions import WorkPoolCreate, WorkPoolUpdate
-from prefect.client.schemas.filters import (
-    FlowRunFilter,
-    FlowRunFilterId,
-    FlowRunFilterState,
-    FlowRunFilterStateName,
-    FlowRunFilterStateType,
-    WorkPoolFilter,
-    WorkPoolFilterName,
-    WorkQueueFilter,
-    WorkQueueFilterName,
-)
 from prefect.client.schemas.objects import StateType, WorkPool
 from prefect.client.utilities import inject_client
 from prefect.events import Event, RelatedResource, emit_event
 from prefect.events.related import object_as_related_resource, tags_as_related_resources
 from prefect.exceptions import (
     Abort,
-    InfrastructureNotAvailable,
-    InfrastructureNotFound,
     ObjectNotFound,
 )
 from prefect.logging.loggers import PrefectLogAdapter, flow_run_logger, get_logger
 from prefect.plugins import load_prefect_collections
 from prefect.settings import (
     PREFECT_API_URL,
-    PREFECT_EXPERIMENTAL_WARN,
-    PREFECT_EXPERIMENTAL_WARN_ENHANCED_CANCELLATION,
     PREFECT_TEST_MODE,
     PREFECT_WORKER_HEARTBEAT_SECONDS,
     PREFECT_WORKER_PREFETCH_SECONDS,
+    PREFECT_WORKER_QUERY_SECONDS,
     get_current_settings,
 )
 from prefect.states import Crashed, Pending, exception_to_failed_state
 from prefect.utilities.dispatch import get_registry_for_type, register_base_type
 from prefect.utilities.engine import propose_state
+from prefect.utilities.services import critical_service_loop
 from prefect.utilities.slugify import slugify
 from prefect.utilities.templating import (
     apply_values,
@@ -237,22 +221,7 @@ class BaseJobConfiguration(BaseModel):
         """
         Generate a command for a flow run job.
         """
-
-        if (
-            PREFECT_EXPERIMENTAL_WARN
-            and PREFECT_EXPERIMENTAL_WARN_ENHANCED_CANCELLATION
-        ):
-            warnings.warn(
-                EXPERIMENTAL_WARNING.format(
-                    feature="Enhanced flow run cancellation",
-                    group="enhanced_cancellation",
-                    help="",
-                ),
-                ExperimentalFeature,
-                stacklevel=3,
-            )
-            return "prefect flow-run execute"
-        return "python -m prefect.engine"
+        return "prefect flow-run execute"
 
     @staticmethod
     def _base_flow_run_labels(flow_run: "FlowRun") -> Dict[str, str]:
@@ -413,12 +382,14 @@ class BaseWorker(abc.ABC):
                 ensure that work pools are not created accidentally.
             limit: The maximum number of flow runs this worker should be running at
                 a given time.
+            heartbeat_interval_seconds: The number of seconds between worker heartbeats.
             base_job_template: If creating the work pool, provide the base job
                 template to use. Logs a warning if the pool already exists.
         """
         if name and ("/" in name or "%" in name):
             raise ValueError("Worker name cannot contain '/' or '%'")
         self.name = name or f"{self.__class__.__name__} {uuid4()}"
+        self._started_event: Optional[Event] = None
         self._logger = get_logger(f"worker.{self.__class__.type}.{self.name.lower()}")
 
         self.is_setup = False
@@ -435,6 +406,7 @@ class BaseWorker(abc.ABC):
         )
 
         self._work_pool: Optional[WorkPool] = None
+        self._exit_stack: AsyncExitStack = AsyncExitStack()
         self._runs_task_group: Optional[anyio.abc.TaskGroup] = None
         self._client: Optional[PrefectClient] = None
         self._last_polled_time: pendulum.DateTime = pendulum.now("utc")
@@ -511,6 +483,86 @@ class BaseWorker(abc.ABC):
             },
         )
 
+    async def start(
+        self,
+        run_once: bool = False,
+        with_healthcheck: bool = False,
+        printer: Callable[..., None] = print,
+    ):
+        """
+        Starts the worker and runs the main worker loops.
+
+        By default, the worker will run loops to poll for scheduled/cancelled flow
+        runs and sync with the Prefect API server.
+
+        If `run_once` is set, the worker will only run each loop once and then return.
+
+        If `with_healthcheck` is set, the worker will start a healthcheck server which
+        can be used to determine if the worker is still polling for flow runs and restart
+        the worker if necessary.
+
+        Args:
+            run_once: If set, the worker will only run each loop once then return.
+            with_healthcheck: If set, the worker will start a healthcheck server.
+            printer: A `print`-like function where logs will be reported.
+        """
+        healthcheck_server = None
+        healthcheck_thread = None
+        try:
+            async with self as worker:
+                # wait for an initial heartbeat to configure the worker
+                await worker.sync_with_backend()
+                # schedule the scheduled flow run polling loop
+                async with anyio.create_task_group() as loops_task_group:
+                    loops_task_group.start_soon(
+                        partial(
+                            critical_service_loop,
+                            workload=self.get_and_submit_flow_runs,
+                            interval=PREFECT_WORKER_QUERY_SECONDS.value(),
+                            run_once=run_once,
+                            jitter_range=0.3,
+                            backoff=4,  # Up to ~1 minute interval during backoff
+                        )
+                    )
+                    # schedule the sync loop
+                    loops_task_group.start_soon(
+                        partial(
+                            critical_service_loop,
+                            workload=self.sync_with_backend,
+                            interval=self.heartbeat_interval_seconds,
+                            run_once=run_once,
+                            jitter_range=0.3,
+                            backoff=4,
+                        )
+                    )
+
+                    self._started_event = await self._emit_worker_started_event()
+
+                    if with_healthcheck:
+                        from prefect.workers.server import build_healthcheck_server
+
+                        # we'll start the ASGI server in a separate thread so that
+                        # uvicorn does not block the main thread
+                        healthcheck_server = build_healthcheck_server(
+                            worker=worker,
+                            query_interval_seconds=PREFECT_WORKER_QUERY_SECONDS.value(),
+                        )
+                        healthcheck_thread = threading.Thread(
+                            name="healthcheck-server-thread",
+                            target=healthcheck_server.run,
+                            daemon=True,
+                        )
+                        healthcheck_thread.start()
+                    printer(f"Worker {worker.name!r} started!")
+        finally:
+            if healthcheck_server and healthcheck_thread:
+                self._logger.debug("Stopping healthcheck server...")
+                healthcheck_server.should_exit = True
+                healthcheck_thread.join()
+                self._logger.debug("Healthcheck server stopped.")
+
+        printer(f"Worker {worker.name!r} stopped!")
+
     @abc.abstractmethod
     async def run(
         self,
```
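The new `BaseWorker.start()` above schedules its polling and sync workloads on a single anyio task group, wrapping each `critical_service_loop` call in `functools.partial` because `start_soon` takes a bare callable rather than keyword arguments. The following is a minimal sketch of that pattern only, not code from this package: `poll_for_runs` and `heartbeat` are hypothetical stand-ins for `get_and_submit_flow_runs` and `sync_with_backend`, and `service_loop` is a simplified stand-in for `critical_service_loop`.

```python
from functools import partial

import anyio


async def service_loop(workload, interval: float, run_once: bool = False) -> None:
    # Simplified stand-in for prefect.utilities.services.critical_service_loop
    while True:
        await workload()
        if run_once:
            return
        await anyio.sleep(interval)


async def poll_for_runs() -> None:
    # Hypothetical workload standing in for get_and_submit_flow_runs
    print("polling for scheduled flow runs...")


async def heartbeat() -> None:
    # Hypothetical workload standing in for sync_with_backend
    print("syncing with the backend...")


async def main() -> None:
    async with anyio.create_task_group() as tg:
        # start_soon expects a callable; partial binds each loop's arguments,
        # mirroring how BaseWorker.start() schedules its two loops.
        tg.start_soon(partial(service_loop, poll_for_runs, 10.0, True))
        tg.start_soon(partial(service_loop, heartbeat, 30.0, True))


if __name__ == "__main__":
    anyio.run(main)
```

In the real worker the loops also apply jitter and exponential backoff (the `jitter_range` and `backoff` arguments in the hunk above), which this sketch omits.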
```diff
@@ -525,20 +577,6 @@ class BaseWorker(abc.ABC):
             "Workers must implement a method for running submitted flow runs"
         )
 
-    async def kill_infrastructure(
-        self,
-        infrastructure_pid: str,
-        configuration: BaseJobConfiguration,
-        grace_seconds: int = 30,
-    ):
-        """
-        Method for killing infrastructure created by a worker. Should be implemented by
-        individual workers if they support killing infrastructure.
-        """
-        raise NotImplementedError(
-            "This worker does not support killing infrastructure."
-        )
-
     @classmethod
     def __dispatch_key__(cls):
         if cls.__name__ == "BaseWorker":
@@ -557,8 +595,8 @@ class BaseWorker(abc.ABC):
            raise ValueError("`PREFECT_API_URL` must be set to start a Worker.")
 
         self._client = get_client()
-        await self.
-        await self.
+        await self._exit_stack.enter_async_context(self._client)
+        await self._exit_stack.enter_async_context(self._runs_task_group)
 
         self.is_setup = True
 
@@ -568,14 +606,14 @@ class BaseWorker(abc.ABC):
         self.is_setup = False
         for scope in self._scheduled_task_scopes:
             scope.cancel()
-
-
-        if self.
-            await self.
+
+        await self._exit_stack.__aexit__(*exc_info)
+        if self._started_event:
+            await self._emit_worker_stopped_event(self._started_event)
         self._runs_task_group = None
         self._client = None
 
-    def is_worker_still_polling(self, query_interval_seconds:
+    def is_worker_still_polling(self, query_interval_seconds: float) -> bool:
         """
         This method is invoked by a webserver healthcheck handler
         and returns a boolean indicating if the worker has recorded a
```
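The setup/teardown hunks above replace the worker's manual enter/exit bookkeeping with a single `contextlib.AsyncExitStack`: the client and the runs task group are entered onto the stack in `setup()`, and `teardown()` unwinds everything with one `__aexit__` call. A minimal sketch of that pattern follows, using a hypothetical `FakeClient` resource rather than the package's client or task group.

```python
from contextlib import AsyncExitStack

import anyio


class FakeClient:
    # Hypothetical async resource standing in for the Prefect client / task group
    async def __aenter__(self):
        print("client opened")
        return self

    async def __aexit__(self, *exc_info):
        print("client closed")


class SketchWorker:
    def __init__(self) -> None:
        self._exit_stack = AsyncExitStack()
        self._client = None

    async def setup(self) -> None:
        # Enter resources onto the stack; they stay open until teardown
        self._client = await self._exit_stack.enter_async_context(FakeClient())

    async def teardown(self, *exc_info) -> None:
        # One call closes everything entered on the stack, in reverse order
        await self._exit_stack.__aexit__(*exc_info)


async def main() -> None:
    worker = SketchWorker()
    await worker.setup()
    await worker.teardown(None, None, None)


if __name__ == "__main__":
    anyio.run(main)
```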
```diff
@@ -611,138 +649,6 @@ class BaseWorker(abc.ABC):
 
         return await self._submit_scheduled_flow_runs(flow_run_response=runs_response)
 
-    async def check_for_cancelled_flow_runs(self):
-        if not self.is_setup:
-            raise RuntimeError(
-                "Worker is not set up. Please make sure you are running this worker "
-                "as an async context manager."
-            )
-
-        self._logger.debug("Checking for cancelled flow runs...")
-
-        work_queue_filter = (
-            WorkQueueFilter(name=WorkQueueFilterName(any_=list(self._work_queues)))
-            if self._work_queues
-            else None
-        )
-
-        named_cancelling_flow_runs = await self._client.read_flow_runs(
-            flow_run_filter=FlowRunFilter(
-                state=FlowRunFilterState(
-                    type=FlowRunFilterStateType(any_=[StateType.CANCELLED]),
-                    name=FlowRunFilterStateName(any_=["Cancelling"]),
-                ),
-                # Avoid duplicate cancellation calls
-                id=FlowRunFilterId(not_any_=list(self._cancelling_flow_run_ids)),
-            ),
-            work_pool_filter=WorkPoolFilter(
-                name=WorkPoolFilterName(any_=[self._work_pool_name])
-            ),
-            work_queue_filter=work_queue_filter,
-        )
-
-        typed_cancelling_flow_runs = await self._client.read_flow_runs(
-            flow_run_filter=FlowRunFilter(
-                state=FlowRunFilterState(
-                    type=FlowRunFilterStateType(any_=[StateType.CANCELLING]),
-                ),
-                # Avoid duplicate cancellation calls
-                id=FlowRunFilterId(not_any_=list(self._cancelling_flow_run_ids)),
-            ),
-            work_pool_filter=WorkPoolFilter(
-                name=WorkPoolFilterName(any_=[self._work_pool_name])
-            ),
-            work_queue_filter=work_queue_filter,
-        )
-
-        cancelling_flow_runs = named_cancelling_flow_runs + typed_cancelling_flow_runs
-
-        if cancelling_flow_runs:
-            self._logger.info(
-                f"Found {len(cancelling_flow_runs)} flow runs awaiting cancellation."
-            )
-
-        for flow_run in cancelling_flow_runs:
-            self._cancelling_flow_run_ids.add(flow_run.id)
-            self._runs_task_group.start_soon(self.cancel_run, flow_run)
-
-        return cancelling_flow_runs
-
-    async def cancel_run(self, flow_run: "FlowRun"):
-        run_logger = self.get_flow_run_logger(flow_run)
-
-        try:
-            configuration = await self._get_configuration(flow_run)
-        except ObjectNotFound:
-            self._logger.warning(
-                f"Flow run {flow_run.id!r} cannot be cancelled by this worker:"
-                f" associated deployment {flow_run.deployment_id!r} does not exist."
-            )
-            await self._mark_flow_run_as_cancelled(
-                flow_run,
-                state_updates={
-                    "message": (
-                        "This flow run is missing infrastructure configuration information"
-                        " and cancellation cannot be guaranteed."
-                    )
-                },
-            )
-            return
-        else:
-            if configuration.is_using_a_runner:
-                self._logger.info(
-                    f"Skipping cancellation because flow run {str(flow_run.id)!r} is"
-                    " using enhanced cancellation. A dedicated runner will handle"
-                    " cancellation."
-                )
-                return
-
-        if not flow_run.infrastructure_pid:
-            run_logger.error(
-                f"Flow run '{flow_run.id}' does not have an infrastructure pid"
-                " attached. Cancellation cannot be guaranteed."
-            )
-            await self._mark_flow_run_as_cancelled(
-                flow_run,
-                state_updates={
-                    "message": (
-                        "This flow run is missing infrastructure tracking information"
-                        " and cancellation cannot be guaranteed."
-                    )
-                },
-            )
-            return
-
-        try:
-            await self.kill_infrastructure(
-                infrastructure_pid=flow_run.infrastructure_pid,
-                configuration=configuration,
-            )
-        except NotImplementedError:
-            self._logger.error(
-                f"Worker type {self.type!r} does not support killing created "
-                "infrastructure. Cancellation cannot be guaranteed."
-            )
-        except InfrastructureNotFound as exc:
-            self._logger.warning(f"{exc} Marking flow run as cancelled.")
-            await self._mark_flow_run_as_cancelled(flow_run)
-        except InfrastructureNotAvailable as exc:
-            self._logger.warning(f"{exc} Flow run cannot be cancelled by this worker.")
-        except Exception:
-            run_logger.exception(
-                "Encountered exception while killing infrastructure for flow run "
-                f"'{flow_run.id}'. Flow run may not be cancelled."
-            )
-            # We will try again on generic exceptions
-            self._cancelling_flow_run_ids.remove(flow_run.id)
-            return
-        else:
-            self._emit_flow_run_cancelled_event(
-                flow_run=flow_run, configuration=configuration
-            )
-            await self._mark_flow_run_as_cancelled(flow_run)
-            run_logger.info(f"Cancelled flow run '{flow_run.id}'!")
-
     async def _update_local_work_pool_info(self):
         try:
             work_pool = await self._client.read_work_pool(
@@ -1162,6 +1068,7 @@ class BaseWorker(abc.ABC):
     async def __aenter__(self):
         self._logger.debug("Entering worker context...")
         await self.setup()
+
         return self
 
     async def __aexit__(self, *exc_info):
@@ -1245,20 +1152,3 @@ class BaseWorker(abc.ABC):
             related=self._event_related_resources(),
             follows=started_event,
         )
-
-    def _emit_flow_run_cancelled_event(
-        self, flow_run: "FlowRun", configuration: BaseJobConfiguration
-    ):
-        related = self._event_related_resources(configuration=configuration)
-
-        for resource in related:
-            if resource.role == "flow-run":
-                resource["prefect.infrastructure.identifier"] = str(
-                    flow_run.infrastructure_pid
-                )
-
-        emit_event(
-            event="prefect.worker.cancelled-flow-run",
-            resource=self._event_resource(),
-            related=related,
-        )
```