prefect-client 3.0.0rc10__py3-none-any.whl → 3.0.0rc11__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
prefect/workers/base.py CHANGED
@@ -1,7 +1,9 @@
 import abc
 import inspect
-import warnings
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Type, Union
+import threading
+from contextlib import AsyncExitStack
+from functools import partial
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set, Type, Union
 from uuid import uuid4
 
 import anyio
@@ -12,49 +14,31 @@ from pydantic.json_schema import GenerateJsonSchema
 from typing_extensions import Literal
 
 import prefect
-from prefect._internal.compatibility.experimental import (
-    EXPERIMENTAL_WARNING,
-    ExperimentalFeature,
-    experiment_enabled,
-)
 from prefect._internal.schemas.validators import return_v_or_none
 from prefect.client.orchestration import PrefectClient, get_client
 from prefect.client.schemas.actions import WorkPoolCreate, WorkPoolUpdate
-from prefect.client.schemas.filters import (
-    FlowRunFilter,
-    FlowRunFilterId,
-    FlowRunFilterState,
-    FlowRunFilterStateName,
-    FlowRunFilterStateType,
-    WorkPoolFilter,
-    WorkPoolFilterName,
-    WorkQueueFilter,
-    WorkQueueFilterName,
-)
 from prefect.client.schemas.objects import StateType, WorkPool
 from prefect.client.utilities import inject_client
 from prefect.events import Event, RelatedResource, emit_event
 from prefect.events.related import object_as_related_resource, tags_as_related_resources
 from prefect.exceptions import (
     Abort,
-    InfrastructureNotAvailable,
-    InfrastructureNotFound,
     ObjectNotFound,
 )
 from prefect.logging.loggers import PrefectLogAdapter, flow_run_logger, get_logger
 from prefect.plugins import load_prefect_collections
 from prefect.settings import (
     PREFECT_API_URL,
-    PREFECT_EXPERIMENTAL_WARN,
-    PREFECT_EXPERIMENTAL_WARN_ENHANCED_CANCELLATION,
     PREFECT_TEST_MODE,
     PREFECT_WORKER_HEARTBEAT_SECONDS,
     PREFECT_WORKER_PREFETCH_SECONDS,
+    PREFECT_WORKER_QUERY_SECONDS,
     get_current_settings,
 )
 from prefect.states import Crashed, Pending, exception_to_failed_state
 from prefect.utilities.dispatch import get_registry_for_type, register_base_type
 from prefect.utilities.engine import propose_state
+from prefect.utilities.services import critical_service_loop
 from prefect.utilities.slugify import slugify
 from prefect.utilities.templating import (
     apply_values,
@@ -237,22 +221,7 @@ class BaseJobConfiguration(BaseModel):
         """
         Generate a command for a flow run job.
         """
-        if experiment_enabled("enhanced_cancellation"):
-            if (
-                PREFECT_EXPERIMENTAL_WARN
-                and PREFECT_EXPERIMENTAL_WARN_ENHANCED_CANCELLATION
-            ):
-                warnings.warn(
-                    EXPERIMENTAL_WARNING.format(
-                        feature="Enhanced flow run cancellation",
-                        group="enhanced_cancellation",
-                        help="",
-                    ),
-                    ExperimentalFeature,
-                    stacklevel=3,
-                )
-            return "prefect flow-run execute"
-        return "python -m prefect.engine"
+        return "prefect flow-run execute"
 
     @staticmethod
     def _base_flow_run_labels(flow_run: "FlowRun") -> Dict[str, str]:
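With the `enhanced_cancellation` experiment promoted, `prefect flow-run execute` is now the unconditional default command. As a hedged sketch, a downstream worker that still needed the old behavior could override the hook in its own job configuration subclass (the subclass name here is illustrative, not part of this release):

```python
from prefect.workers.base import BaseJobConfiguration


class LegacyCommandJobConfiguration(BaseJobConfiguration):
    """Illustrative subclass; restores the pre-rc11 default command."""

    @staticmethod
    def _base_flow_run_command() -> str:
        # the old default removed in this diff
        return "python -m prefect.engine"
```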
@@ -413,12 +382,14 @@ class BaseWorker(abc.ABC):
                 ensure that work pools are not created accidentally.
             limit: The maximum number of flow runs this worker should be running at
                 a given time.
+            heartbeat_interval_seconds: The number of seconds between worker heartbeats.
             base_job_template: If creating the work pool, provide the base job
                 template to use. Logs a warning if the pool already exists.
         """
         if name and ("/" in name or "%" in name):
             raise ValueError("Worker name cannot contain '/' or '%'")
         self.name = name or f"{self.__class__.__name__} {uuid4()}"
+        self._started_event: Optional[Event] = None
         self._logger = get_logger(f"worker.{self.__class__.type}.{self.name.lower()}")
 
         self.is_setup = False
@@ -435,6 +406,7 @@ class BaseWorker(abc.ABC):
         )
 
         self._work_pool: Optional[WorkPool] = None
+        self._exit_stack: AsyncExitStack = AsyncExitStack()
         self._runs_task_group: Optional[anyio.abc.TaskGroup] = None
         self._client: Optional[PrefectClient] = None
         self._last_polled_time: pendulum.DateTime = pendulum.now("utc")
@@ -511,6 +483,86 @@ class BaseWorker(abc.ABC):
             },
         )
 
+    async def start(
+        self,
+        run_once: bool = False,
+        with_healthcheck: bool = False,
+        printer: Callable[..., None] = print,
+    ):
+        """
+        Starts the worker and runs the main worker loops.
+
+        By default, the worker will run loops to poll for scheduled/cancelled flow
+        runs and sync with the Prefect API server.
+
+        If `run_once` is set, the worker will only run each loop once and then return.
+
+        If `with_healthcheck` is set, the worker will start a healthcheck server which
+        can be used to determine if the worker is still polling for flow runs and restart
+        the worker if necessary.
+
+        Args:
+            run_once: If set, the worker will only run each loop once then return.
+            with_healthcheck: If set, the worker will start a healthcheck server.
+            printer: A `print`-like function where logs will be reported.
+        """
+        healthcheck_server = None
+        healthcheck_thread = None
+        try:
+            async with self as worker:
+                # wait for an initial heartbeat to configure the worker
+                await worker.sync_with_backend()
+                # schedule the scheduled flow run polling loop
+                async with anyio.create_task_group() as loops_task_group:
+                    loops_task_group.start_soon(
+                        partial(
+                            critical_service_loop,
+                            workload=self.get_and_submit_flow_runs,
+                            interval=PREFECT_WORKER_QUERY_SECONDS.value(),
+                            run_once=run_once,
+                            jitter_range=0.3,
+                            backoff=4,  # Up to ~1 minute interval during backoff
+                        )
+                    )
+                    # schedule the sync loop
+                    loops_task_group.start_soon(
+                        partial(
+                            critical_service_loop,
+                            workload=self.sync_with_backend,
+                            interval=self.heartbeat_interval_seconds,
+                            run_once=run_once,
+                            jitter_range=0.3,
+                            backoff=4,
+                        )
+                    )
+
+                    self._started_event = await self._emit_worker_started_event()
+
+                    if with_healthcheck:
+                        from prefect.workers.server import build_healthcheck_server
+
+                        # we'll start the ASGI server in a separate thread so that
+                        # uvicorn does not block the main thread
+                        healthcheck_server = build_healthcheck_server(
+                            worker=worker,
+                            query_interval_seconds=PREFECT_WORKER_QUERY_SECONDS.value(),
+                        )
+                        healthcheck_thread = threading.Thread(
+                            name="healthcheck-server-thread",
+                            target=healthcheck_server.run,
+                            daemon=True,
+                        )
+                        healthcheck_thread.start()
+                    printer(f"Worker {worker.name!r} started!")
+        finally:
+            if healthcheck_server and healthcheck_thread:
+                self._logger.debug("Stopping healthcheck server...")
+                healthcheck_server.should_exit = True
+                healthcheck_thread.join()
+                self._logger.debug("Healthcheck server stopped.")
+
+        printer(f"Worker {worker.name!r} stopped!")
+
     @abc.abstractmethod
     async def run(
         self,
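The new `start` entrypoint above wires up the polling and sync loops that the CLI previously managed. A minimal usage sketch, assuming the concrete `ProcessWorker` subclass and a work pool named "my-pool" (both names are illustrative):

```python
import asyncio

from prefect.workers.process import ProcessWorker


async def main() -> None:
    # run_once=True drives each polling loop a single time, which is
    # convenient for smoke-testing a worker without leaving it running
    worker = ProcessWorker(work_pool_name="my-pool")
    await worker.start(run_once=True, with_healthcheck=False)


if __name__ == "__main__":
    asyncio.run(main())
```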
@@ -525,20 +577,6 @@ class BaseWorker(abc.ABC):
             "Workers must implement a method for running submitted flow runs"
         )
 
-    async def kill_infrastructure(
-        self,
-        infrastructure_pid: str,
-        configuration: BaseJobConfiguration,
-        grace_seconds: int = 30,
-    ):
-        """
-        Method for killing infrastructure created by a worker. Should be implemented by
-        individual workers if they support killing infrastructure.
-        """
-        raise NotImplementedError(
-            "This worker does not support killing infrastructure."
-        )
-
     @classmethod
     def __dispatch_key__(cls):
         if cls.__name__ == "BaseWorker":
@@ -557,8 +595,8 @@ class BaseWorker(abc.ABC):
             raise ValueError("`PREFECT_API_URL` must be set to start a Worker.")
 
         self._client = get_client()
-        await self._client.__aenter__()
-        await self._runs_task_group.__aenter__()
+        await self._exit_stack.enter_async_context(self._client)
+        await self._exit_stack.enter_async_context(self._runs_task_group)
 
         self.is_setup = True
 
@@ -568,14 +606,14 @@ class BaseWorker(abc.ABC):
         self.is_setup = False
         for scope in self._scheduled_task_scopes:
             scope.cancel()
-        if self._runs_task_group:
-            await self._runs_task_group.__aexit__(*exc_info)
-        if self._client:
-            await self._client.__aexit__(*exc_info)
+
+        await self._exit_stack.__aexit__(*exc_info)
+        if self._started_event:
+            await self._emit_worker_stopped_event(self._started_event)
         self._runs_task_group = None
         self._client = None
 
-    def is_worker_still_polling(self, query_interval_seconds: int) -> bool:
+    def is_worker_still_polling(self, query_interval_seconds: float) -> bool:
         """
         This method is invoked by a webserver healthcheck handler
         and returns a boolean indicating if the worker has recorded a
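The setup/teardown hunks above replace manually paired `__aenter__`/`__aexit__` calls with a single `AsyncExitStack`, which unwinds every entered context in reverse order even if a later entry fails. A self-contained sketch of the pattern (the resource names are illustrative):

```python
import asyncio
from contextlib import AsyncExitStack, asynccontextmanager


@asynccontextmanager
async def resource(name: str):
    print(f"open {name}")
    try:
        yield name
    finally:
        print(f"close {name}")


async def main() -> None:
    stack = AsyncExitStack()
    # mirror of setup(): enter each context on the shared stack
    await stack.enter_async_context(resource("client"))
    await stack.enter_async_context(resource("runs-task-group"))
    # mirror of teardown(): a single __aexit__ closes both,
    # last-entered first ("runs-task-group", then "client")
    await stack.__aexit__(None, None, None)


asyncio.run(main())
```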
@@ -611,138 +649,6 @@ class BaseWorker(abc.ABC):
 
         return await self._submit_scheduled_flow_runs(flow_run_response=runs_response)
 
-    async def check_for_cancelled_flow_runs(self):
-        if not self.is_setup:
-            raise RuntimeError(
-                "Worker is not set up. Please make sure you are running this worker "
-                "as an async context manager."
-            )
-
-        self._logger.debug("Checking for cancelled flow runs...")
-
-        work_queue_filter = (
-            WorkQueueFilter(name=WorkQueueFilterName(any_=list(self._work_queues)))
-            if self._work_queues
-            else None
-        )
-
-        named_cancelling_flow_runs = await self._client.read_flow_runs(
-            flow_run_filter=FlowRunFilter(
-                state=FlowRunFilterState(
-                    type=FlowRunFilterStateType(any_=[StateType.CANCELLED]),
-                    name=FlowRunFilterStateName(any_=["Cancelling"]),
-                ),
-                # Avoid duplicate cancellation calls
-                id=FlowRunFilterId(not_any_=list(self._cancelling_flow_run_ids)),
-            ),
-            work_pool_filter=WorkPoolFilter(
-                name=WorkPoolFilterName(any_=[self._work_pool_name])
-            ),
-            work_queue_filter=work_queue_filter,
-        )
-
-        typed_cancelling_flow_runs = await self._client.read_flow_runs(
-            flow_run_filter=FlowRunFilter(
-                state=FlowRunFilterState(
-                    type=FlowRunFilterStateType(any_=[StateType.CANCELLING]),
-                ),
-                # Avoid duplicate cancellation calls
-                id=FlowRunFilterId(not_any_=list(self._cancelling_flow_run_ids)),
-            ),
-            work_pool_filter=WorkPoolFilter(
-                name=WorkPoolFilterName(any_=[self._work_pool_name])
-            ),
-            work_queue_filter=work_queue_filter,
-        )
-
-        cancelling_flow_runs = named_cancelling_flow_runs + typed_cancelling_flow_runs
-
-        if cancelling_flow_runs:
-            self._logger.info(
-                f"Found {len(cancelling_flow_runs)} flow runs awaiting cancellation."
-            )
-
-        for flow_run in cancelling_flow_runs:
-            self._cancelling_flow_run_ids.add(flow_run.id)
-            self._runs_task_group.start_soon(self.cancel_run, flow_run)
-
-        return cancelling_flow_runs
-
-    async def cancel_run(self, flow_run: "FlowRun"):
-        run_logger = self.get_flow_run_logger(flow_run)
-
-        try:
-            configuration = await self._get_configuration(flow_run)
-        except ObjectNotFound:
-            self._logger.warning(
-                f"Flow run {flow_run.id!r} cannot be cancelled by this worker:"
-                f" associated deployment {flow_run.deployment_id!r} does not exist."
-            )
-            await self._mark_flow_run_as_cancelled(
-                flow_run,
-                state_updates={
-                    "message": (
-                        "This flow run is missing infrastructure configuration information"
-                        " and cancellation cannot be guaranteed."
-                    )
-                },
-            )
-            return
-        else:
-            if configuration.is_using_a_runner:
-                self._logger.info(
-                    f"Skipping cancellation because flow run {str(flow_run.id)!r} is"
-                    " using enhanced cancellation. A dedicated runner will handle"
-                    " cancellation."
-                )
-                return
-
-        if not flow_run.infrastructure_pid:
-            run_logger.error(
-                f"Flow run '{flow_run.id}' does not have an infrastructure pid"
-                " attached. Cancellation cannot be guaranteed."
-            )
-            await self._mark_flow_run_as_cancelled(
-                flow_run,
-                state_updates={
-                    "message": (
-                        "This flow run is missing infrastructure tracking information"
-                        " and cancellation cannot be guaranteed."
-                    )
-                },
-            )
-            return
-
-        try:
-            await self.kill_infrastructure(
-                infrastructure_pid=flow_run.infrastructure_pid,
-                configuration=configuration,
-            )
-        except NotImplementedError:
-            self._logger.error(
-                f"Worker type {self.type!r} does not support killing created "
-                "infrastructure. Cancellation cannot be guaranteed."
-            )
-        except InfrastructureNotFound as exc:
-            self._logger.warning(f"{exc} Marking flow run as cancelled.")
-            await self._mark_flow_run_as_cancelled(flow_run)
-        except InfrastructureNotAvailable as exc:
-            self._logger.warning(f"{exc} Flow run cannot be cancelled by this worker.")
-        except Exception:
-            run_logger.exception(
-                "Encountered exception while killing infrastructure for flow run "
-                f"'{flow_run.id}'. Flow run may not be cancelled."
-            )
-            # We will try again on generic exceptions
-            self._cancelling_flow_run_ids.remove(flow_run.id)
-            return
-        else:
-            self._emit_flow_run_cancelled_event(
-                flow_run=flow_run, configuration=configuration
-            )
-            await self._mark_flow_run_as_cancelled(flow_run)
-            run_logger.info(f"Cancelled flow run '{flow_run.id}'!")
-
     async def _update_local_work_pool_info(self):
         try:
             work_pool = await self._client.read_work_pool(
@@ -1162,6 +1068,7 @@ class BaseWorker(abc.ABC):
     async def __aenter__(self):
         self._logger.debug("Entering worker context...")
         await self.setup()
+
        return self
 
     async def __aexit__(self, *exc_info):
@@ -1245,20 +1152,3 @@ class BaseWorker(abc.ABC):
             related=self._event_related_resources(),
             follows=started_event,
         )
-
-    def _emit_flow_run_cancelled_event(
-        self, flow_run: "FlowRun", configuration: BaseJobConfiguration
-    ):
-        related = self._event_related_resources(configuration=configuration)
-
-        for resource in related:
-            if resource.role == "flow-run":
-                resource["prefect.infrastructure.identifier"] = str(
-                    flow_run.infrastructure_pid
-                )
-
-        emit_event(
-            event="prefect.worker.cancelled-flow-run",
-            resource=self._event_resource(),
-            related=related,
-        )
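For reference, the healthcheck server lifecycle used in `start` follows uvicorn's cooperative shutdown protocol: run the server on a daemon thread, then flip `should_exit` and join. A generic sketch under those assumptions (the ASGI app below is a stand-in, not Prefect's actual healthcheck handler from `prefect.workers.server`):

```python
import threading
import time

import uvicorn


async def app(scope, receive, send):
    # minimal ASGI app: answer any HTTP request with 200 OK
    assert scope["type"] == "http"
    await send({"type": "http.response.start", "status": 200, "headers": []})
    await send({"type": "http.response.body", "body": b"OK"})


server = uvicorn.Server(
    uvicorn.Config(app, host="127.0.0.1", port=8080, log_level="error")
)
thread = threading.Thread(
    target=server.run, name="healthcheck-server-thread", daemon=True
)
thread.start()

time.sleep(2)  # stand-in for the worker's main loops doing work

server.should_exit = True  # uvicorn checks this flag and shuts down cleanly
thread.join()
```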