prefect-client 3.0.0rc10__py3-none-any.whl → 3.0.0rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prefect/_internal/concurrency/api.py +1 -1
- prefect/_internal/retries.py +61 -0
- prefect/client/cloud.py +1 -1
- prefect/client/schemas/objects.py +1 -1
- prefect/concurrency/asyncio.py +3 -3
- prefect/concurrency/events.py +1 -1
- prefect/concurrency/services.py +3 -2
- prefect/concurrency/sync.py +19 -5
- prefect/context.py +8 -2
- prefect/deployments/__init__.py +28 -15
- prefect/deployments/steps/pull.py +7 -0
- prefect/flow_engine.py +5 -7
- prefect/flows.py +179 -65
- prefect/futures.py +53 -7
- prefect/logging/loggers.py +1 -1
- prefect/runner/runner.py +93 -20
- prefect/runner/server.py +20 -22
- prefect/runner/submit.py +0 -8
- prefect/runtime/flow_run.py +38 -3
- prefect/settings.py +9 -13
- prefect/task_worker.py +1 -1
- prefect/transactions.py +16 -0
- prefect/utilities/asyncutils.py +1 -0
- prefect/utilities/engine.py +34 -1
- prefect/workers/base.py +98 -208
- prefect/workers/process.py +262 -4
- prefect/workers/server.py +27 -9
- {prefect_client-3.0.0rc10.dist-info → prefect_client-3.0.0rc11.dist-info}/METADATA +3 -3
- {prefect_client-3.0.0rc10.dist-info → prefect_client-3.0.0rc11.dist-info}/RECORD +32 -31
- {prefect_client-3.0.0rc10.dist-info → prefect_client-3.0.0rc11.dist-info}/LICENSE +0 -0
- {prefect_client-3.0.0rc10.dist-info → prefect_client-3.0.0rc11.dist-info}/WHEEL +0 -0
- {prefect_client-3.0.0rc10.dist-info → prefect_client-3.0.0rc11.dist-info}/top_level.txt +0 -0
prefect/workers/base.py
CHANGED
```diff
@@ -1,7 +1,9 @@
 import abc
 import inspect
-import
-from
+import threading
+from contextlib import AsyncExitStack
+from functools import partial
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set, Type, Union
 from uuid import uuid4
 
 import anyio
@@ -12,49 +14,31 @@ from pydantic.json_schema import GenerateJsonSchema
 from typing_extensions import Literal
 
 import prefect
-from prefect._internal.compatibility.experimental import (
-    EXPERIMENTAL_WARNING,
-    ExperimentalFeature,
-    experiment_enabled,
-)
 from prefect._internal.schemas.validators import return_v_or_none
 from prefect.client.orchestration import PrefectClient, get_client
 from prefect.client.schemas.actions import WorkPoolCreate, WorkPoolUpdate
-from prefect.client.schemas.filters import (
-    FlowRunFilter,
-    FlowRunFilterId,
-    FlowRunFilterState,
-    FlowRunFilterStateName,
-    FlowRunFilterStateType,
-    WorkPoolFilter,
-    WorkPoolFilterName,
-    WorkQueueFilter,
-    WorkQueueFilterName,
-)
 from prefect.client.schemas.objects import StateType, WorkPool
 from prefect.client.utilities import inject_client
 from prefect.events import Event, RelatedResource, emit_event
 from prefect.events.related import object_as_related_resource, tags_as_related_resources
 from prefect.exceptions import (
     Abort,
-    InfrastructureNotAvailable,
-    InfrastructureNotFound,
     ObjectNotFound,
 )
 from prefect.logging.loggers import PrefectLogAdapter, flow_run_logger, get_logger
 from prefect.plugins import load_prefect_collections
 from prefect.settings import (
     PREFECT_API_URL,
-    PREFECT_EXPERIMENTAL_WARN,
-    PREFECT_EXPERIMENTAL_WARN_ENHANCED_CANCELLATION,
     PREFECT_TEST_MODE,
     PREFECT_WORKER_HEARTBEAT_SECONDS,
     PREFECT_WORKER_PREFETCH_SECONDS,
+    PREFECT_WORKER_QUERY_SECONDS,
     get_current_settings,
 )
 from prefect.states import Crashed, Pending, exception_to_failed_state
 from prefect.utilities.dispatch import get_registry_for_type, register_base_type
 from prefect.utilities.engine import propose_state
+from prefect.utilities.services import critical_service_loop
 from prefect.utilities.slugify import slugify
 from prefect.utilities.templating import (
     apply_values,
@@ -237,22 +221,7 @@ class BaseJobConfiguration(BaseModel):
         """
         Generate a command for a flow run job.
         """
-
-        if (
-            PREFECT_EXPERIMENTAL_WARN
-            and PREFECT_EXPERIMENTAL_WARN_ENHANCED_CANCELLATION
-        ):
-            warnings.warn(
-                EXPERIMENTAL_WARNING.format(
-                    feature="Enhanced flow run cancellation",
-                    group="enhanced_cancellation",
-                    help="",
-                ),
-                ExperimentalFeature,
-                stacklevel=3,
-            )
-            return "prefect flow-run execute"
-        return "python -m prefect.engine"
+        return "prefect flow-run execute"
 
     @staticmethod
     def _base_flow_run_labels(flow_run: "FlowRun") -> Dict[str, str]:
@@ -413,12 +382,14 @@ class BaseWorker(abc.ABC):
                 ensure that work pools are not created accidentally.
             limit: The maximum number of flow runs this worker should be running at
                 a given time.
+            heartbeat_interval_seconds: The number of seconds between worker heartbeats.
             base_job_template: If creating the work pool, provide the base job
                 template to use. Logs a warning if the pool already exists.
         """
         if name and ("/" in name or "%" in name):
             raise ValueError("Worker name cannot contain '/' or '%'")
         self.name = name or f"{self.__class__.__name__} {uuid4()}"
+        self._started_event: Optional[Event] = None
         self._logger = get_logger(f"worker.{self.__class__.type}.{self.name.lower()}")
 
         self.is_setup = False
@@ -435,6 +406,7 @@ class BaseWorker(abc.ABC):
         )
 
         self._work_pool: Optional[WorkPool] = None
+        self._exit_stack: AsyncExitStack = AsyncExitStack()
         self._runs_task_group: Optional[anyio.abc.TaskGroup] = None
         self._client: Optional[PrefectClient] = None
         self._last_polled_time: pendulum.DateTime = pendulum.now("utc")
@@ -511,6 +483,86 @@ class BaseWorker(abc.ABC):
             },
         )
 
+    async def start(
+        self,
+        run_once: bool = False,
+        with_healthcheck: bool = False,
+        printer: Callable[..., None] = print,
+    ):
+        """
+        Starts the worker and runs the main worker loops.
+
+        By default, the worker will run loops to poll for scheduled/cancelled flow
+        runs and sync with the Prefect API server.
+
+        If `run_once` is set, the worker will only run each loop once and then return.
+
+        If `with_healthcheck` is set, the worker will start a healthcheck server which
+        can be used to determine if the worker is still polling for flow runs and restart
+        the worker if necessary.
+
+        Args:
+            run_once: If set, the worker will only run each loop once then return.
+            with_healthcheck: If set, the worker will start a healthcheck server.
+            printer: A `print`-like function where logs will be reported.
+        """
+        healthcheck_server = None
+        healthcheck_thread = None
+        try:
+            async with self as worker:
+                # wait for an initial heartbeat to configure the worker
+                await worker.sync_with_backend()
+                # schedule the scheduled flow run polling loop
+                async with anyio.create_task_group() as loops_task_group:
+                    loops_task_group.start_soon(
+                        partial(
+                            critical_service_loop,
+                            workload=self.get_and_submit_flow_runs,
+                            interval=PREFECT_WORKER_QUERY_SECONDS.value(),
+                            run_once=run_once,
+                            jitter_range=0.3,
+                            backoff=4,  # Up to ~1 minute interval during backoff
+                        )
+                    )
+                    # schedule the sync loop
+                    loops_task_group.start_soon(
+                        partial(
+                            critical_service_loop,
+                            workload=self.sync_with_backend,
+                            interval=self.heartbeat_interval_seconds,
+                            run_once=run_once,
+                            jitter_range=0.3,
+                            backoff=4,
+                        )
+                    )
+
+                    self._started_event = await self._emit_worker_started_event()
+
+                    if with_healthcheck:
+                        from prefect.workers.server import build_healthcheck_server
+
+                        # we'll start the ASGI server in a separate thread so that
+                        # uvicorn does not block the main thread
+                        healthcheck_server = build_healthcheck_server(
+                            worker=worker,
+                            query_interval_seconds=PREFECT_WORKER_QUERY_SECONDS.value(),
+                        )
+                        healthcheck_thread = threading.Thread(
+                            name="healthcheck-server-thread",
+                            target=healthcheck_server.run,
+                            daemon=True,
+                        )
+                        healthcheck_thread.start()
+                    printer(f"Worker {worker.name!r} started!")
+        finally:
+            if healthcheck_server and healthcheck_thread:
+                self._logger.debug("Stopping healthcheck server...")
+                healthcheck_server.should_exit = True
+                healthcheck_thread.join()
+                self._logger.debug("Healthcheck server stopped.")
+
+        printer(f"Worker {worker.name!r} stopped!")
+
     @abc.abstractmethod
     async def run(
         self,
```
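The new `BaseWorker.start()` above schedules its polling and sync workloads on a single anyio task group, wrapping each `critical_service_loop` call in `functools.partial` because `start_soon` takes a bare callable rather than keyword arguments. The following is a minimal sketch of that pattern only, not code from this package: `poll_for_runs` and `heartbeat` are hypothetical stand-ins for `get_and_submit_flow_runs` and `sync_with_backend`, and `service_loop` is a simplified stand-in for `critical_service_loop`.

```python
from functools import partial

import anyio


async def service_loop(workload, interval: float, run_once: bool = False) -> None:
    # Simplified stand-in for prefect.utilities.services.critical_service_loop
    while True:
        await workload()
        if run_once:
            return
        await anyio.sleep(interval)


async def poll_for_runs() -> None:
    # Hypothetical workload standing in for get_and_submit_flow_runs
    print("polling for scheduled flow runs...")


async def heartbeat() -> None:
    # Hypothetical workload standing in for sync_with_backend
    print("syncing with the backend...")


async def main() -> None:
    async with anyio.create_task_group() as tg:
        # start_soon expects a callable; partial binds each loop's arguments,
        # mirroring how BaseWorker.start() schedules its two loops.
        tg.start_soon(partial(service_loop, poll_for_runs, 10.0, True))
        tg.start_soon(partial(service_loop, heartbeat, 30.0, True))


if __name__ == "__main__":
    anyio.run(main)
```

In the real worker the loops also apply jitter and exponential backoff (the `jitter_range` and `backoff` arguments in the hunk above), which this sketch omits.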
```diff
@@ -525,20 +577,6 @@ class BaseWorker(abc.ABC):
             "Workers must implement a method for running submitted flow runs"
         )
 
-    async def kill_infrastructure(
-        self,
-        infrastructure_pid: str,
-        configuration: BaseJobConfiguration,
-        grace_seconds: int = 30,
-    ):
-        """
-        Method for killing infrastructure created by a worker. Should be implemented by
-        individual workers if they support killing infrastructure.
-        """
-        raise NotImplementedError(
-            "This worker does not support killing infrastructure."
-        )
-
     @classmethod
     def __dispatch_key__(cls):
         if cls.__name__ == "BaseWorker":
@@ -557,8 +595,8 @@ class BaseWorker(abc.ABC):
            raise ValueError("`PREFECT_API_URL` must be set to start a Worker.")
 
         self._client = get_client()
-        await self.
-        await self.
+        await self._exit_stack.enter_async_context(self._client)
+        await self._exit_stack.enter_async_context(self._runs_task_group)
 
         self.is_setup = True
 
@@ -568,14 +606,14 @@ class BaseWorker(abc.ABC):
         self.is_setup = False
         for scope in self._scheduled_task_scopes:
             scope.cancel()
-
-
-        if self.
-            await self.
+
+        await self._exit_stack.__aexit__(*exc_info)
+        if self._started_event:
+            await self._emit_worker_stopped_event(self._started_event)
         self._runs_task_group = None
         self._client = None
 
-    def is_worker_still_polling(self, query_interval_seconds:
+    def is_worker_still_polling(self, query_interval_seconds: float) -> bool:
         """
         This method is invoked by a webserver healthcheck handler
         and returns a boolean indicating if the worker has recorded a
```
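The setup/teardown hunks above replace the worker's manual enter/exit bookkeeping with a single `contextlib.AsyncExitStack`: the client and the runs task group are entered onto the stack in `setup()`, and `teardown()` unwinds everything with one `__aexit__` call. A minimal sketch of that pattern follows, using a hypothetical `FakeClient` resource rather than the package's client or task group.

```python
from contextlib import AsyncExitStack

import anyio


class FakeClient:
    # Hypothetical async resource standing in for the Prefect client / task group
    async def __aenter__(self):
        print("client opened")
        return self

    async def __aexit__(self, *exc_info):
        print("client closed")


class SketchWorker:
    def __init__(self) -> None:
        self._exit_stack = AsyncExitStack()
        self._client = None

    async def setup(self) -> None:
        # Enter resources onto the stack; they stay open until teardown
        self._client = await self._exit_stack.enter_async_context(FakeClient())

    async def teardown(self, *exc_info) -> None:
        # One call closes everything entered on the stack, in reverse order
        await self._exit_stack.__aexit__(*exc_info)


async def main() -> None:
    worker = SketchWorker()
    await worker.setup()
    await worker.teardown(None, None, None)


if __name__ == "__main__":
    anyio.run(main)
```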
```diff
@@ -611,138 +649,6 @@ class BaseWorker(abc.ABC):
 
         return await self._submit_scheduled_flow_runs(flow_run_response=runs_response)
 
-    async def check_for_cancelled_flow_runs(self):
-        if not self.is_setup:
-            raise RuntimeError(
-                "Worker is not set up. Please make sure you are running this worker "
-                "as an async context manager."
-            )
-
-        self._logger.debug("Checking for cancelled flow runs...")
-
-        work_queue_filter = (
-            WorkQueueFilter(name=WorkQueueFilterName(any_=list(self._work_queues)))
-            if self._work_queues
-            else None
-        )
-
-        named_cancelling_flow_runs = await self._client.read_flow_runs(
-            flow_run_filter=FlowRunFilter(
-                state=FlowRunFilterState(
-                    type=FlowRunFilterStateType(any_=[StateType.CANCELLED]),
-                    name=FlowRunFilterStateName(any_=["Cancelling"]),
-                ),
-                # Avoid duplicate cancellation calls
-                id=FlowRunFilterId(not_any_=list(self._cancelling_flow_run_ids)),
-            ),
-            work_pool_filter=WorkPoolFilter(
-                name=WorkPoolFilterName(any_=[self._work_pool_name])
-            ),
-            work_queue_filter=work_queue_filter,
-        )
-
-        typed_cancelling_flow_runs = await self._client.read_flow_runs(
-            flow_run_filter=FlowRunFilter(
-                state=FlowRunFilterState(
-                    type=FlowRunFilterStateType(any_=[StateType.CANCELLING]),
-                ),
-                # Avoid duplicate cancellation calls
-                id=FlowRunFilterId(not_any_=list(self._cancelling_flow_run_ids)),
-            ),
-            work_pool_filter=WorkPoolFilter(
-                name=WorkPoolFilterName(any_=[self._work_pool_name])
-            ),
-            work_queue_filter=work_queue_filter,
-        )
-
-        cancelling_flow_runs = named_cancelling_flow_runs + typed_cancelling_flow_runs
-
-        if cancelling_flow_runs:
-            self._logger.info(
-                f"Found {len(cancelling_flow_runs)} flow runs awaiting cancellation."
-            )
-
-        for flow_run in cancelling_flow_runs:
-            self._cancelling_flow_run_ids.add(flow_run.id)
-            self._runs_task_group.start_soon(self.cancel_run, flow_run)
-
-        return cancelling_flow_runs
-
-    async def cancel_run(self, flow_run: "FlowRun"):
-        run_logger = self.get_flow_run_logger(flow_run)
-
-        try:
-            configuration = await self._get_configuration(flow_run)
-        except ObjectNotFound:
-            self._logger.warning(
-                f"Flow run {flow_run.id!r} cannot be cancelled by this worker:"
-                f" associated deployment {flow_run.deployment_id!r} does not exist."
-            )
-            await self._mark_flow_run_as_cancelled(
-                flow_run,
-                state_updates={
-                    "message": (
-                        "This flow run is missing infrastructure configuration information"
-                        " and cancellation cannot be guaranteed."
-                    )
-                },
-            )
-            return
-        else:
-            if configuration.is_using_a_runner:
-                self._logger.info(
-                    f"Skipping cancellation because flow run {str(flow_run.id)!r} is"
-                    " using enhanced cancellation. A dedicated runner will handle"
-                    " cancellation."
-                )
-                return
-
-        if not flow_run.infrastructure_pid:
-            run_logger.error(
-                f"Flow run '{flow_run.id}' does not have an infrastructure pid"
-                " attached. Cancellation cannot be guaranteed."
-            )
-            await self._mark_flow_run_as_cancelled(
-                flow_run,
-                state_updates={
-                    "message": (
-                        "This flow run is missing infrastructure tracking information"
-                        " and cancellation cannot be guaranteed."
-                    )
-                },
-            )
-            return
-
-        try:
-            await self.kill_infrastructure(
-                infrastructure_pid=flow_run.infrastructure_pid,
-                configuration=configuration,
-            )
-        except NotImplementedError:
-            self._logger.error(
-                f"Worker type {self.type!r} does not support killing created "
-                "infrastructure. Cancellation cannot be guaranteed."
-            )
-        except InfrastructureNotFound as exc:
-            self._logger.warning(f"{exc} Marking flow run as cancelled.")
-            await self._mark_flow_run_as_cancelled(flow_run)
-        except InfrastructureNotAvailable as exc:
-            self._logger.warning(f"{exc} Flow run cannot be cancelled by this worker.")
-        except Exception:
-            run_logger.exception(
-                "Encountered exception while killing infrastructure for flow run "
-                f"'{flow_run.id}'. Flow run may not be cancelled."
-            )
-            # We will try again on generic exceptions
-            self._cancelling_flow_run_ids.remove(flow_run.id)
-            return
-        else:
-            self._emit_flow_run_cancelled_event(
-                flow_run=flow_run, configuration=configuration
-            )
-            await self._mark_flow_run_as_cancelled(flow_run)
-            run_logger.info(f"Cancelled flow run '{flow_run.id}'!")
-
     async def _update_local_work_pool_info(self):
         try:
             work_pool = await self._client.read_work_pool(
@@ -1162,6 +1068,7 @@ class BaseWorker(abc.ABC):
     async def __aenter__(self):
         self._logger.debug("Entering worker context...")
         await self.setup()
+
         return self
 
     async def __aexit__(self, *exc_info):
@@ -1245,20 +1152,3 @@ class BaseWorker(abc.ABC):
             related=self._event_related_resources(),
             follows=started_event,
         )
-
-    def _emit_flow_run_cancelled_event(
-        self, flow_run: "FlowRun", configuration: BaseJobConfiguration
-    ):
-        related = self._event_related_resources(configuration=configuration)
-
-        for resource in related:
-            if resource.role == "flow-run":
-                resource["prefect.infrastructure.identifier"] = str(
-                    flow_run.infrastructure_pid
-                )
-
-        emit_event(
-            event="prefect.worker.cancelled-flow-run",
-            resource=self._event_resource(),
-            related=related,
-        )
```