prefect-client 2.14.9__py3-none-any.whl → 2.14.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prefect/__init__.py +4 -1
- prefect/client/orchestration.py +1 -2
- prefect/deployments/runner.py +5 -1
- prefect/engine.py +176 -11
- prefect/events/clients.py +216 -5
- prefect/events/filters.py +214 -0
- prefect/exceptions.py +4 -0
- prefect/infrastructure/base.py +106 -1
- prefect/infrastructure/container.py +52 -0
- prefect/infrastructure/process.py +38 -0
- prefect/infrastructure/provisioners/__init__.py +2 -0
- prefect/infrastructure/provisioners/cloud_run.py +7 -1
- prefect/infrastructure/provisioners/container_instance.py +797 -0
- prefect/states.py +26 -3
- prefect/utilities/services.py +10 -0
- prefect/workers/__init__.py +1 -0
- prefect/workers/block.py +226 -0
- prefect/workers/utilities.py +2 -1
- {prefect_client-2.14.9.dist-info → prefect_client-2.14.10.dist-info}/METADATA +2 -1
- {prefect_client-2.14.9.dist-info → prefect_client-2.14.10.dist-info}/RECORD +23 -20
- {prefect_client-2.14.9.dist-info → prefect_client-2.14.10.dist-info}/LICENSE +0 -0
- {prefect_client-2.14.9.dist-info → prefect_client-2.14.10.dist-info}/WHEEL +0 -0
- {prefect_client-2.14.9.dist-info → prefect_client-2.14.10.dist-info}/top_level.txt +0 -0
prefect/__init__.py
CHANGED
@@ -44,7 +44,7 @@ from prefect.context import tags
 from prefect.manifests import Manifest
 from prefect.utilities.annotations import unmapped, allow_failure
 from prefect.results import BaseResult
-from prefect.engine import pause_flow_run, resume_flow_run
+from prefect.engine import pause_flow_run, resume_flow_run, suspend_flow_run
 from prefect.client.orchestration import get_client, PrefectClient
 from prefect.client.cloud import get_cloud_client, CloudClient
 import prefect.variables
@@ -172,4 +172,7 @@ __all__ = [
     "Runner",
     "serve",
     "deploy",
+    "pause_flow_run",
+    "resume_flow_run",
+    "suspend_flow_run",
 ]
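With this change, `suspend_flow_run` is importable from the top-level `prefect` namespace alongside `pause_flow_run` and `resume_flow_run`. A minimal sketch (the UUID is a placeholder):

from uuid import UUID

from prefect import suspend_flow_run

# Suspend another flow run by id; suspend_flow_run is sync-compatible, so it
# can be called directly from synchronous code.
suspend_flow_run(flow_run_id=UUID("00000000-0000-0000-0000-000000000000"))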
prefect/client/orchestration.py
CHANGED
@@ -2181,7 +2181,7 @@ class PrefectClient:
         limit: int = None,
         offset: int = None,
         sort: LogSort = LogSort.TIMESTAMP_ASC,
-    ) -> None:
+    ) -> List[Log]:
         """
         Read flow and task run logs.
         """
@@ -2491,7 +2491,6 @@ class PrefectClient:
             f"/work_pools/{work_pool_name}/get_scheduled_flow_runs",
             json=body,
         )
-
         return pydantic.parse_obj_as(List[WorkerFlowRunResponse], response.json())
 
     async def create_artifact(
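With the corrected annotation, `read_logs` now advertises the `List[Log]` it already returned at runtime. A minimal sketch of calling it, assuming a reachable Prefect API (the `limit` value is arbitrary):

import asyncio

from prefect import get_client


async def main():
    async with get_client() as client:
        # read_logs is now annotated as returning List[Log]
        logs = await client.read_logs(limit=10)
        for log in logs:
            print(log.timestamp, log.message)


asyncio.run(main())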
prefect/deployments/runner.py
CHANGED
@@ -774,7 +774,11 @@ async def deploy(
     is_docker_based_work_pool = get_from_dict(
         work_pool.base_job_template, "variables.properties.image", False
     )
-    if not is_docker_based_work_pool:
+    is_block_based_work_pool = get_from_dict(
+        work_pool.base_job_template, "variables.properties.block", False
+    )
+    # carve out an exception for block based work pools that only have a block in their base job template
+    if not is_docker_based_work_pool and not is_block_based_work_pool:
         raise ValueError(
             f"Work pool {work_pool_name!r} does not support custom Docker images. "
             "Please use a work pool with an `image` variable in its base job template."
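The check walks the work pool's base job template with dotted keys, exactly as the call sites above do. A minimal sketch of that lookup, assuming `get_from_dict` is the helper from `prefect.utilities.collections` (the import path is an assumption) and using made-up templates:

from prefect.utilities.collections import get_from_dict  # assumed import path

# Hypothetical base job templates, for illustration only
docker_pool_template = {"variables": {"properties": {"image": {"type": "string"}}}}
block_pool_template = {"variables": {"properties": {"block": {"type": "string"}}}}

# A falsy default means "key path not present"
assert get_from_dict(docker_pool_template, "variables.properties.image", False)
assert get_from_dict(block_pool_template, "variables.properties.block", False)
assert not get_from_dict(block_pool_template, "variables.properties.image", False)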
prefect/engine.py
CHANGED
@@ -84,6 +84,7 @@ import asyncio
 import contextlib
 import logging
 import os
+import random
 import signal
 import sys
 import threading
@@ -112,6 +113,7 @@ from typing_extensions import Literal
 import prefect
 import prefect.context
 import prefect.plugins
+from prefect._internal.compatibility.deprecated import deprecated_parameter
 from prefect._internal.concurrency.api import create_call, from_async, from_sync
 from prefect._internal.concurrency.calls import get_current_call
 from prefect._internal.concurrency.cancellation import CancelledError, get_deadline
@@ -172,6 +174,7 @@ from prefect.states import (
     Pending,
     Running,
     State,
+    Suspended,
     exception_to_crashed_state,
     exception_to_failed_state,
     get_state_exception,
@@ -941,6 +944,15 @@ async def orchestrate_flow_run(
 
 
 @sync_compatible
+@deprecated_parameter(
+    "flow_run_id", start_date="Dec 2023", help="Use `suspend_flow_run` instead."
+)
+@deprecated_parameter(
+    "reschedule",
+    start_date="Dec 2023",
+    when=lambda p: p is True,
+    help="Use `suspend_flow_run` instead.",
+)
 async def pause_flow_run(
     flow_run_id: UUID = None,
     timeout: int = 300,
@@ -949,7 +961,7 @@ async def pause_flow_run(
     key: str = None,
 ):
     """
-    Pauses the current flow run by
+    Pauses the current flow run by blocking execution until resumed.
 
     When called within a flow run, execution will block and no downstream tasks will
     run until the flow is resumed. Task runs that have already started will continue
@@ -1038,7 +1050,7 @@ async def _in_process_pause(
 
     if reschedule:
         # If a rescheduled pause, exit this process so the run can be resubmitted later
-        raise Pause()
+        raise Pause(state=state)
 
     # Otherwise, block and check for completion on an interval
     with anyio.move_on_after(timeout):
@@ -1088,6 +1100,90 @@ async def _out_of_process_pause(
         raise RuntimeError(response.details.reason)
 
 
+@sync_compatible
+@inject_client
+async def suspend_flow_run(
+    flow_run_id: Optional[UUID] = None,
+    timeout: Optional[int] = 300,
+    key: Optional[str] = None,
+    client: PrefectClient = None,
+):
+    """
+    Suspends a flow run by stopping code execution until resumed.
+
+    When suspended, the flow run will continue execution until the NEXT task is
+    orchestrated, at which point the flow will exit. Any tasks that have
+    already started will run until completion. When resumed, the flow run will
+    be rescheduled to finish execution. In order suspend a flow run in this
+    way, the flow needs to have an associated deployment and results need to be
+    configured with the `persist_results` option.
+
+    Args:
+        flow_run_id: a flow run id. If supplied, this function will attempt to
+            suspend the specified flow run. If not supplied will attempt to
+            suspend the current flow run.
+        timeout: the number of seconds to wait for the flow to be resumed before
+            failing. Defaults to 5 minutes (300 seconds). If the pause timeout
+            exceeds any configured flow-level timeout, the flow might fail even
+            after resuming.
+        key: An optional key to prevent calling suspend more than once. This
+            defaults to a random string and prevents suspends from running the
+            same suspend twice. A custom key can be supplied for custom
+            suspending behavior.
+    """
+    context = FlowRunContext.get()
+
+    if flow_run_id is None:
+        if TaskRunContext.get():
+            raise RuntimeError("Cannot suspend task runs.")
+
+        if context is None or context.flow_run is None:
+            raise RuntimeError(
+                "Flow runs can only be suspended from within a flow run."
+            )
+
+        logger = get_run_logger(context=context)
+        logger.info(
+            "Suspending flow run, execution will be rescheduled when this flow run is"
+            " resumed."
+        )
+        flow_run_id = context.flow_run.id
+        suspending_current_flow_run = True
+        pause_counter = _observed_flow_pauses(context)
+        pause_key = key or str(pause_counter)
+    else:
+        # Since we're suspending another flow run we need to generate a pause
+        # key that won't conflict with whatever suspends/pauses that flow may
+        # have. Since this method won't be called during that flow run it's
+        # okay that this is non-deterministic.
+        suspending_current_flow_run = False
+        pause_key = key or str(uuid4())
+
+    try:
+        state = await propose_state(
+            client=client,
+            state=Suspended(timeout_seconds=timeout, pause_key=pause_key),
+            flow_run_id=flow_run_id,
+        )
+    except Abort as exc:
+        # Aborted requests mean the suspension is not allowed
+        raise RuntimeError(f"Flow run cannot be suspended: {exc}")
+
+    if state.is_running():
+        # The orchestrator requests that this suspend be ignored
+        return
+
+    if not state.is_paused():
+        # If we receive anything but a PAUSED state, we are unable to continue
+        raise RuntimeError(
+            f"Flow run cannot be suspended. Received unexpected state from API: {state}"
+        )
+
+    if suspending_current_flow_run:
+        # Exit this process so the run can be resubmitted later
+        raise Pause()
+
+
 @sync_compatible
 async def resume_flow_run(flow_run_id):
     """
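Taken together with the new `Suspended` state, this gives flows a way to give up their process entirely and resume later. A minimal usage sketch, assuming the flow runs from a deployment and persists results as the docstring requires (names are illustrative):

from prefect import flow, resume_flow_run, suspend_flow_run


@flow(persist_result=True)
def approval_flow():
    # Work before the checkpoint runs normally.
    # suspend_flow_run exits this process; the run is rescheduled when resumed.
    suspend_flow_run(timeout=3600)
    # Work after the checkpoint runs in a fresh process after resume.


# Elsewhere, e.g. in an ops script, resume the suspended run by its id
# (placeholder UUID shown):
# resume_flow_run("00000000-0000-0000-0000-000000000000")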
@@ -1585,10 +1681,18 @@ async def begin_task_run(
             state = task_run.state
 
         except Pause:
+            # A pause signal here should mean the flow run suspended, so we
+            # should do the same. We'll look up the flow run's pause state to
+            # try and reuse it, so we capture any data like timeouts.
+            flow_run = await client.read_flow_run(task_run.flow_run_id)
+            if flow_run.state and flow_run.state.is_paused():
+                state = flow_run.state
+            else:
+                state = Suspended()
+
             task_run_logger(task_run).info(
                 "Task run encountered a pause signal during orchestration."
             )
-            state = Paused()
 
         return state
 
@@ -1702,13 +1806,74 @@ async def orchestrate_task_run(
     last_state = task_run.state
 
     # Transition from `PENDING` -> `RUNNING`
-    state = await propose_state(
-        client,
-        Running(
-            state_details=StateDetails(cache_key=cache_key, refresh_cache=refresh_cache)
-        ),
-        task_run_id=task_run.id,
-    )
+    try:
+        state = await propose_state(
+            client,
+            Running(
+                state_details=StateDetails(
+                    cache_key=cache_key, refresh_cache=refresh_cache
+                )
+            ),
+            task_run_id=task_run.id,
+        )
+    except Pause as exc:
+        # We shouldn't get a pause signal without a state, but if this happens,
+        # just use a Paused state to assume an in-process pause.
+        state = exc.state if exc.state else Paused()
+
+        # If a flow submits tasks and then pauses, we may reach this point due
+        # to concurrency timing because the tasks will try to transition after
+        # the flow run has paused. Orchestration will send back a Paused state
+        # for the task runs.
+        if state.state_details.pause_reschedule:
+            # If we're being asked to pause and reschedule, we should exit the
+            # task and expect to be resumed later.
+            raise
+
+    if state.is_paused():
+        BACKOFF_MAX = 10  # Seconds
+        backoff_count = 0
+
+        async def tick():
+            nonlocal backoff_count
+            if backoff_count < BACKOFF_MAX:
+                backoff_count += 1
+            interval = 1 + backoff_count + random.random() * backoff_count
+            await anyio.sleep(interval)
+
+        # Enter a loop to wait for the task run to be resumed, i.e.
+        # become Pending, and then propose a Running state again.
+        while True:
+            await tick()
+
+            # Propose a Running state again. We do this instead of reading the
+            # task run because if the flow run times out, this lets
+            # orchestration fail the task run.
+            try:
+                state = await propose_state(
+                    client,
+                    Running(
+                        state_details=StateDetails(
+                            cache_key=cache_key, refresh_cache=refresh_cache
+                        )
+                    ),
+                    task_run_id=task_run.id,
+                )
+            except Pause as exc:
+                if not exc.state:
+                    continue
+
+                if exc.state.state_details.pause_reschedule:
+                    # If the pause state includes pause_reschedule, we should exit the
+                    # task and expect to be resumed later. We've already checked for this
+                    # above, but we check again here in case the state changed; e.g. the
+                    # flow run suspended.
+                    raise
+                else:
+                    # Propose a Running state again.
+                    continue
+            else:
+                break
 
     # Emit an event to capture the result of proposing a `RUNNING` state.
     last_event = _emit_task_run_state_change_event(
@@ -2207,7 +2372,7 @@ async def propose_state(
 
     elif response.status == SetStateStatus.REJECT:
        if response.state.is_paused():
-            raise Pause(response.details.reason)
+            raise Pause(response.details.reason, state=response.state)
         return response.state
 
     else:
prefect/events/clients.py
CHANGED
@@ -1,12 +1,42 @@
 import abc
 import asyncio
 from types import TracebackType
-from typing import
-from starlette.status import WS_1008_POLICY_VIOLATION
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    ClassVar,
+    Dict,
+    List,
+    Mapping,
+    Optional,
+    Tuple,
+    Type,
+)
+from uuid import UUID
+
+import orjson
+import pendulum
+
+try:
+    from cachetools import TTLCache
+except ImportError:
+    pass
+from starlette.status import WS_1008_POLICY_VIOLATION
 from websockets.client import WebSocketClientProtocol, connect
-from websockets.exceptions import
+from websockets.exceptions import (
+    ConnectionClosed,
+    ConnectionClosedError,
+    ConnectionClosedOK,
+)
 
 from prefect.events import Event
+from prefect.logging import get_logger
+from prefect.settings import PREFECT_API_KEY, PREFECT_API_URL
+
+if TYPE_CHECKING:
+    from prefect.events.filters import EventFilter
+
+logger = get_logger(__name__)
 
 
 class EventsClient(abc.ABC):
@@ -79,6 +109,20 @@ class AssertingEventsClient(EventsClient):
         return self
 
 
+def _get_api_url_and_key(
+    api_url: Optional[str], api_key: Optional[str]
+) -> Tuple[str, str]:
+    api_url = api_url or PREFECT_API_URL.value()
+    api_key = api_key or PREFECT_API_KEY.value()
+
+    if not api_url or not api_key:
+        raise ValueError(
+            "api_url and api_key must be provided or set in the Prefect configuration"
+        )
+
+    return api_url, api_key
+
+
 class PrefectCloudEventsClient(EventsClient):
     """A Prefect Events client that streams Events to a Prefect Cloud Workspace"""
 
@@ -87,8 +131,8 @@ class PrefectCloudEventsClient(EventsClient):
 
     def __init__(
         self,
-        api_url: str,
-        api_key: str,
+        api_url: str = None,
+        api_key: str = None,
         reconnection_attempts: int = 10,
         checkpoint_every: int = 20,
     ):
@@ -101,6 +145,8 @@ class PrefectCloudEventsClient(EventsClient):
             checkpoint_every: How often the client should sync with the server to
                 confirm receipt of all previously sent events
         """
+        api_url, api_key = _get_api_url_and_key(api_url, api_key)
+
         socket_url = (
             api_url.replace("https://", "wss://")
             .replace("http://", "ws://")
@@ -195,3 +241,168 @@ class PrefectCloudEventsClient(EventsClient):
             # a standard load balancer timeout, but after that, just take a
             # beat to let things come back around.
             await asyncio.sleep(1)
+
+
+SEEN_EVENTS_SIZE = 500_000
+SEEN_EVENTS_TTL = 120
+
+
+class PrefectCloudEventSubscriber:
+    """
+    Subscribes to a Prefect Cloud event stream, yielding events as they occur.
+
+    Example:
+
+        from prefect.events.clients import PrefectCloudEventSubscriber
+        from prefect.events.filters import EventFilter, EventNameFilter
+
+        filter = EventFilter(event=EventNameFilter(prefix=["prefect.flow-run."]))
+
+        async with PrefectCloudEventSubscriber(api_url, api_key, filter) as subscriber:
+            async for event in subscriber:
+                print(event.occurred, event.resource.id, event.event)
+
+    """
+
+    _websocket: Optional[WebSocketClientProtocol]
+    _filter: "EventFilter"
+    _seen_events: Mapping[UUID, bool]
+
+    def __init__(
+        self,
+        api_url: str = None,
+        api_key: str = None,
+        filter: "EventFilter" = None,
+        reconnection_attempts: int = 10,
+    ):
+        """
+        Args:
+            api_url: The base URL for a Prefect Cloud workspace
+            api_key: The API of an actor with the manage_events scope
+            reconnection_attempts: When the client is disconnected, how many times
+                the client should attempt to reconnect
+        """
+        api_url, api_key = _get_api_url_and_key(api_url, api_key)
+
+        from prefect.events.filters import EventFilter
+
+        self._filter = filter or EventFilter()
+        self._seen_events = TTLCache(maxsize=SEEN_EVENTS_SIZE, ttl=SEEN_EVENTS_TTL)
+
+        socket_url = (
+            api_url.replace("https://", "wss://")
+            .replace("http://", "ws://")
+            .rstrip("/")
+        ) + "/events/out"
+
+        logger.debug("Connecting to %s", socket_url)
+
+        self._api_key = api_key
+        self._connect = connect(
+            socket_url,
+            subprotocols=["prefect"],
+        )
+        self._websocket = None
+        self._reconnection_attempts = reconnection_attempts
+
+    async def __aenter__(self) -> "PrefectCloudEventSubscriber":
+        # Don't handle any errors in the initial connection, because these are most
+        # likely a permission or configuration issue that should propagate
+        await self._reconnect()
+        return self
+
+    async def _reconnect(self) -> None:
+        logger.debug("Reconnecting...")
+        if self._websocket:
+            self._websocket = None
+            await self._connect.__aexit__(None, None, None)
+
+        self._websocket = await self._connect.__aenter__()
+
+        # make sure we have actually connected
+        logger.debug(" pinging...")
+        pong = await self._websocket.ping()
+        await pong
+
+        logger.debug(" authenticating...")
+        await self._websocket.send(
+            orjson.dumps({"type": "auth", "token": self._api_key}).decode()
+        )
+
+        try:
+            message = orjson.loads(await self._websocket.recv())
+            logger.debug(" auth result %s", message)
+            assert message["type"] == "auth_success"
+        except (AssertionError, ConnectionClosedError) as e:
+            if isinstance(e, AssertionError) or e.code == WS_1008_POLICY_VIOLATION:
+                raise Exception(
+                    "Unable to authenticate to the event stream. Please ensure the "
+                    "provided api_key you are using is valid for this environment."
+                ) from e
+            raise
+
+        from prefect.events.filters import EventOccurredFilter
+
+        self._filter.occurred = EventOccurredFilter(
+            since=pendulum.now("UTC").subtract(minutes=1),
+            until=pendulum.now("UTC").add(years=1),
+        )
+
+        logger.debug(" filtering events since %s...", self._filter.occurred.since)
+        filter_message = {
+            "type": "filter",
+            "filter": self._filter.dict(json_compatible=True),
+        }
+        await self._websocket.send(orjson.dumps(filter_message).decode())
+
+    async def __aexit__(
+        self,
+        exc_type: Optional[Type[Exception]],
+        exc_val: Optional[Exception],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
+        self._websocket = None
+        await self._connect.__aexit__(exc_type, exc_val, exc_tb)
+
+    def __aiter__(self) -> "PrefectCloudEventSubscriber":
+        return self
+
+    async def __anext__(self) -> Event:
+        for i in range(self._reconnection_attempts + 1):
+            try:
+                # If we're here and the websocket is None, then we've had a failure in a
+                # previous reconnection attempt.
+                #
+                # Otherwise, after the first time through this loop, we're recovering
+                # from a ConnectionClosed, so reconnect now.
+                if not self._websocket or i > 0:
+                    await self._reconnect()
+                    assert self._websocket
+
+                while True:
+                    message = orjson.loads(await self._websocket.recv())
+                    event = Event.parse_obj(message["event"])
+
+                    if event.id in self._seen_events:
+                        continue
+                    self._seen_events[event.id] = True
+
+                    return event
+            except ConnectionClosedOK:
+                logger.debug('Connection closed with "OK" status')
+                raise StopAsyncIteration
+            except ConnectionClosed:
+                logger.debug(
                    "Connection closed with %s/%s attempts",
+                    i + 1,
+                    self._reconnection_attempts,
+                )
+                if i == self._reconnection_attempts:
+                    # this was our final chance, raise the most recent error
+                    raise
+
+                if i > 2:
+                    # let the first two attempts happen quickly in case this is just
+                    # a standard load balancer timeout, but after that, just take a
+                    # beat to let things come back around.
+                    await asyncio.sleep(1)