PyPI - prefect-client - Versions diffs - 3.0.0rc1__py3-none-any.whl → 3.0.0rc2__py3-none-any.whl - Mend

prefect-client 3.0.0rc1__py3-none-any.whl → 3.0.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

prefect/blocks/redis.py +168 -0
prefect/client/orchestration.py +17 -1
prefect/client/schemas/objects.py +12 -8
prefect/concurrency/asyncio.py +1 -1
prefect/concurrency/services.py +1 -1
prefect/deployments/base.py +7 -1
prefect/events/schemas/events.py +2 -0
prefect/flow_engine.py +2 -2
prefect/flow_runs.py +2 -2
prefect/flows.py +8 -1
prefect/futures.py +44 -43
prefect/input/run_input.py +4 -2
prefect/records/cache_policies.py +179 -0
prefect/settings.py +6 -3
prefect/states.py +6 -4
prefect/task_engine.py +169 -198
prefect/task_runners.py +6 -2
prefect/task_runs.py +203 -0
prefect/{task_server.py → task_worker.py} +37 -27
prefect/tasks.py +49 -22
prefect/transactions.py +6 -2
prefect/utilities/callables.py +74 -3
prefect/utilities/importtools.py +5 -5
prefect/variables.py +15 -10
prefect/workers/base.py +11 -1
{prefect_client-3.0.0rc1.dist-info → prefect_client-3.0.0rc2.dist-info}/METADATA +2 -1
{prefect_client-3.0.0rc1.dist-info → prefect_client-3.0.0rc2.dist-info}/RECORD +30 -27
{prefect_client-3.0.0rc1.dist-info → prefect_client-3.0.0rc2.dist-info}/LICENSE +0 -0
{prefect_client-3.0.0rc1.dist-info → prefect_client-3.0.0rc2.dist-info}/WHEEL +0 -0
{prefect_client-3.0.0rc1.dist-info → prefect_client-3.0.0rc2.dist-info}/top_level.txt +0 -0

prefect/settings.py CHANGED Viewed

@@ -1208,6 +1208,9 @@ PREFECT_API_SERVICES_FOREMAN_WORK_QUEUE_LAST_POLLED_TIMEOUT_SECONDS = Setting(
 """The number of seconds before a work queue is marked as not ready if it has not been
 polled."""
+PREFECT_API_LOG_RETRYABLE_ERRORS = Setting(bool, default=False)
+"""If `True`, log retryable errors in the API and it's services."""
 PREFECT_API_DEFAULT_LIMIT = Setting(
     int,
@@ -1527,9 +1530,9 @@ PREFECT_TASK_SCHEDULING_PENDING_TASK_TIMEOUT = Setting(
     default=timedelta(seconds=30),
 )
 """
-How long before a PENDING task are made available to another task server.  In practice,
-a task server should move a task from PENDING to RUNNING very quickly, so runs stuck in
-PENDING for a while is a sign that the task server may have crashed.
+How long before a PENDING task are made available to another task worker.  In practice,
+a task worker should move a task from PENDING to RUNNING very quickly, so runs stuck in
+PENDING for a while is a sign that the task worker may have crashed.
 """
 PREFECT_EXPERIMENTAL_ENABLE_EXTRA_RUNNER_ENDPOINTS = Setting(bool, default=False)

prefect/states.py CHANGED Viewed

@@ -204,7 +204,9 @@ async def exception_to_failed_state(
     return state
-async def return_value_to_state(retval: R, result_factory: ResultFactory) -> State[R]:
+async def return_value_to_state(
+    retval: R, result_factory: ResultFactory, key: str = None
+) -> State[R]:
     """
     Given a return value from a user's function, create a `State` the run should
     be placed in.
@@ -236,7 +238,7 @@ async def return_value_to_state(retval: R, result_factory: ResultFactory) -> Sta
         # Unless the user has already constructed a result explicitly, use the factory
         # to update the data to the correct type
         if not isinstance(state.data, BaseResult):
-            state.data = await result_factory.create_result(state.data)
+            state.data = await result_factory.create_result(state.data, key=key)
         return state
@@ -276,7 +278,7 @@ async def return_value_to_state(retval: R, result_factory: ResultFactory) -> Sta
         return State(
             type=new_state_type,
             message=message,
-            data=await result_factory.create_result(retval),
+            data=await result_factory.create_result(retval, key=key),
         )
     # Generators aren't portable, implicitly convert them to a list.
@@ -289,7 +291,7 @@ async def return_value_to_state(retval: R, result_factory: ResultFactory) -> Sta
     if isinstance(data, BaseResult):
         return Completed(data=data)
     else:
-        return Completed(data=await result_factory.create_result(data))
+        return Completed(data=await result_factory.create_result(data, key=key))
 @sync_compatible

prefect/task_engine.py CHANGED Viewed

@@ -13,12 +13,14 @@ from typing import (
     Iterable,
     Literal,
     Optional,
+    Sequence,
     Set,
     TypeVar,
     Union,
 )
 from uuid import UUID
+import anyio
 import pendulum
 from typing_extensions import ParamSpec
@@ -50,19 +52,19 @@ from prefect.settings import (
     PREFECT_TASKS_REFRESH_CACHE,
 )
 from prefect.states import (
+    AwaitingRetry,
     Failed,
     Paused,
     Pending,
     Retrying,
     Running,
-    StateDetails,
     exception_to_crashed_state,
     exception_to_failed_state,
     return_value_to_state,
 )
 from prefect.transactions import Transaction, transaction
 from prefect.utilities.asyncutils import run_coro_as_sync
-from prefect.utilities.callables import parameters_to_args_kwargs
+from prefect.utilities.callables import call_with_parameters
 from prefect.utilities.collections import visit_collection
 from prefect.utilities.engine import (
     _get_hook_name,
@@ -133,101 +135,54 @@ class TaskRunEngine(Generic[P, R]):
             )
             return False
-    def get_hooks(self, state: State, as_async: bool = False) -> Iterable[Callable]:
+    def call_hooks(self, state: State = None) -> Iterable[Callable]:
+        if state is None:
+            state = self.state
         task = self.task
         task_run = self.task_run
         if not task_run:
             raise ValueError("Task run is not set")
-        hooks = None
         if state.is_failed() and task.on_failure_hooks:
             hooks = task.on_failure_hooks
         elif state.is_completed() and task.on_completion_hooks:
             hooks = task.on_completion_hooks
+        else:
+            hooks = None
         for hook in hooks or []:
             hook_name = _get_hook_name(hook)
-            @contextmanager
-            def hook_context():
-                try:
-                    self.logger.info(
-                        f"Running hook {hook_name!r} in response to entering state"
-                        f" {state.name!r}"
-                    )
-                    yield
-                except Exception:
-                    self.logger.error(
-                        f"An error was encountered while running hook {hook_name!r}",
-                        exc_info=True,
-                    )
-                else:
-                    self.logger.info(
-                        f"Hook {hook_name!r} finished running successfully"
-                    )
-            if as_async:
-                async def _hook_fn():
-                    with hook_context():
-                        result = hook(task, task_run, state)
-                        if inspect.isawaitable(result):
-                            await result
+            try:
+                self.logger.info(
+                    f"Running hook {hook_name!r} in response to entering state"
+                    f" {state.name!r}"
+                )
+                result = hook(task, task_run, state)
+                if inspect.isawaitable(result):
+                    run_coro_as_sync(result)
+            except Exception:
+                self.logger.error(
+                    f"An error was encountered while running hook {hook_name!r}",
+                    exc_info=True,
+                )
             else:
-                def _hook_fn():
-                    with hook_context():
-                        result = hook(task, task_run, state)
-                        if inspect.isawaitable(result):
-                            run_coro_as_sync(result)
-            yield _hook_fn
+                self.logger.info(f"Hook {hook_name!r} finished running successfully")
     def compute_transaction_key(self) -> str:
-        if self.task.result_storage_key is not None:
+        key = None
+        if self.task.cache_policy:
+            task_run_context = TaskRunContext.get()
+            key = self.task.cache_policy.compute_key(
+                task_ctx=task_run_context,
+                inputs=self.parameters,
+                flow_parameters=None,
+            )
+        elif self.task.result_storage_key is not None:
             key = _format_user_supplied_storage_key(self.task.result_storage_key)
-        else:
-            key = str(self.task_run.id)
         return key
-    def _compute_state_details(
-        self, include_cache_expiration: bool = False
-    ) -> StateDetails:
-        task_run_context = TaskRunContext.get()
-        ## setup cache metadata
-        cache_key = (
-            self.task.cache_key_fn(
-                task_run_context,
-                self.parameters or {},
-            )
-            if self.task.cache_key_fn
-            else None
-        )
-        # Ignore the cached results for a cache key, default = false
-        # Setting on task level overrules the Prefect setting (env var)
-        refresh_cache = (
-            self.task.refresh_cache
-            if self.task.refresh_cache is not None
-            else PREFECT_TASKS_REFRESH_CACHE.value()
-        )
-        if include_cache_expiration:
-            cache_expiration = (
-                (pendulum.now("utc") + self.task.cache_expiration)
-                if self.task.cache_expiration
-                else None
-            )
-        else:
-            cache_expiration = None
-        return StateDetails(
-            cache_key=cache_key,
-            refresh_cache=refresh_cache,
-            cache_expiration=cache_expiration,
-        )
     def _resolve_parameters(self):
         if not self.parameters:
             return {}
@@ -283,8 +238,7 @@ class TaskRunEngine(Generic[P, R]):
             )
             return
-        state_details = self._compute_state_details()
-        new_state = Running(state_details=state_details)
+        new_state = Running()
         state = self.set_state(new_state)
         BACKOFF_MAX = 10
@@ -344,17 +298,9 @@ class TaskRunEngine(Generic[P, R]):
         if result_factory is None:
             raise ValueError("Result factory is not set")
-        # dont put this inside function, else the transaction could get serialized
-        key = transaction.key
-        def key_fn():
-            return key
-        result_factory.storage_key_fn = key_fn
         terminal_state = run_coro_as_sync(
             return_value_to_state(
-                result,
-                result_factory=result_factory,
+                result, result_factory=result_factory, key=transaction.key
             )
         )
         transaction.stage(
@@ -362,20 +308,33 @@ class TaskRunEngine(Generic[P, R]):
             on_rollback_hooks=self.task.on_rollback_hooks,
             on_commit_hooks=self.task.on_commit_hooks,
         )
-        terminal_state.state_details = self._compute_state_details(
-            include_cache_expiration=True
-        )
+        if transaction.is_committed():
+            terminal_state.name = "Cached"
         self.set_state(terminal_state)
         return result
     def handle_retry(self, exc: Exception) -> bool:
-        """
-        If the task has retries left, and the retry condition is met, set the task to retrying.
+        """Handle any task run retries.
+        - If the task has retries left, and the retry condition is met, set the task to retrying and return True.
+          - If the task has a retry delay, place in AwaitingRetry state with a delayed scheduled time.
         - If the task has no retries left, or the retry condition is not met, return False.
-        - If the task has retries left, and the retry condition is met, return True.
         """
         if self.retries < self.task.retries and self.can_retry:
-            self.set_state(Retrying(), force=True)
+            if self.task.retry_delay_seconds:
+                delay = (
+                    self.task.retry_delay_seconds[
+                        min(self.retries, len(self.task.retry_delay_seconds) - 1)
+                    ]  # repeat final delay value if attempts exceed specified delays
+                    if isinstance(self.task.retry_delay_seconds, Sequence)
+                    else self.task.retry_delay_seconds
+                )
+                new_state = AwaitingRetry(
+                    scheduled_time=pendulum.now("utc").add(seconds=delay)
+                )
+            else:
+                new_state = Retrying()
+            self.set_state(new_state, force=True)
             self.retries = self.retries + 1
             return True
         return False
@@ -461,7 +420,7 @@ class TaskRunEngine(Generic[P, R]):
             yield
     @contextmanager
-    def start(
+    def initialize_run(
         self,
         task_run_id: Optional[UUID] = None,
         dependencies: Optional[Dict[str, Set[TaskRunInput]]] = None,
@@ -496,12 +455,16 @@ class TaskRunEngine(Generic[P, R]):
                     )
                     yield self
                 except Exception:
                     # regular exceptions are caught and re-raised to the user
                     raise
                 except (Pause, Abort):
                     # Do not capture internal signals as crashes
                     raise
+                except GeneratorExit:
+                    # Do not capture generator exits as crashes
+                    raise
                 except BaseException as exc:
                     # BaseExceptions are caught and handled as crashes
                     self.handle_crash(exc)
@@ -528,9 +491,100 @@ class TaskRunEngine(Generic[P, R]):
                     self._client = None
     def is_running(self) -> bool:
-        if getattr(self, "task_run", None) is None:
+        """Whether or not the engine is currently running a task."""
+        if (task_run := getattr(self, "task_run", None)) is None:
             return False
-        return getattr(self, "task_run").state.is_running()
+        return task_run.state.is_running() or task_run.state.is_scheduled()
+    async def wait_until_ready(self):
+        """Waits until the scheduled time (if its the future), then enters Running."""
+        if scheduled_time := self.state.state_details.scheduled_time:
+            self.logger.info(
+                f"Waiting for scheduled time {scheduled_time} for task {self.task.name!r}"
+            )
+            await anyio.sleep((scheduled_time - pendulum.now("utc")).total_seconds())
+            self.set_state(
+                Retrying() if self.state.name == "AwaitingRetry" else Running(),
+                force=True,
+            )
+    # --------------------------
+    #
+    # The following methods compose the main task run loop
+    #
+    # --------------------------
+    @contextmanager
+    def start(
+        self,
+        task_run_id: Optional[UUID] = None,
+        dependencies: Optional[Dict[str, Set[TaskRunInput]]] = None,
+    ) -> Generator[None, None, None]:
+        with self.initialize_run(task_run_id=task_run_id, dependencies=dependencies):
+            with self.enter_run_context():
+                self.logger.debug(
+                    f"Executing task {self.task.name!r} for task run {self.task_run.name!r}..."
+                )
+                self.begin_run()
+                try:
+                    yield
+                finally:
+                    self.call_hooks()
+    @contextmanager
+    def transaction_context(self) -> Generator[Transaction, None, None]:
+        result_factory = getattr(TaskRunContext.get(), "result_factory", None)
+        # refresh cache setting is now repurposes as overwrite transaction record
+        overwrite = (
+            self.task.refresh_cache
+            if self.task.refresh_cache is not None
+            else PREFECT_TASKS_REFRESH_CACHE.value()
+        )
+        with transaction(
+            key=self.compute_transaction_key(),
+            store=ResultFactoryStore(result_factory=result_factory),
+            overwrite=overwrite,
+        ) as txn:
+            yield txn
+    @contextmanager
+    def run_context(self):
+        timeout_context = timeout_async if self.task.isasync else timeout
+        # reenter the run context to ensure it is up to date for every run
+        with self.enter_run_context():
+            try:
+                with timeout_context(seconds=self.task.timeout_seconds):
+                    yield self
+            except TimeoutError as exc:
+                self.handle_timeout(exc)
+            except Exception as exc:
+                self.handle_exception(exc)
+    def call_task_fn(
+        self, transaction: Transaction
+    ) -> Union[R, Coroutine[Any, Any, R]]:
+        """
+        Convenience method to call the task function. Returns a coroutine if the
+        task is async.
+        """
+        parameters = self.parameters or {}
+        if self.task.isasync:
+            async def _call_task_fn():
+                if transaction.is_committed():
+                    result = transaction.read()
+                else:
+                    result = await call_with_parameters(self.task.fn, parameters)
+                self.handle_success(result, transaction=transaction)
+            return _call_task_fn()
+        else:
+            if transaction.is_committed():
+                result = transaction.read()
+            else:
+                result = call_with_parameters(self.task.fn, parameters)
+            self.handle_success(result, transaction=transaction)
 def run_task_sync(
@@ -550,56 +604,18 @@ def run_task_sync(
         wait_for=wait_for,
         context=context,
     )
-    # This is a context manager that keeps track of the run of the task run.
-    with engine.start(task_run_id=task_run_id, dependencies=dependencies) as run:
-        with run.enter_run_context():
-            run.begin_run()
-            while run.is_running():
-                # enter run context on each loop iteration to ensure the context
-                # contains the latest task run metadata
-                with run.enter_run_context():
-                    try:
-                        # This is where the task is actually run.
-                        with timeout(seconds=run.task.timeout_seconds):
-                            call_args, call_kwargs = parameters_to_args_kwargs(
-                                task.fn, run.parameters or {}
-                            )
-                            run.logger.debug(
-                                f"Executing task {task.name!r} for task run {run.task_run.name!r}..."
-                            )
-                            result_factory = getattr(
-                                TaskRunContext.get(), "result_factory", None
-                            )
-                            with transaction(
-                                key=run.compute_transaction_key(),
-                                store=ResultFactoryStore(result_factory=result_factory),
-                            ) as txn:
-                                if txn.is_committed():
-                                    result = txn.read()
-                                else:
-                                    result = task.fn(*call_args, **call_kwargs)  # type: ignore
-                                # If the task run is successful, finalize it.
-                                # do this within the transaction lifecycle
-                                # in order to get the proper result serialization
-                                run.handle_success(result, transaction=txn)
-                    except TimeoutError as exc:
-                        run.handle_timeout(exc)
-                    except Exception as exc:
-                        run.handle_exception(exc)
-            if run.state.is_final():
-                for hook in run.get_hooks(run.state):
-                    hook()
-            if return_type == "state":
-                return run.state
-            return run.result()
+    with engine.start(task_run_id=task_run_id, dependencies=dependencies):
+        while engine.is_running():
+            run_coro_as_sync(engine.wait_until_ready())
+            with engine.run_context(), engine.transaction_context() as txn:
+                engine.call_task_fn(txn)
+    return engine.state if return_type == "state" else engine.result()
 async def run_task_async(
-    task: Task[P, Coroutine[Any, Any, R]],
+    task: Task[P, R],
     task_run_id: Optional[UUID] = None,
     task_run: Optional[TaskRun] = None,
     parameters: Optional[Dict[str, Any]] = None,
@@ -608,12 +624,6 @@ async def run_task_async(
     dependencies: Optional[Dict[str, Set[TaskRunInput]]] = None,
     context: Optional[Dict[str, Any]] = None,
 ) -> Union[R, State, None]:
-    """
-    Runs a task against the API.
-    We will most likely want to use this logic as a wrapper and return a coroutine for type inference.
-    """
     engine = TaskRunEngine[P, R](
         task=task,
         parameters=parameters,
@@ -621,53 +631,14 @@ async def run_task_async(
         wait_for=wait_for,
         context=context,
     )
-    # This is a context manager that keeps track of the run of the task run.
-    with engine.start(task_run_id=task_run_id, dependencies=dependencies) as run:
-        with run.enter_run_context():
-            run.begin_run()
-            while run.is_running():
-                # enter run context on each loop iteration to ensure the context
-                # contains the latest task run metadata
-                with run.enter_run_context():
-                    try:
-                        # This is where the task is actually run.
-                        with timeout_async(seconds=run.task.timeout_seconds):
-                            call_args, call_kwargs = parameters_to_args_kwargs(
-                                task.fn, run.parameters or {}
-                            )
-                            run.logger.debug(
-                                f"Executing task {task.name!r} for task run {run.task_run.name!r}..."
-                            )
-                            result_factory = getattr(
-                                TaskRunContext.get(), "result_factory", None
-                            )
-                            with transaction(
-                                key=run.compute_transaction_key(),
-                                store=ResultFactoryStore(result_factory=result_factory),
-                            ) as txn:
-                                if txn.is_committed():
-                                    result = txn.read()
-                                else:
-                                    result = await task.fn(*call_args, **call_kwargs)  # type: ignore
-                                # If the task run is successful, finalize it.
-                                # do this within the transaction lifecycle
-                                # in order to get the proper result serialization
-                                run.handle_success(result, transaction=txn)
-                    except TimeoutError as exc:
-                        run.handle_timeout(exc)
-                    except Exception as exc:
-                        run.handle_exception(exc)
-            if run.state.is_final():
-                for hook in run.get_hooks(run.state, as_async=True):
-                    await hook()
-            if return_type == "state":
-                return run.state
-            return run.result()
+    with engine.start(task_run_id=task_run_id, dependencies=dependencies):
+        while engine.is_running():
+            await engine.wait_until_ready()
+            with engine.run_context(), engine.transaction_context() as txn:
+                await engine.call_task_fn(txn)
+    return engine.state if return_type == "state" else engine.result()
 def run_task(

prefect/task_runners.py CHANGED Viewed

@@ -288,6 +288,10 @@ class ThreadPoolTaskRunner(TaskRunner[PrefectConcurrentFuture]):
         super().__exit__(exc_type, exc_value, traceback)
+# Here, we alias ConcurrentTaskRunner to ThreadPoolTaskRunner for backwards compatibility
+ConcurrentTaskRunner = ThreadPoolTaskRunner
 class PrefectTaskRunner(TaskRunner[PrefectDistributedFuture]):
     def __init__(self):
         super().__init__()
@@ -321,11 +325,11 @@ class PrefectTaskRunner(TaskRunner[PrefectDistributedFuture]):
         flow_run_ctx = FlowRunContext.get()
         if flow_run_ctx:
             get_run_logger(flow_run_ctx).info(
-                f"Submitting task {task.name} to for execution by a Prefect task server..."
+                f"Submitting task {task.name} to for execution by a Prefect task worker..."
             )
         else:
             self.logger.info(
-                f"Submitting task {task.name} to for execution by a Prefect task server..."
+                f"Submitting task {task.name} to for execution by a Prefect task worker..."
             )
         return task.apply_async(

prefect-client 3.0.0rc1__py3-none-any.whl → 3.0.0rc2__py3-none-any.whl

prefect-client 3.0.0rc1__py3-none-any.whl → 3.0.0rc2__py3-none-any.whl