prefect-client 3.0.0rc3__py3-none-any.whl → 3.0.0rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. prefect/__init__.py +0 -3
  2. prefect/client/schemas/schedules.py +9 -2
  3. prefect/client/subscriptions.py +3 -3
  4. prefect/client/types/__init__.py +0 -0
  5. prefect/client/types/flexible_schedule_list.py +11 -0
  6. prefect/concurrency/asyncio.py +14 -4
  7. prefect/concurrency/services.py +29 -22
  8. prefect/concurrency/sync.py +3 -5
  9. prefect/context.py +0 -114
  10. prefect/deployments/__init__.py +1 -1
  11. prefect/deployments/runner.py +11 -93
  12. prefect/deployments/schedules.py +5 -7
  13. prefect/docker/__init__.py +20 -0
  14. prefect/docker/docker_image.py +82 -0
  15. prefect/flow_engine.py +96 -20
  16. prefect/flows.py +36 -95
  17. prefect/futures.py +22 -2
  18. prefect/infrastructure/provisioners/cloud_run.py +2 -2
  19. prefect/infrastructure/provisioners/container_instance.py +2 -2
  20. prefect/infrastructure/provisioners/ecs.py +2 -2
  21. prefect/records/result_store.py +5 -1
  22. prefect/results.py +111 -42
  23. prefect/runner/runner.py +5 -3
  24. prefect/runner/server.py +6 -2
  25. prefect/settings.py +1 -1
  26. prefect/states.py +13 -3
  27. prefect/task_engine.py +7 -6
  28. prefect/task_runs.py +23 -9
  29. prefect/task_worker.py +128 -19
  30. prefect/tasks.py +20 -16
  31. prefect/transactions.py +8 -10
  32. prefect/types/__init__.py +10 -3
  33. prefect/types/entrypoint.py +13 -0
  34. prefect/utilities/collections.py +120 -57
  35. prefect/utilities/dockerutils.py +2 -1
  36. prefect/utilities/urls.py +5 -5
  37. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/METADATA +2 -2
  38. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/RECORD +41 -37
  39. prefect/blocks/kubernetes.py +0 -115
  40. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/LICENSE +0 -0
  41. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/WHEEL +0 -0
  42. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/top_level.txt +0 -0
prefect/task_worker.py CHANGED
@@ -8,10 +8,14 @@ from concurrent.futures import ThreadPoolExecutor
 from contextlib import AsyncExitStack
 from contextvars import copy_context
 from typing import List, Optional
+from uuid import UUID
 
 import anyio
 import anyio.abc
+import pendulum
+import uvicorn
 from exceptiongroup import BaseExceptionGroup  # novermin
+from fastapi import FastAPI
 from websockets.exceptions import InvalidStatusCode
 
 from prefect import Task
@@ -73,8 +77,9 @@ class TaskWorker:
         limit: Optional[int] = 10,
     ):
         self.tasks: List[Task] = list(tasks)
+        self.task_keys = set(t.task_key for t in tasks if isinstance(t, Task))
 
-        self.started: bool = False
+        self._started_at: Optional[pendulum.DateTime] = None
         self.stopping: bool = False
 
         self._client = get_client()
@@ -89,10 +94,41 @@ class TaskWorker:
         self._executor = ThreadPoolExecutor(max_workers=limit if limit else None)
         self._limiter = anyio.CapacityLimiter(limit) if limit else None
 
+        self.in_flight_task_runs: dict[str, dict[UUID, pendulum.DateTime]] = {
+            task_key: {} for task_key in self.task_keys
+        }
+        self.finished_task_runs: dict[str, int] = {
+            task_key: 0 for task_key in self.task_keys
+        }
+
     @property
-    def _client_id(self) -> str:
+    def client_id(self) -> str:
         return f"{socket.gethostname()}-{os.getpid()}"
 
+    @property
+    def started_at(self) -> Optional[pendulum.DateTime]:
+        return self._started_at
+
+    @property
+    def started(self) -> bool:
+        return self._started_at is not None
+
+    @property
+    def limit(self) -> Optional[int]:
+        return int(self._limiter.total_tokens) if self._limiter else None
+
+    @property
+    def current_tasks(self) -> Optional[int]:
+        return (
+            int(self._limiter.borrowed_tokens)
+            if self._limiter
+            else sum(len(runs) for runs in self.in_flight_task_runs.values())
+        )
+
+    @property
+    def available_tasks(self) -> Optional[int]:
+        return int(self._limiter.available_tokens) if self._limiter else None
+
     def handle_sigterm(self, signum, frame):
         """
         Shuts down the task worker when a SIGTERM is received.
@@ -133,11 +169,31 @@ class TaskWorker:
             " calling .start()"
         )
 
-        self.started = False
+        self._started_at = None
        self.stopping = True
 
        raise StopTaskWorker
 
+    async def _acquire_token(self, task_run_id: UUID) -> bool:
+        try:
+            if self._limiter:
+                await self._limiter.acquire_on_behalf_of(task_run_id)
+        except RuntimeError:
+            logger.debug(f"Token already acquired for task run: {task_run_id!r}")
+            return False
+
+        return True
+
+    def _release_token(self, task_run_id: UUID) -> bool:
+        try:
+            if self._limiter:
+                self._limiter.release_on_behalf_of(task_run_id)
+        except RuntimeError:
+            logger.debug(f"No token to release for task run: {task_run_id!r}")
+            return False
+
+        return True
+
     async def _subscribe_to_task_scheduling(self):
         base_url = PREFECT_API_URL.value()
         if base_url is None:
@@ -146,24 +202,26 @@ class TaskWorker:
                 "Task workers are not compatible with the ephemeral API."
             )
         task_keys_repr = " | ".join(
-            t.task_key.split(".")[-1].split("-")[0] for t in self.tasks
+            task_key.split(".")[-1].split("-")[0] for task_key in sorted(self.task_keys)
         )
         logger.info(f"Subscribing to runs of task(s): {task_keys_repr}")
         async for task_run in Subscription(
             model=TaskRun,
             path="/task_runs/subscriptions/scheduled",
-            keys=[task.task_key for task in self.tasks],
-            client_id=self._client_id,
+            keys=self.task_keys,
+            client_id=self.client_id,
             base_url=base_url,
         ):
             logger.info(f"Received task run: {task_run.id} - {task_run.name}")
-            if self._limiter:
-                await self._limiter.acquire_on_behalf_of(task_run.id)
-            self._runs_task_group.start_soon(
-                self._safe_submit_scheduled_task_run, task_run
-            )
+
+            token_acquired = await self._acquire_token(task_run.id)
+            if token_acquired:
+                self._runs_task_group.start_soon(
+                    self._safe_submit_scheduled_task_run, task_run
+                )
 
     async def _safe_submit_scheduled_task_run(self, task_run: TaskRun):
+        self.in_flight_task_runs[task_run.task_key][task_run.id] = pendulum.now()
         try:
             await self._submit_scheduled_task_run(task_run)
         except BaseException as exc:
@@ -172,8 +230,9 @@ class TaskWorker:
                 exc_info=exc,
             )
         finally:
-            if self._limiter:
-                self._limiter.release_on_behalf_of(task_run.id)
+            self.in_flight_task_runs[task_run.task_key].pop(task_run.id, None)
+            self.finished_task_runs[task_run.task_key] += 1
+            self._release_token(task_run.id)
 
     async def _submit_scheduled_task_run(self, task_run: TaskRun):
         logger.debug(
@@ -284,9 +343,9 @@ class TaskWorker:
     async def execute_task_run(self, task_run: TaskRun):
         """Execute a task run in the task worker."""
         async with self if not self.started else asyncnullcontext():
-            if self._limiter:
-                await self._limiter.acquire_on_behalf_of(task_run.id)
-            await self._safe_submit_scheduled_task_run(task_run)
+            token_acquired = await self._acquire_token(task_run.id)
+            if token_acquired:
+                await self._safe_submit_scheduled_task_run(task_run)
 
     async def __aenter__(self):
         logger.debug("Starting task worker...")
@@ -298,17 +357,42 @@ class TaskWorker:
         await self._exit_stack.enter_async_context(self._runs_task_group)
         self._exit_stack.enter_context(self._executor)
 
-        self.started = True
+        self._started_at = pendulum.now()
         return self
 
     async def __aexit__(self, *exc_info):
         logger.debug("Stopping task worker...")
-        self.started = False
+        self._started_at = None
         await self._exit_stack.__aexit__(*exc_info)
 
 
+def create_status_server(task_worker: TaskWorker) -> FastAPI:
+    status_app = FastAPI()
+
+    @status_app.get("/status")
+    def status():
+        return {
+            "client_id": task_worker.client_id,
+            "started_at": task_worker.started_at.isoformat(),
+            "stopping": task_worker.stopping,
+            "limit": task_worker.limit,
+            "current": task_worker.current_tasks,
+            "available": task_worker.available_tasks,
+            "tasks": sorted(task_worker.task_keys),
+            "finished": task_worker.finished_task_runs,
+            "in_flight": {
+                key: {str(run): start.isoformat() for run, start in tasks.items()}
+                for key, tasks in task_worker.in_flight_task_runs.items()
+            },
+        }
+
+    return status_app
+
+
 @sync_compatible
-async def serve(*tasks: Task, limit: Optional[int] = 10):
+async def serve(
+    *tasks: Task, limit: Optional[int] = 10, status_server_port: Optional[int] = None
+):
     """Serve the provided tasks so that their runs may be submitted to and executed.
     in the engine. Tasks do not need to be within a flow run context to be submitted.
     You must `.submit` the same task object that you pass to `serve`.
@@ -318,6 +402,9 @@ async def serve(*tasks: Task, limit: Optional[int] = 10):
             given task, the task run will be submitted to the engine for execution.
         - limit: The maximum number of tasks that can be run concurrently. Defaults to 10.
             Pass `None` to remove the limit.
+        - status_server_port: An optional port on which to start an HTTP server
+            exposing status information about the task worker. If not provided, no
+            status server will run.
 
     Example:
     ```python
@@ -339,6 +426,20 @@ async def serve(*tasks: Task, limit: Optional[int] = 10):
     """
     task_worker = TaskWorker(*tasks, limit=limit)
 
+    status_server_task = None
+    if status_server_port is not None:
+        server = uvicorn.Server(
+            uvicorn.Config(
+                app=create_status_server(task_worker),
+                host="127.0.0.1",
+                port=status_server_port,
+                access_log=False,
+                log_level="warning",
+            )
+        )
+        loop = asyncio.get_event_loop()
+        status_server_task = loop.create_task(server.serve())
+
     try:
         await task_worker.start()
 
@@ -355,3 +456,11 @@
 
     except (asyncio.CancelledError, KeyboardInterrupt):
         logger.info("Task worker interrupted, stopping...")
+
+    finally:
+        if status_server_task:
+            status_server_task.cancel()
+            try:
+                await status_server_task
+            except asyncio.CancelledError:
+                pass
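The headline change in `task_worker.py` is the optional status server: `serve` now accepts a `status_server_port` and, when it is set, runs the small FastAPI app from `create_status_server` under uvicorn so the worker can report its in-flight and finished runs. Below is a minimal usage sketch, assuming prefect 3.0.0rc5 is installed and `PREFECT_API_URL` points at a Prefect API; the task name and port are illustrative, not part of the diff.

```python
# Sketch only: the task, module layout, and port are made up for illustration.
from prefect import task
from prefect.task_worker import serve


@task
def add(x: int, y: int) -> int:
    return x + y


if __name__ == "__main__":
    # With status_server_port set, serve() also starts a local uvicorn server;
    # GET http://127.0.0.1:4422/status returns the fields assembled in
    # create_status_server (client_id, started_at, limit, current, available,
    # tasks, finished, in_flight).
    serve(add, limit=10, status_server_port=4422)
```

Related to this, `_acquire_token`/`_release_token` now wrap the capacity limiter: a run whose token was already acquired is skipped instead of raising, and the per-key bookkeeping in `in_flight_task_runs` and `finished_task_runs` feeds the counters exposed by the status endpoint.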
prefect/tasks.py CHANGED
@@ -37,14 +37,13 @@ from prefect.client.schemas import TaskRun
 from prefect.client.schemas.objects import TaskRunInput, TaskRunResult
 from prefect.context import (
     FlowRunContext,
-    PrefectObjectRegistry,
     TagsContext,
     TaskRunContext,
     serialize_context,
 )
 from prefect.futures import PrefectDistributedFuture, PrefectFuture
 from prefect.logging.loggers import get_logger
-from prefect.records.cache_policies import DEFAULT, CachePolicy
+from prefect.records.cache_policies import DEFAULT, NONE, CachePolicy
 from prefect.results import ResultFactory, ResultSerializer, ResultStorage
 from prefect.settings import (
     PREFECT_TASK_DEFAULT_RETRIES,
@@ -174,7 +173,6 @@ def _infer_parent_task_runs(
     return parents
 
 
-@PrefectObjectRegistry.register_instances
 class Task(Generic[P, R]):
     """
     A Prefect task definition.
@@ -218,10 +216,8 @@ class Task(Generic[P, R]):
             cannot exceed 50.
         retry_jitter_factor: An optional factor that defines the factor to which a retry
             can be jittered in order to avoid a "thundering herd".
-        persist_result: An optional toggle indicating whether the result of this task
-            should be persisted to result storage. Defaults to `None`, which indicates
-            that Prefect should choose whether the result should be persisted depending on
-            the features being used.
+        persist_result: An toggle indicating whether the result of this task
+            should be persisted to result storage. Defaults to `True`.
         result_storage: An optional block to use to persist the result of this task.
             Defaults to the value set in the flow the task is called in.
         result_storage_key: An optional key to store the result in storage at when persisted.
@@ -273,7 +269,7 @@
             ]
         ] = None,
         retry_jitter_factor: Optional[float] = None,
-        persist_result: Optional[bool] = None,
+        persist_result: bool = True,
         result_storage: Optional[ResultStorage] = None,
         result_serializer: Optional[ResultSerializer] = None,
         result_storage_key: Optional[str] = None,
@@ -368,7 +364,11 @@
 
         self.task_key = f"{self.fn.__qualname__}-{task_origin_hash}"
 
-        # TODO: warn of precedence of cache policies and cache key fn if both provided?
+        if cache_policy is not NotSet and cache_key_fn is not None:
+            logger.warning(
+                f"Both `cache_policy` and `cache_key_fn` are set on task {self}. `cache_key_fn` will be used."
+            )
+
 
         if cache_key_fn:
             cache_policy = CachePolicy.from_cache_key_fn(cache_key_fn)
@@ -377,7 +377,13 @@
         self.cache_expiration = cache_expiration
         self.refresh_cache = refresh_cache
 
-        if cache_policy is NotSet and result_storage_key is None:
+        if not persist_result:
+            self.cache_policy = None if cache_policy is None else NONE
+            if cache_policy and cache_policy is not NotSet and cache_policy != NONE:
+                logger.warning(
+                    "Ignoring `cache_policy` because `persist_result` is False"
+                )
+        elif cache_policy is NotSet and result_storage_key is None:
             self.cache_policy = DEFAULT
         elif result_storage_key:
             # TODO: handle this situation with double storage
@@ -1326,7 +1332,7 @@ def task(
         Callable[[int], List[float]],
     ] = 0,
     retry_jitter_factor: Optional[float] = None,
-    persist_result: Optional[bool] = None,
+    persist_result: bool = True,
     result_storage: Optional[ResultStorage] = None,
     result_storage_key: Optional[str] = None,
     result_serializer: Optional[ResultSerializer] = None,
@@ -1358,7 +1364,7 @@
         float, int, List[float], Callable[[int], List[float]], None
     ] = None,
     retry_jitter_factor: Optional[float] = None,
-    persist_result: Optional[bool] = None,
+    persist_result: bool = True,
    result_storage: Optional[ResultStorage] = None,
    result_storage_key: Optional[str] = None,
    result_serializer: Optional[ResultSerializer] = None,
@@ -1404,10 +1410,8 @@ def task(
             cannot exceed 50.
         retry_jitter_factor: An optional factor that defines the factor to which a retry
             can be jittered in order to avoid a "thundering herd".
-        persist_result: An optional toggle indicating whether the result of this task
-            should be persisted to result storage. Defaults to `None`, which indicates
-            that Prefect should choose whether the result should be persisted depending on
-            the features being used.
+        persist_result: An toggle indicating whether the result of this task
+            should be persisted to result storage. Defaults to `True`.
         result_storage: An optional block to use to persist the result of this task.
             Defaults to the value set in the flow the task is called in.
         result_storage_key: An optional key to store the result in storage at when persisted.
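The net effect of the `tasks.py` changes: `persist_result` now defaults to `True`, an explicit `cache_policy` combined with a `cache_key_fn` logs a precedence warning, and `persist_result=False` downgrades any cache policy to `NONE`. A hedged sketch of the new behavior, assuming prefect 3.0.0rc5; the task bodies and names are illustrative.

```python
# Illustrative sketch of the persist_result / cache_policy interaction above.
from prefect import task
from prefect.records.cache_policies import DEFAULT


@task  # persist_result defaults to True, so the DEFAULT cache policy applies
def doubled(x: int) -> int:
    return x * 2


# Per the new __init__ branch, an explicit cache policy combined with
# persist_result=False logs "Ignoring `cache_policy` because `persist_result`
# is False" and the task's effective cache policy becomes NONE.
@task(persist_result=False, cache_policy=DEFAULT)
def uncached(x: int) -> int:
    return x * 2
```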
prefect/transactions.py CHANGED
@@ -15,8 +15,11 @@ from typing_extensions import Self
 from prefect.context import ContextModel, FlowRunContext, TaskRunContext
 from prefect.records import RecordStore
 from prefect.records.result_store import ResultFactoryStore
-from prefect.results import BaseResult, ResultFactory, get_default_result_storage
-from prefect.settings import PREFECT_DEFAULT_RESULT_STORAGE_BLOCK
+from prefect.results import (
+    BaseResult,
+    ResultFactory,
+    get_or_create_default_result_storage,
+)
 from prefect.utilities.asyncutils import run_coro_as_sync
 from prefect.utilities.collections import AutoEnum
 
@@ -86,7 +89,7 @@ class Transaction(ContextModel):
         if parent:
             self.commit_mode = parent.commit_mode
         else:
-            self.commit_mode = CommitMode.EAGER
+            self.commit_mode = CommitMode.LAZY
 
         # this needs to go before begin, which could set the state to committed
         self.state = TransactionState.ACTIVE
@@ -233,7 +236,7 @@ def get_transaction() -> Optional[Transaction]:
 def transaction(
     key: Optional[str] = None,
     store: Optional[RecordStore] = None,
-    commit_mode: CommitMode = CommitMode.LAZY,
+    commit_mode: Optional[CommitMode] = None,
     overwrite: bool = False,
 ) -> Generator[Transaction, None, None]:
     """
@@ -265,12 +268,7 @@
             }
         )
     else:
-        default_storage = get_default_result_storage(_sync=True)
-        if not default_storage._block_document_id:
-            default_name = PREFECT_DEFAULT_RESULT_STORAGE_BLOCK.value().split("/")[
-                -1
-            ]
-            default_storage.save(default_name, overwrite=True, _sync=True)
+        default_storage = get_or_create_default_result_storage(_sync=True)
         if existing_factory:
             new_factory = existing_factory.model_copy(
                 update={
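Two behavioral changes stand out in `transactions.py`: the `transaction()` helper no longer hard-codes `CommitMode.LAZY` as its default (it passes `None` and lets the `Transaction` resolve it), and a parentless transaction now resolves to `LAZY` instead of `EAGER`. A small sketch under those assumptions; the key and staged value are made up, and `Transaction.stage` is assumed from the existing API rather than shown in this diff.

```python
# Sketch of the revised commit-mode defaults; key and value are illustrative.
from prefect.transactions import CommitMode, transaction

# commit_mode defaults to None, so this root transaction resolves to
# CommitMode.LAZY (previously EAGER), deferring commits instead of
# committing at every transaction boundary.
with transaction(key="example-record") as txn:
    txn.stage({"answer": 42})

# The old eager behavior is still available by opting in explicitly.
with transaction(key="example-record", commit_mode=CommitMode.EAGER) as txn:
    txn.stage({"answer": 42})
```

The `else` branch also swaps the manual save of the default result storage block for the new `get_or_create_default_result_storage` helper.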
prefect/types/__init__.py CHANGED
@@ -15,12 +15,19 @@ from zoneinfo import available_timezones
 MAX_VARIABLE_NAME_LENGTH = 255
 MAX_VARIABLE_VALUE_LENGTH = 5000
 
-timezone_set = available_timezones()
-
 NonNegativeInteger = Annotated[int, Field(ge=0)]
 PositiveInteger = Annotated[int, Field(gt=0)]
 NonNegativeFloat = Annotated[float, Field(ge=0.0)]
-TimeZone = Annotated[str, Field(default="UTC", pattern="|".join(sorted(timezone_set)))]
+
+TimeZone = Annotated[
+    str,
+    Field(
+        default="UTC",
+        pattern="|".join(
+            [z for z in sorted(available_timezones()) if "localtime" not in z]
+        ),
+    ),
+]
 
 
 BANNED_CHARACTERS = ["/", "%", "&", ">", "<"]
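The `TimeZone` annotation is now built directly from `available_timezones()` with any `localtime` entries filtered out of the validation pattern. A quick sketch of how the annotated type behaves when used in a pydantic model, assuming pydantic v2 (which Prefect 3 uses); the model name is illustrative.

```python
# Illustrative model; TimeZone comes from the module changed above.
from pydantic import BaseModel, ValidationError

from prefect.types import TimeZone


class ScheduleConfig(BaseModel):
    timezone: TimeZone = "UTC"


print(ScheduleConfig(timezone="America/New_York").timezone)  # accepted

try:
    # "localtime" is excluded from the pattern, so it now fails validation
    # like any other unrecognized zone name.
    ScheduleConfig(timezone="localtime")
except ValidationError as err:
    print(err)
```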
prefect/types/entrypoint.py ADDED
@@ -0,0 +1,13 @@
+from enum import Enum
+
+
+class EntrypointType(Enum):
+    """
+    Enum representing a entrypoint type.
+
+    File path entrypoints are in the format: `path/to/file.py:function_name`.
+    Module path entrypoints are in the format: `path.to.module.function_name`.
+    """
+
+    FILE_PATH = "file_path"
+    MODULE_PATH = "module_path"
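The new `EntrypointType` enum only encodes the two entrypoint formats; it is presumably consumed by the deployment code touched elsewhere in this release (e.g. `prefect/deployments/runner.py`, not shown here). A small illustration of the formats the docstring describes, with made-up paths and names.

```python
# The paths, module, and flow name below are made up for illustration.
from prefect.types.entrypoint import EntrypointType

entrypoints = {
    EntrypointType.FILE_PATH: "flows/etl.py:my_flow",         # path/to/file.py:function_name
    EntrypointType.MODULE_PATH: "my_project.flows.my_flow",   # path.to.module.function_name
}

for kind, value in entrypoints.items():
    print(f"{kind.value}: {value}")
```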