prefect-client 3.0.0rc3__py3-none-any.whl → 3.0.0rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. prefect/__init__.py +0 -3
  2. prefect/client/schemas/schedules.py +9 -2
  3. prefect/client/subscriptions.py +3 -3
  4. prefect/client/types/__init__.py +0 -0
  5. prefect/client/types/flexible_schedule_list.py +11 -0
  6. prefect/concurrency/asyncio.py +14 -4
  7. prefect/concurrency/services.py +29 -22
  8. prefect/concurrency/sync.py +3 -5
  9. prefect/context.py +0 -114
  10. prefect/deployments/__init__.py +1 -1
  11. prefect/deployments/runner.py +11 -93
  12. prefect/deployments/schedules.py +5 -7
  13. prefect/docker/__init__.py +20 -0
  14. prefect/docker/docker_image.py +82 -0
  15. prefect/flow_engine.py +96 -20
  16. prefect/flows.py +36 -95
  17. prefect/futures.py +22 -2
  18. prefect/infrastructure/provisioners/cloud_run.py +2 -2
  19. prefect/infrastructure/provisioners/container_instance.py +2 -2
  20. prefect/infrastructure/provisioners/ecs.py +2 -2
  21. prefect/records/result_store.py +5 -1
  22. prefect/results.py +111 -42
  23. prefect/runner/runner.py +5 -3
  24. prefect/runner/server.py +6 -2
  25. prefect/settings.py +1 -1
  26. prefect/states.py +13 -3
  27. prefect/task_engine.py +7 -6
  28. prefect/task_runs.py +23 -9
  29. prefect/task_worker.py +128 -19
  30. prefect/tasks.py +20 -16
  31. prefect/transactions.py +8 -10
  32. prefect/types/__init__.py +10 -3
  33. prefect/types/entrypoint.py +13 -0
  34. prefect/utilities/collections.py +120 -57
  35. prefect/utilities/dockerutils.py +2 -1
  36. prefect/utilities/urls.py +5 -5
  37. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/METADATA +2 -2
  38. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/RECORD +41 -37
  39. prefect/blocks/kubernetes.py +0 -115
  40. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/LICENSE +0 -0
  41. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/WHEEL +0 -0
  42. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/top_level.txt +0 -0
prefect/task_worker.py CHANGED
@@ -8,10 +8,14 @@ from concurrent.futures import ThreadPoolExecutor
 from contextlib import AsyncExitStack
 from contextvars import copy_context
 from typing import List, Optional
+from uuid import UUID
 
 import anyio
 import anyio.abc
+import pendulum
+import uvicorn
 from exceptiongroup import BaseExceptionGroup  # novermin
+from fastapi import FastAPI
 from websockets.exceptions import InvalidStatusCode
 
 from prefect import Task
@@ -73,8 +77,9 @@ class TaskWorker:
         limit: Optional[int] = 10,
     ):
         self.tasks: List[Task] = list(tasks)
+        self.task_keys = set(t.task_key for t in tasks if isinstance(t, Task))
 
-        self.started: bool = False
+        self._started_at: Optional[pendulum.DateTime] = None
         self.stopping: bool = False
 
         self._client = get_client()
@@ -89,10 +94,41 @@ class TaskWorker:
         self._executor = ThreadPoolExecutor(max_workers=limit if limit else None)
         self._limiter = anyio.CapacityLimiter(limit) if limit else None
 
+        self.in_flight_task_runs: dict[str, dict[UUID, pendulum.DateTime]] = {
+            task_key: {} for task_key in self.task_keys
+        }
+        self.finished_task_runs: dict[str, int] = {
+            task_key: 0 for task_key in self.task_keys
+        }
+
     @property
-    def _client_id(self) -> str:
+    def client_id(self) -> str:
         return f"{socket.gethostname()}-{os.getpid()}"
 
+    @property
+    def started_at(self) -> Optional[pendulum.DateTime]:
+        return self._started_at
+
+    @property
+    def started(self) -> bool:
+        return self._started_at is not None
+
+    @property
+    def limit(self) -> Optional[int]:
+        return int(self._limiter.total_tokens) if self._limiter else None
+
+    @property
+    def current_tasks(self) -> Optional[int]:
+        return (
+            int(self._limiter.borrowed_tokens)
+            if self._limiter
+            else sum(len(runs) for runs in self.in_flight_task_runs.values())
+        )
+
+    @property
+    def available_tasks(self) -> Optional[int]:
+        return int(self._limiter.available_tokens) if self._limiter else None
+
     def handle_sigterm(self, signum, frame):
         """
         Shuts down the task worker when a SIGTERM is received.
@@ -133,11 +169,31 @@ class TaskWorker:
             " calling .start()"
         )
 
-        self.started = False
+        self._started_at = None
        self.stopping = True
 
        raise StopTaskWorker
 
+    async def _acquire_token(self, task_run_id: UUID) -> bool:
+        try:
+            if self._limiter:
+                await self._limiter.acquire_on_behalf_of(task_run_id)
+        except RuntimeError:
+            logger.debug(f"Token already acquired for task run: {task_run_id!r}")
+            return False
+
+        return True
+
+    def _release_token(self, task_run_id: UUID) -> bool:
+        try:
+            if self._limiter:
+                self._limiter.release_on_behalf_of(task_run_id)
+        except RuntimeError:
+            logger.debug(f"No token to release for task run: {task_run_id!r}")
+            return False
+
+        return True
+
     async def _subscribe_to_task_scheduling(self):
         base_url = PREFECT_API_URL.value()
         if base_url is None:
@@ -146,24 +202,26 @@ class TaskWorker:
                 "Task workers are not compatible with the ephemeral API."
             )
         task_keys_repr = " | ".join(
-            t.task_key.split(".")[-1].split("-")[0] for t in self.tasks
+            task_key.split(".")[-1].split("-")[0] for task_key in sorted(self.task_keys)
         )
         logger.info(f"Subscribing to runs of task(s): {task_keys_repr}")
         async for task_run in Subscription(
             model=TaskRun,
             path="/task_runs/subscriptions/scheduled",
-            keys=[task.task_key for task in self.tasks],
-            client_id=self._client_id,
+            keys=self.task_keys,
+            client_id=self.client_id,
             base_url=base_url,
         ):
             logger.info(f"Received task run: {task_run.id} - {task_run.name}")
-            if self._limiter:
-                await self._limiter.acquire_on_behalf_of(task_run.id)
-            self._runs_task_group.start_soon(
-                self._safe_submit_scheduled_task_run, task_run
-            )
+
+            token_acquired = await self._acquire_token(task_run.id)
+            if token_acquired:
+                self._runs_task_group.start_soon(
+                    self._safe_submit_scheduled_task_run, task_run
+                )
 
     async def _safe_submit_scheduled_task_run(self, task_run: TaskRun):
+        self.in_flight_task_runs[task_run.task_key][task_run.id] = pendulum.now()
         try:
             await self._submit_scheduled_task_run(task_run)
         except BaseException as exc:
@@ -172,8 +230,9 @@ class TaskWorker:
                 exc_info=exc,
             )
         finally:
-            if self._limiter:
-                self._limiter.release_on_behalf_of(task_run.id)
+            self.in_flight_task_runs[task_run.task_key].pop(task_run.id, None)
+            self.finished_task_runs[task_run.task_key] += 1
+            self._release_token(task_run.id)
 
     async def _submit_scheduled_task_run(self, task_run: TaskRun):
         logger.debug(
@@ -284,9 +343,9 @@ class TaskWorker:
     async def execute_task_run(self, task_run: TaskRun):
         """Execute a task run in the task worker."""
         async with self if not self.started else asyncnullcontext():
-            if self._limiter:
-                await self._limiter.acquire_on_behalf_of(task_run.id)
-            await self._safe_submit_scheduled_task_run(task_run)
+            token_acquired = await self._acquire_token(task_run.id)
+            if token_acquired:
+                await self._safe_submit_scheduled_task_run(task_run)
 
     async def __aenter__(self):
         logger.debug("Starting task worker...")
@@ -298,17 +357,42 @@ class TaskWorker:
         await self._exit_stack.enter_async_context(self._runs_task_group)
         self._exit_stack.enter_context(self._executor)
 
-        self.started = True
+        self._started_at = pendulum.now()
         return self
 
     async def __aexit__(self, *exc_info):
         logger.debug("Stopping task worker...")
-        self.started = False
+        self._started_at = None
         await self._exit_stack.__aexit__(*exc_info)
 
 
+def create_status_server(task_worker: TaskWorker) -> FastAPI:
+    status_app = FastAPI()
+
+    @status_app.get("/status")
+    def status():
+        return {
+            "client_id": task_worker.client_id,
+            "started_at": task_worker.started_at.isoformat(),
+            "stopping": task_worker.stopping,
+            "limit": task_worker.limit,
+            "current": task_worker.current_tasks,
+            "available": task_worker.available_tasks,
+            "tasks": sorted(task_worker.task_keys),
+            "finished": task_worker.finished_task_runs,
+            "in_flight": {
+                key: {str(run): start.isoformat() for run, start in tasks.items()}
+                for key, tasks in task_worker.in_flight_task_runs.items()
+            },
+        }
+
+    return status_app
+
+
 @sync_compatible
-async def serve(*tasks: Task, limit: Optional[int] = 10):
+async def serve(
+    *tasks: Task, limit: Optional[int] = 10, status_server_port: Optional[int] = None
+):
     """Serve the provided tasks so that their runs may be submitted to and executed.
     in the engine. Tasks do not need to be within a flow run context to be submitted.
     You must `.submit` the same task object that you pass to `serve`.
@@ -318,6 +402,9 @@ async def serve(*tasks: Task, limit: Optional[int] = 10):
             given task, the task run will be submitted to the engine for execution.
         - limit: The maximum number of tasks that can be run concurrently. Defaults to 10.
             Pass `None` to remove the limit.
+        - status_server_port: An optional port on which to start an HTTP server
+            exposing status information about the task worker. If not provided, no
+            status server will run.
 
     Example:
     ```python
@@ -339,6 +426,20 @@ async def serve(*tasks: Task, limit: Optional[int] = 10):
     """
     task_worker = TaskWorker(*tasks, limit=limit)
 
+    status_server_task = None
+    if status_server_port is not None:
+        server = uvicorn.Server(
+            uvicorn.Config(
+                app=create_status_server(task_worker),
+                host="127.0.0.1",
+                port=status_server_port,
+                access_log=False,
+                log_level="warning",
+            )
+        )
+        loop = asyncio.get_event_loop()
+        status_server_task = loop.create_task(server.serve())
+
     try:
         await task_worker.start()
 
@@ -355,3 +456,11 @@
 
     except (asyncio.CancelledError, KeyboardInterrupt):
         logger.info("Task worker interrupted, stopping...")
+
+    finally:
+        if status_server_task:
+            status_server_task.cancel()
+            try:
+                await status_server_task
+            except asyncio.CancelledError:
+                pass
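The headline change in `task_worker.py` is the optional status server: `serve` now accepts a `status_server_port` and, when it is set, runs the small FastAPI app from `create_status_server` under uvicorn so the worker can report its in-flight and finished runs. Below is a minimal usage sketch, assuming prefect 3.0.0rc5 is installed and `PREFECT_API_URL` points at a Prefect API; the task name and port are illustrative, not part of the diff.

```python
# Sketch only: the task, module layout, and port are made up for illustration.
from prefect import task
from prefect.task_worker import serve


@task
def add(x: int, y: int) -> int:
    return x + y


if __name__ == "__main__":
    # With status_server_port set, serve() also starts a local uvicorn server;
    # GET http://127.0.0.1:4422/status returns the fields assembled in
    # create_status_server (client_id, started_at, limit, current, available,
    # tasks, finished, in_flight).
    serve(add, limit=10, status_server_port=4422)
```

Related to this, `_acquire_token`/`_release_token` now wrap the capacity limiter: a run whose token was already acquired is skipped instead of raising, and the per-key bookkeeping in `in_flight_task_runs` and `finished_task_runs` feeds the counters exposed by the status endpoint.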
prefect/tasks.py CHANGED
@@ -37,14 +37,13 @@ from prefect.client.schemas import TaskRun
 from prefect.client.schemas.objects import TaskRunInput, TaskRunResult
 from prefect.context import (
     FlowRunContext,
-    PrefectObjectRegistry,
     TagsContext,
     TaskRunContext,
     serialize_context,
 )
 from prefect.futures import PrefectDistributedFuture, PrefectFuture
 from prefect.logging.loggers import get_logger
-from prefect.records.cache_policies import DEFAULT, CachePolicy
+from prefect.records.cache_policies import DEFAULT, NONE, CachePolicy
 from prefect.results import ResultFactory, ResultSerializer, ResultStorage
 from prefect.settings import (
     PREFECT_TASK_DEFAULT_RETRIES,
@@ -174,7 +173,6 @@ def _infer_parent_task_runs(
     return parents
 
 
-@PrefectObjectRegistry.register_instances
 class Task(Generic[P, R]):
     """
     A Prefect task definition.
@@ -218,10 +216,8 @@ class Task(Generic[P, R]):
             cannot exceed 50.
         retry_jitter_factor: An optional factor that defines the factor to which a retry
             can be jittered in order to avoid a "thundering herd".
-        persist_result: An optional toggle indicating whether the result of this task
-            should be persisted to result storage. Defaults to `None`, which indicates
-            that Prefect should choose whether the result should be persisted depending on
-            the features being used.
+        persist_result: An toggle indicating whether the result of this task
+            should be persisted to result storage. Defaults to `True`.
         result_storage: An optional block to use to persist the result of this task.
             Defaults to the value set in the flow the task is called in.
         result_storage_key: An optional key to store the result in storage at when persisted.
@@ -273,7 +269,7 @@
             ]
         ] = None,
         retry_jitter_factor: Optional[float] = None,
-        persist_result: Optional[bool] = None,
+        persist_result: bool = True,
         result_storage: Optional[ResultStorage] = None,
         result_serializer: Optional[ResultSerializer] = None,
         result_storage_key: Optional[str] = None,
@@ -368,7 +364,11 @@
 
         self.task_key = f"{self.fn.__qualname__}-{task_origin_hash}"
 
-        # TODO: warn of precedence of cache policies and cache key fn if both provided?
+        if cache_policy is not NotSet and cache_key_fn is not None:
+            logger.warning(
+                f"Both `cache_policy` and `cache_key_fn` are set on task {self}. `cache_key_fn` will be used."
+            )
+
 
         if cache_key_fn:
             cache_policy = CachePolicy.from_cache_key_fn(cache_key_fn)
@@ -377,7 +377,13 @@
         self.cache_expiration = cache_expiration
         self.refresh_cache = refresh_cache
 
-        if cache_policy is NotSet and result_storage_key is None:
+        if not persist_result:
+            self.cache_policy = None if cache_policy is None else NONE
+            if cache_policy and cache_policy is not NotSet and cache_policy != NONE:
+                logger.warning(
+                    "Ignoring `cache_policy` because `persist_result` is False"
+                )
+        elif cache_policy is NotSet and result_storage_key is None:
             self.cache_policy = DEFAULT
         elif result_storage_key:
             # TODO: handle this situation with double storage
@@ -1326,7 +1332,7 @@ def task(
         Callable[[int], List[float]],
     ] = 0,
     retry_jitter_factor: Optional[float] = None,
-    persist_result: Optional[bool] = None,
+    persist_result: bool = True,
     result_storage: Optional[ResultStorage] = None,
     result_storage_key: Optional[str] = None,
     result_serializer: Optional[ResultSerializer] = None,
@@ -1358,7 +1364,7 @@
         float, int, List[float], Callable[[int], List[float]], None
     ] = None,
     retry_jitter_factor: Optional[float] = None,
-    persist_result: Optional[bool] = None,
+    persist_result: bool = True,
    result_storage: Optional[ResultStorage] = None,
    result_storage_key: Optional[str] = None,
    result_serializer: Optional[ResultSerializer] = None,
@@ -1404,10 +1410,8 @@ def task(
             cannot exceed 50.
         retry_jitter_factor: An optional factor that defines the factor to which a retry
             can be jittered in order to avoid a "thundering herd".
-        persist_result: An optional toggle indicating whether the result of this task
-            should be persisted to result storage. Defaults to `None`, which indicates
-            that Prefect should choose whether the result should be persisted depending on
-            the features being used.
+        persist_result: An toggle indicating whether the result of this task
+            should be persisted to result storage. Defaults to `True`.
         result_storage: An optional block to use to persist the result of this task.
             Defaults to the value set in the flow the task is called in.
         result_storage_key: An optional key to store the result in storage at when persisted.
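The net effect of the `tasks.py` changes: `persist_result` now defaults to `True`, an explicit `cache_policy` combined with a `cache_key_fn` logs a precedence warning, and `persist_result=False` downgrades any cache policy to `NONE`. A hedged sketch of the new behavior, assuming prefect 3.0.0rc5; the task bodies and names are illustrative.

```python
# Illustrative sketch of the persist_result / cache_policy interaction above.
from prefect import task
from prefect.records.cache_policies import DEFAULT


@task  # persist_result defaults to True, so the DEFAULT cache policy applies
def doubled(x: int) -> int:
    return x * 2


# Per the new __init__ branch, an explicit cache policy combined with
# persist_result=False logs "Ignoring `cache_policy` because `persist_result`
# is False" and the task's effective cache policy becomes NONE.
@task(persist_result=False, cache_policy=DEFAULT)
def uncached(x: int) -> int:
    return x * 2
```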
prefect/transactions.py CHANGED
@@ -15,8 +15,11 @@ from typing_extensions import Self
 from prefect.context import ContextModel, FlowRunContext, TaskRunContext
 from prefect.records import RecordStore
 from prefect.records.result_store import ResultFactoryStore
-from prefect.results import BaseResult, ResultFactory, get_default_result_storage
-from prefect.settings import PREFECT_DEFAULT_RESULT_STORAGE_BLOCK
+from prefect.results import (
+    BaseResult,
+    ResultFactory,
+    get_or_create_default_result_storage,
+)
 from prefect.utilities.asyncutils import run_coro_as_sync
 from prefect.utilities.collections import AutoEnum
 
@@ -86,7 +89,7 @@ class Transaction(ContextModel):
         if parent:
             self.commit_mode = parent.commit_mode
         else:
-            self.commit_mode = CommitMode.EAGER
+            self.commit_mode = CommitMode.LAZY
 
         # this needs to go before begin, which could set the state to committed
         self.state = TransactionState.ACTIVE
@@ -233,7 +236,7 @@ def get_transaction() -> Optional[Transaction]:
 def transaction(
     key: Optional[str] = None,
     store: Optional[RecordStore] = None,
-    commit_mode: CommitMode = CommitMode.LAZY,
+    commit_mode: Optional[CommitMode] = None,
     overwrite: bool = False,
 ) -> Generator[Transaction, None, None]:
     """
@@ -265,12 +268,7 @@
             }
         )
     else:
-        default_storage = get_default_result_storage(_sync=True)
-        if not default_storage._block_document_id:
-            default_name = PREFECT_DEFAULT_RESULT_STORAGE_BLOCK.value().split("/")[
-                -1
-            ]
-            default_storage.save(default_name, overwrite=True, _sync=True)
+        default_storage = get_or_create_default_result_storage(_sync=True)
         if existing_factory:
             new_factory = existing_factory.model_copy(
                 update={
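Two behavioral changes stand out in `transactions.py`: the `transaction()` helper no longer hard-codes `CommitMode.LAZY` as its default (it passes `None` and lets the `Transaction` resolve it), and a parentless transaction now resolves to `LAZY` instead of `EAGER`. A small sketch under those assumptions; the key and staged value are made up, and `Transaction.stage` is assumed from the existing API rather than shown in this diff.

```python
# Sketch of the revised commit-mode defaults; key and value are illustrative.
from prefect.transactions import CommitMode, transaction

# commit_mode defaults to None, so this root transaction resolves to
# CommitMode.LAZY (previously EAGER), deferring commits instead of
# committing at every transaction boundary.
with transaction(key="example-record") as txn:
    txn.stage({"answer": 42})

# The old eager behavior is still available by opting in explicitly.
with transaction(key="example-record", commit_mode=CommitMode.EAGER) as txn:
    txn.stage({"answer": 42})
```

The `else` branch also swaps the manual save of the default result storage block for the new `get_or_create_default_result_storage` helper.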
prefect/types/__init__.py CHANGED
@@ -15,12 +15,19 @@ from zoneinfo import available_timezones
 MAX_VARIABLE_NAME_LENGTH = 255
 MAX_VARIABLE_VALUE_LENGTH = 5000
 
-timezone_set = available_timezones()
-
 NonNegativeInteger = Annotated[int, Field(ge=0)]
 PositiveInteger = Annotated[int, Field(gt=0)]
 NonNegativeFloat = Annotated[float, Field(ge=0.0)]
-TimeZone = Annotated[str, Field(default="UTC", pattern="|".join(sorted(timezone_set)))]
+
+TimeZone = Annotated[
+    str,
+    Field(
+        default="UTC",
+        pattern="|".join(
+            [z for z in sorted(available_timezones()) if "localtime" not in z]
+        ),
+    ),
+]
 
 
 BANNED_CHARACTERS = ["/", "%", "&", ">", "<"]
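The `TimeZone` annotation is now built directly from `available_timezones()` with any `localtime` entries filtered out of the validation pattern. A quick sketch of how the annotated type behaves when used in a pydantic model, assuming pydantic v2 (which Prefect 3 uses); the model name is illustrative.

```python
# Illustrative model; TimeZone comes from the module changed above.
from pydantic import BaseModel, ValidationError

from prefect.types import TimeZone


class ScheduleConfig(BaseModel):
    timezone: TimeZone = "UTC"


print(ScheduleConfig(timezone="America/New_York").timezone)  # accepted

try:
    # "localtime" is excluded from the pattern, so it now fails validation
    # like any other unrecognized zone name.
    ScheduleConfig(timezone="localtime")
except ValidationError as err:
    print(err)
```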
prefect/types/entrypoint.py ADDED
@@ -0,0 +1,13 @@
+from enum import Enum
+
+
+class EntrypointType(Enum):
+    """
+    Enum representing a entrypoint type.
+
+    File path entrypoints are in the format: `path/to/file.py:function_name`.
+    Module path entrypoints are in the format: `path.to.module.function_name`.
+    """
+
+    FILE_PATH = "file_path"
+    MODULE_PATH = "module_path"
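The new `EntrypointType` enum only encodes the two entrypoint formats; it is presumably consumed by the deployment code touched elsewhere in this release (e.g. `prefect/deployments/runner.py`, not shown here). A small illustration of the formats the docstring describes, with made-up paths and names.

```python
# The paths, module, and flow name below are made up for illustration.
from prefect.types.entrypoint import EntrypointType

entrypoints = {
    EntrypointType.FILE_PATH: "flows/etl.py:my_flow",         # path/to/file.py:function_name
    EntrypointType.MODULE_PATH: "my_project.flows.my_flow",   # path.to.module.function_name
}

for kind, value in entrypoints.items():
    print(f"{kind.value}: {value}")
```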