prefect-client 3.0.0rc3__py3-none-any.whl → 3.0.0rc5__py3-none-any.whl
This diff compares the contents of two publicly released package versions, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- prefect/__init__.py +0 -3
- prefect/client/schemas/schedules.py +9 -2
- prefect/client/subscriptions.py +3 -3
- prefect/client/types/__init__.py +0 -0
- prefect/client/types/flexible_schedule_list.py +11 -0
- prefect/concurrency/asyncio.py +14 -4
- prefect/concurrency/services.py +29 -22
- prefect/concurrency/sync.py +3 -5
- prefect/context.py +0 -114
- prefect/deployments/__init__.py +1 -1
- prefect/deployments/runner.py +11 -93
- prefect/deployments/schedules.py +5 -7
- prefect/docker/__init__.py +20 -0
- prefect/docker/docker_image.py +82 -0
- prefect/flow_engine.py +96 -20
- prefect/flows.py +36 -95
- prefect/futures.py +22 -2
- prefect/infrastructure/provisioners/cloud_run.py +2 -2
- prefect/infrastructure/provisioners/container_instance.py +2 -2
- prefect/infrastructure/provisioners/ecs.py +2 -2
- prefect/records/result_store.py +5 -1
- prefect/results.py +111 -42
- prefect/runner/runner.py +5 -3
- prefect/runner/server.py +6 -2
- prefect/settings.py +1 -1
- prefect/states.py +13 -3
- prefect/task_engine.py +7 -6
- prefect/task_runs.py +23 -9
- prefect/task_worker.py +128 -19
- prefect/tasks.py +20 -16
- prefect/transactions.py +8 -10
- prefect/types/__init__.py +10 -3
- prefect/types/entrypoint.py +13 -0
- prefect/utilities/collections.py +120 -57
- prefect/utilities/dockerutils.py +2 -1
- prefect/utilities/urls.py +5 -5
- {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/METADATA +2 -2
- {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/RECORD +41 -37
- prefect/blocks/kubernetes.py +0 -115
- {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/LICENSE +0 -0
- {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/WHEEL +0 -0
- {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/top_level.txt +0 -0
prefect/task_worker.py
CHANGED
````diff
@@ -8,10 +8,14 @@ from concurrent.futures import ThreadPoolExecutor
 from contextlib import AsyncExitStack
 from contextvars import copy_context
 from typing import List, Optional
+from uuid import UUID
 
 import anyio
 import anyio.abc
+import pendulum
+import uvicorn
 from exceptiongroup import BaseExceptionGroup  # novermin
+from fastapi import FastAPI
 from websockets.exceptions import InvalidStatusCode
 
 from prefect import Task
@@ -73,8 +77,9 @@ class TaskWorker:
         limit: Optional[int] = 10,
     ):
         self.tasks: List[Task] = list(tasks)
+        self.task_keys = set(t.task_key for t in tasks if isinstance(t, Task))
 
-        self.
+        self._started_at: Optional[pendulum.DateTime] = None
         self.stopping: bool = False
 
         self._client = get_client()
@@ -89,10 +94,41 @@ class TaskWorker:
         self._executor = ThreadPoolExecutor(max_workers=limit if limit else None)
         self._limiter = anyio.CapacityLimiter(limit) if limit else None
 
+        self.in_flight_task_runs: dict[str, dict[UUID, pendulum.DateTime]] = {
+            task_key: {} for task_key in self.task_keys
+        }
+        self.finished_task_runs: dict[str, int] = {
+            task_key: 0 for task_key in self.task_keys
+        }
+
     @property
-    def
+    def client_id(self) -> str:
         return f"{socket.gethostname()}-{os.getpid()}"
 
+    @property
+    def started_at(self) -> Optional[pendulum.DateTime]:
+        return self._started_at
+
+    @property
+    def started(self) -> bool:
+        return self._started_at is not None
+
+    @property
+    def limit(self) -> Optional[int]:
+        return int(self._limiter.total_tokens) if self._limiter else None
+
+    @property
+    def current_tasks(self) -> Optional[int]:
+        return (
+            int(self._limiter.borrowed_tokens)
+            if self._limiter
+            else sum(len(runs) for runs in self.in_flight_task_runs.values())
+        )
+
+    @property
+    def available_tasks(self) -> Optional[int]:
+        return int(self._limiter.available_tokens) if self._limiter else None
+
     def handle_sigterm(self, signum, frame):
         """
         Shuts down the task worker when a SIGTERM is received.
@@ -133,11 +169,31 @@ class TaskWorker:
                 " calling .start()"
             )
 
-        self.
+        self._started_at = None
         self.stopping = True
 
         raise StopTaskWorker
 
+    async def _acquire_token(self, task_run_id: UUID) -> bool:
+        try:
+            if self._limiter:
+                await self._limiter.acquire_on_behalf_of(task_run_id)
+        except RuntimeError:
+            logger.debug(f"Token already acquired for task run: {task_run_id!r}")
+            return False
+
+        return True
+
+    def _release_token(self, task_run_id: UUID) -> bool:
+        try:
+            if self._limiter:
+                self._limiter.release_on_behalf_of(task_run_id)
+        except RuntimeError:
+            logger.debug(f"No token to release for task run: {task_run_id!r}")
+            return False
+
+        return True
+
     async def _subscribe_to_task_scheduling(self):
         base_url = PREFECT_API_URL.value()
         if base_url is None:
@@ -146,24 +202,26 @@
                 "Task workers are not compatible with the ephemeral API."
             )
         task_keys_repr = " | ".join(
-
+            task_key.split(".")[-1].split("-")[0] for task_key in sorted(self.task_keys)
         )
         logger.info(f"Subscribing to runs of task(s): {task_keys_repr}")
         async for task_run in Subscription(
             model=TaskRun,
             path="/task_runs/subscriptions/scheduled",
-            keys=
-            client_id=self.
+            keys=self.task_keys,
+            client_id=self.client_id,
             base_url=base_url,
         ):
             logger.info(f"Received task run: {task_run.id} - {task_run.name}")
-
-
-
-            self.
-
+
+            token_acquired = await self._acquire_token(task_run.id)
+            if token_acquired:
+                self._runs_task_group.start_soon(
+                    self._safe_submit_scheduled_task_run, task_run
+                )
 
     async def _safe_submit_scheduled_task_run(self, task_run: TaskRun):
+        self.in_flight_task_runs[task_run.task_key][task_run.id] = pendulum.now()
         try:
             await self._submit_scheduled_task_run(task_run)
         except BaseException as exc:
@@ -172,8 +230,9 @@
                 exc_info=exc,
             )
         finally:
-
-
+            self.in_flight_task_runs[task_run.task_key].pop(task_run.id, None)
+            self.finished_task_runs[task_run.task_key] += 1
+            self._release_token(task_run.id)
 
     async def _submit_scheduled_task_run(self, task_run: TaskRun):
         logger.debug(
@@ -284,9 +343,9 @@
     async def execute_task_run(self, task_run: TaskRun):
         """Execute a task run in the task worker."""
         async with self if not self.started else asyncnullcontext():
-
-
-
+            token_acquired = await self._acquire_token(task_run.id)
+            if token_acquired:
+                await self._safe_submit_scheduled_task_run(task_run)
 
     async def __aenter__(self):
         logger.debug("Starting task worker...")
@@ -298,17 +357,42 @@
         await self._exit_stack.enter_async_context(self._runs_task_group)
         self._exit_stack.enter_context(self._executor)
 
-        self.
+        self._started_at = pendulum.now()
         return self
 
     async def __aexit__(self, *exc_info):
         logger.debug("Stopping task worker...")
-        self.
+        self._started_at = None
         await self._exit_stack.__aexit__(*exc_info)
 
 
+def create_status_server(task_worker: TaskWorker) -> FastAPI:
+    status_app = FastAPI()
+
+    @status_app.get("/status")
+    def status():
+        return {
+            "client_id": task_worker.client_id,
+            "started_at": task_worker.started_at.isoformat(),
+            "stopping": task_worker.stopping,
+            "limit": task_worker.limit,
+            "current": task_worker.current_tasks,
+            "available": task_worker.available_tasks,
+            "tasks": sorted(task_worker.task_keys),
+            "finished": task_worker.finished_task_runs,
+            "in_flight": {
+                key: {str(run): start.isoformat() for run, start in tasks.items()}
+                for key, tasks in task_worker.in_flight_task_runs.items()
+            },
+        }
+
+    return status_app
+
+
 @sync_compatible
-async def serve(*tasks: Task, limit: Optional[int] = 10):
+async def serve(
+    *tasks: Task, limit: Optional[int] = 10, status_server_port: Optional[int] = None
+):
     """Serve the provided tasks so that their runs may be submitted to and executed.
     in the engine. Tasks do not need to be within a flow run context to be submitted.
     You must `.submit` the same task object that you pass to `serve`.
@@ -318,6 +402,9 @@ async def serve(*tasks: Task, limit: Optional[int] = 10):
             given task, the task run will be submitted to the engine for execution.
         - limit: The maximum number of tasks that can be run concurrently. Defaults to 10.
             Pass `None` to remove the limit.
+        - status_server_port: An optional port on which to start an HTTP server
+            exposing status information about the task worker. If not provided, no
+            status server will run.
 
     Example:
         ```python
@@ -339,6 +426,20 @@ async def serve(*tasks: Task, limit: Optional[int] = 10):
     """
     task_worker = TaskWorker(*tasks, limit=limit)
 
+    status_server_task = None
+    if status_server_port is not None:
+        server = uvicorn.Server(
+            uvicorn.Config(
+                app=create_status_server(task_worker),
+                host="127.0.0.1",
+                port=status_server_port,
+                access_log=False,
+                log_level="warning",
+            )
+        )
+        loop = asyncio.get_event_loop()
+        status_server_task = loop.create_task(server.serve())
+
    try:
         await task_worker.start()
 
@@ -355,3 +456,11 @@ async def serve(*tasks: Task, limit: Optional[int] = 10):
 
     except (asyncio.CancelledError, KeyboardInterrupt):
         logger.info("Task worker interrupted, stopping...")
+
+    finally:
+        if status_server_task:
+            status_server_task.cancel()
+            try:
+                await status_server_task
+            except asyncio.CancelledError:
+                pass
````
prefect/tasks.py
CHANGED
```diff
@@ -37,14 +37,13 @@ from prefect.client.schemas import TaskRun
 from prefect.client.schemas.objects import TaskRunInput, TaskRunResult
 from prefect.context import (
     FlowRunContext,
-    PrefectObjectRegistry,
     TagsContext,
     TaskRunContext,
     serialize_context,
 )
 from prefect.futures import PrefectDistributedFuture, PrefectFuture
 from prefect.logging.loggers import get_logger
-from prefect.records.cache_policies import DEFAULT, CachePolicy
+from prefect.records.cache_policies import DEFAULT, NONE, CachePolicy
 from prefect.results import ResultFactory, ResultSerializer, ResultStorage
 from prefect.settings import (
     PREFECT_TASK_DEFAULT_RETRIES,
@@ -174,7 +173,6 @@ def _infer_parent_task_runs(
     return parents
 
 
-@PrefectObjectRegistry.register_instances
 class Task(Generic[P, R]):
     """
     A Prefect task definition.
@@ -218,10 +216,8 @@ class Task(Generic[P, R]):
             cannot exceed 50.
         retry_jitter_factor: An optional factor that defines the factor to which a retry
             can be jittered in order to avoid a "thundering herd".
-        persist_result: An
-            should be persisted to result storage. Defaults to `
-            that Prefect should choose whether the result should be persisted depending on
-            the features being used.
+        persist_result: An toggle indicating whether the result of this task
+            should be persisted to result storage. Defaults to `True`.
         result_storage: An optional block to use to persist the result of this task.
             Defaults to the value set in the flow the task is called in.
         result_storage_key: An optional key to store the result in storage at when persisted.
@@ -273,7 +269,7 @@ class Task(Generic[P, R]):
             ]
         ] = None,
         retry_jitter_factor: Optional[float] = None,
-        persist_result:
+        persist_result: bool = True,
         result_storage: Optional[ResultStorage] = None,
         result_serializer: Optional[ResultSerializer] = None,
         result_storage_key: Optional[str] = None,
@@ -368,7 +364,11 @@ class Task(Generic[P, R]):
 
         self.task_key = f"{self.fn.__qualname__}-{task_origin_hash}"
 
-
+        if cache_policy is not NotSet and cache_key_fn is not None:
+            logger.warning(
+                f"Both `cache_policy` and `cache_key_fn` are set on task {self}. `cache_key_fn` will be used."
+            )
+
         if cache_key_fn:
             cache_policy = CachePolicy.from_cache_key_fn(cache_key_fn)
 
@@ -377,7 +377,13 @@ class Task(Generic[P, R]):
         self.cache_expiration = cache_expiration
         self.refresh_cache = refresh_cache
 
-        if
+        if not persist_result:
+            self.cache_policy = None if cache_policy is None else NONE
+            if cache_policy and cache_policy is not NotSet and cache_policy != NONE:
+                logger.warning(
+                    "Ignoring `cache_policy` because `persist_result` is False"
+                )
+        elif cache_policy is NotSet and result_storage_key is None:
             self.cache_policy = DEFAULT
         elif result_storage_key:
             # TODO: handle this situation with double storage
@@ -1326,7 +1332,7 @@ def task(
         Callable[[int], List[float]],
     ] = 0,
     retry_jitter_factor: Optional[float] = None,
-    persist_result:
+    persist_result: bool = True,
     result_storage: Optional[ResultStorage] = None,
     result_storage_key: Optional[str] = None,
     result_serializer: Optional[ResultSerializer] = None,
@@ -1358,7 +1364,7 @@ def task(
         float, int, List[float], Callable[[int], List[float]], None
     ] = None,
     retry_jitter_factor: Optional[float] = None,
-    persist_result:
+    persist_result: bool = True,
     result_storage: Optional[ResultStorage] = None,
     result_storage_key: Optional[str] = None,
     result_serializer: Optional[ResultSerializer] = None,
@@ -1404,10 +1410,8 @@ def task(
             cannot exceed 50.
         retry_jitter_factor: An optional factor that defines the factor to which a retry
             can be jittered in order to avoid a "thundering herd".
-        persist_result: An
-            should be persisted to result storage. Defaults to `
-            that Prefect should choose whether the result should be persisted depending on
-            the features being used.
+        persist_result: An toggle indicating whether the result of this task
+            should be persisted to result storage. Defaults to `True`.
         result_storage: An optional block to use to persist the result of this task.
             Defaults to the value set in the flow the task is called in.
         result_storage_key: An optional key to store the result in storage at when persisted.
```
prefect/transactions.py
CHANGED
```diff
@@ -15,8 +15,11 @@ from typing_extensions import Self
 from prefect.context import ContextModel, FlowRunContext, TaskRunContext
 from prefect.records import RecordStore
 from prefect.records.result_store import ResultFactoryStore
-from prefect.results import
-
+from prefect.results import (
+    BaseResult,
+    ResultFactory,
+    get_or_create_default_result_storage,
+)
 from prefect.utilities.asyncutils import run_coro_as_sync
 from prefect.utilities.collections import AutoEnum
 
@@ -86,7 +89,7 @@ class Transaction(ContextModel):
         if parent:
             self.commit_mode = parent.commit_mode
         else:
-            self.commit_mode = CommitMode.
+            self.commit_mode = CommitMode.LAZY
 
         # this needs to go before begin, which could set the state to committed
         self.state = TransactionState.ACTIVE
@@ -233,7 +236,7 @@ def get_transaction() -> Optional[Transaction]:
 def transaction(
     key: Optional[str] = None,
     store: Optional[RecordStore] = None,
-    commit_mode: CommitMode =
+    commit_mode: Optional[CommitMode] = None,
     overwrite: bool = False,
 ) -> Generator[Transaction, None, None]:
     """
@@ -265,12 +268,7 @@ def transaction(
             }
         )
     else:
-        default_storage =
-        if not default_storage._block_document_id:
-            default_name = PREFECT_DEFAULT_RESULT_STORAGE_BLOCK.value().split("/")[
-                -1
-            ]
-            default_storage.save(default_name, overwrite=True, _sync=True)
+        default_storage = get_or_create_default_result_storage(_sync=True)
     if existing_factory:
         new_factory = existing_factory.model_copy(
             update={
```
prefect/types/__init__.py
CHANGED
```diff
@@ -15,12 +15,19 @@ from zoneinfo import available_timezones
 MAX_VARIABLE_NAME_LENGTH = 255
 MAX_VARIABLE_VALUE_LENGTH = 5000
 
-timezone_set = available_timezones()
-
 NonNegativeInteger = Annotated[int, Field(ge=0)]
 PositiveInteger = Annotated[int, Field(gt=0)]
 NonNegativeFloat = Annotated[float, Field(ge=0.0)]
-
+
+TimeZone = Annotated[
+    str,
+    Field(
+        default="UTC",
+        pattern="|".join(
+            [z for z in sorted(available_timezones()) if "localtime" not in z]
+        ),
+    ),
+]
 
 
 BANNED_CHARACTERS = ["/", "%", "&", ">", "<"]
```
prefect/types/entrypoint.py
ADDED
```diff
@@ -0,0 +1,13 @@
+from enum import Enum
+
+
+class EntrypointType(Enum):
+    """
+    Enum representing a entrypoint type.
+
+    File path entrypoints are in the format: `path/to/file.py:function_name`.
+    Module path entrypoints are in the format: `path.to.module.function_name`.
+    """
+
+    FILE_PATH = "file_path"
+    MODULE_PATH = "module_path"
```