prefect-client 3.0.0rc3__py3-none-any.whl → 3.0.0rc5__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (42)
  1. prefect/__init__.py +0 -3
  2. prefect/client/schemas/schedules.py +9 -2
  3. prefect/client/subscriptions.py +3 -3
  4. prefect/client/types/__init__.py +0 -0
  5. prefect/client/types/flexible_schedule_list.py +11 -0
  6. prefect/concurrency/asyncio.py +14 -4
  7. prefect/concurrency/services.py +29 -22
  8. prefect/concurrency/sync.py +3 -5
  9. prefect/context.py +0 -114
  10. prefect/deployments/__init__.py +1 -1
  11. prefect/deployments/runner.py +11 -93
  12. prefect/deployments/schedules.py +5 -7
  13. prefect/docker/__init__.py +20 -0
  14. prefect/docker/docker_image.py +82 -0
  15. prefect/flow_engine.py +96 -20
  16. prefect/flows.py +36 -95
  17. prefect/futures.py +22 -2
  18. prefect/infrastructure/provisioners/cloud_run.py +2 -2
  19. prefect/infrastructure/provisioners/container_instance.py +2 -2
  20. prefect/infrastructure/provisioners/ecs.py +2 -2
  21. prefect/records/result_store.py +5 -1
  22. prefect/results.py +111 -42
  23. prefect/runner/runner.py +5 -3
  24. prefect/runner/server.py +6 -2
  25. prefect/settings.py +1 -1
  26. prefect/states.py +13 -3
  27. prefect/task_engine.py +7 -6
  28. prefect/task_runs.py +23 -9
  29. prefect/task_worker.py +128 -19
  30. prefect/tasks.py +20 -16
  31. prefect/transactions.py +8 -10
  32. prefect/types/__init__.py +10 -3
  33. prefect/types/entrypoint.py +13 -0
  34. prefect/utilities/collections.py +120 -57
  35. prefect/utilities/dockerutils.py +2 -1
  36. prefect/utilities/urls.py +5 -5
  37. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/METADATA +2 -2
  38. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/RECORD +41 -37
  39. prefect/blocks/kubernetes.py +0 -115
  40. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/LICENSE +0 -0
  41. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/WHEEL +0 -0
  42. {prefect_client-3.0.0rc3.dist-info → prefect_client-3.0.0rc5.dist-info}/top_level.txt +0 -0
prefect/results.py CHANGED
@@ -38,7 +38,6 @@ from prefect.settings import (
     PREFECT_RESULTS_DEFAULT_SERIALIZER,
     PREFECT_RESULTS_PERSIST_BY_DEFAULT,
     PREFECT_TASK_SCHEDULING_DEFAULT_STORAGE_BLOCK,
-    default_result_storage_block_name,
 )
 from prefect.utilities.annotations import NotSet
 from prefect.utilities.asyncutils import sync_compatible
@@ -62,35 +61,15 @@ logger = get_logger("results")
 P = ParamSpec("P")
 R = TypeVar("R")
 
+_default_storages: Dict[Tuple[str, str], WritableFileSystem] = {}
 
-@sync_compatible
-async def get_default_result_storage() -> ResultStorage:
-    """
-    Generate a default file system for result storage.
-    """
-    try:
-        return await Block.load(PREFECT_DEFAULT_RESULT_STORAGE_BLOCK.value())
-    except ValueError as e:
-        if "Unable to find" not in str(e):
-            raise e
-        elif (
-            PREFECT_DEFAULT_RESULT_STORAGE_BLOCK.value()
-            == default_result_storage_block_name()
-        ):
-            return LocalFileSystem(basepath=PREFECT_LOCAL_STORAGE_PATH.value())
-        else:
-            raise
-
-
-_default_task_scheduling_storages: Dict[Tuple[str, str], WritableFileSystem] = {}
 
-
-async def get_or_create_default_task_scheduling_storage() -> ResultStorage:
+async def _get_or_create_default_storage(block_document_slug: str) -> ResultStorage:
     """
-    Generate a default file system for background task parameter/result storage.
+    Generate a default file system for storage.
     """
     default_storage_name, storage_path = cache_key = (
-        PREFECT_TASK_SCHEDULING_DEFAULT_STORAGE_BLOCK.value(),
+        block_document_slug,
         PREFECT_LOCAL_STORAGE_PATH.value(),
     )
 
@@ -105,8 +84,8 @@ async def get_or_create_default_task_scheduling_storage() -> ResultStorage:
         if block_type_slug == "local-file-system":
             block = LocalFileSystem(basepath=storage_path)
         else:
-            raise Exception(
-                "The default task storage block does not exist, but it is of type "
+            raise ValueError(
+                "The default storage block does not exist, but it is of type "
                 f"'{block_type_slug}' which cannot be created implicitly. Please create "
                 "the block manually."
             )
@@ -123,13 +102,32 @@ async def get_or_create_default_task_scheduling_storage() -> ResultStorage:
         return block
 
     try:
-        return _default_task_scheduling_storages[cache_key]
+        return _default_storages[cache_key]
     except KeyError:
         storage = await get_storage()
-        _default_task_scheduling_storages[cache_key] = storage
+        _default_storages[cache_key] = storage
         return storage
 
 
+@sync_compatible
+async def get_or_create_default_result_storage() -> ResultStorage:
+    """
+    Generate a default file system for result storage.
+    """
+    return await _get_or_create_default_storage(
+        PREFECT_DEFAULT_RESULT_STORAGE_BLOCK.value()
+    )
+
+
+async def get_or_create_default_task_scheduling_storage() -> ResultStorage:
+    """
+    Generate a default file system for background task parameter/result storage.
+    """
+    return await _get_or_create_default_storage(
+        PREFECT_TASK_SCHEDULING_DEFAULT_STORAGE_BLOCK.value()
+    )
+
+
 def get_default_result_serializer() -> ResultSerializer:
     """
     Generate a default file system for result storage.
@@ -210,7 +208,9 @@ class ResultFactory(BaseModel):
                 kwargs.pop(key)
 
         # Apply defaults
-        kwargs.setdefault("result_storage", await get_default_result_storage())
+        kwargs.setdefault(
+            "result_storage", await get_or_create_default_result_storage()
+        )
         kwargs.setdefault("result_serializer", get_default_result_serializer())
         kwargs.setdefault("persist_result", get_default_persist_setting())
         kwargs.setdefault("cache_result_in_memory", True)
@@ -280,7 +280,9 @@ class ResultFactory(BaseModel):
         """
         Create a new result factory for a task.
         """
-        return await cls._from_task(task, get_default_result_storage, client=client)
+        return await cls._from_task(
+            task, get_or_create_default_result_storage, client=client
+        )
 
     @classmethod
     @inject_client
@@ -429,7 +431,11 @@ class ResultFactory(BaseModel):
 
     @sync_compatible
     async def create_result(
-        self, obj: R, key: Optional[str] = None, expiration: Optional[DateTime] = None
+        self,
+        obj: R,
+        key: Optional[str] = None,
+        expiration: Optional[DateTime] = None,
+        defer_persistence: bool = False,
     ) -> Union[R, "BaseResult[R]"]:
         """
         Create a result type for the given object.
@@ -462,6 +468,7 @@ class ResultFactory(BaseModel):
             serializer=self.serializer,
             cache_object=should_cache_object,
             expiration=expiration,
+            defer_persistence=defer_persistence,
         )
 
     @sync_compatible
@@ -587,6 +594,19 @@ class PersistedResult(BaseResult):
     expiration: Optional[DateTime] = None
 
     _should_cache_object: bool = PrivateAttr(default=True)
+    _persisted: bool = PrivateAttr(default=False)
+    _storage_block: WritableFileSystem = PrivateAttr(default=None)
+    _serializer: Serializer = PrivateAttr(default=None)
+
+    def _cache_object(
+        self,
+        obj: Any,
+        storage_block: WritableFileSystem = None,
+        serializer: Serializer = None,
+    ) -> None:
+        self._cache = obj
+        self._storage_block = storage_block
+        self._serializer = serializer
 
     @sync_compatible
     @inject_client
@@ -599,7 +619,7 @@ class PersistedResult(BaseResult):
             return self._cache
 
         blob = await self._read_blob(client=client)
-        obj = blob.serializer.loads(blob.data)
+        obj = blob.load()
         self.expiration = blob.expiration
 
         if self._should_cache_object:
@@ -630,6 +650,46 @@ class PersistedResult(BaseResult):
         if hasattr(storage_block, "_remote_file_system"):
             return storage_block._remote_file_system._resolve_path(key)
 
+    @sync_compatible
+    @inject_client
+    async def write(self, obj: R = NotSet, client: "PrefectClient" = None) -> None:
+        """
+        Write the result to the storage block.
+        """
+
+        if self._persisted:
+            # don't double write or overwrite
+            return
+
+        # load objects from a cache
+
+        # first the object itself
+        if obj is NotSet and not self.has_cached_object():
+            raise ValueError("Cannot write a result that has no object cached.")
+        obj = obj if obj is not NotSet else self._cache
+
+        # next, the storage block
+        storage_block = self._storage_block
+        if storage_block is None:
+            block_document = await client.read_block_document(self.storage_block_id)
+            storage_block = Block._from_block_document(block_document)
+
+        # finally, the serializer
+        serializer = self._serializer
+        if serializer is None:
+            # this could error if the serializer requires kwargs
+            serializer = Serializer(type=self.serializer_type)
+
+        data = serializer.dumps(obj)
+        blob = PersistedResultBlob(
+            serializer=serializer, data=data, expiration=self.expiration
+        )
+        await storage_block.write_path(self.storage_key, content=blob.to_bytes())
+        self._persisted = True
+
+        if not self._should_cache_object:
+            self._cache = NotSet
+
     @classmethod
     @sync_compatible
     async def create(
@@ -641,6 +701,7 @@ class PersistedResult(BaseResult):
         serializer: Serializer,
         cache_object: bool = True,
         expiration: Optional[DateTime] = None,
+        defer_persistence: bool = False,
     ) -> "PersistedResult[R]":
         """
         Create a new result reference from a user's object.
@@ -650,19 +711,13 @@ class PersistedResult(BaseResult):
         """
         assert (
             storage_block_id is not None
-        ), "Unexpected storage block ID. Was it persisted?"
-        data = serializer.dumps(obj)
-        blob = PersistedResultBlob(
-            serializer=serializer, data=data, expiration=expiration
-        )
+        ), "Unexpected storage block ID. Was it saved?"
 
         key = storage_key_fn()
         if not isinstance(key, str):
             raise TypeError(
                 f"Expected type 'str' for result storage key; got value {key!r}"
             )
-        await storage_block.write_path(key, content=blob.to_bytes())
-
         description = f"Result of type `{type(obj).__name__}`"
         uri = cls._infer_path(storage_block, key)
         if uri:
@@ -682,12 +737,23 @@ class PersistedResult(BaseResult):
             expiration=expiration,
         )
 
-        if cache_object:
+        if cache_object and not defer_persistence:
             # Attach the object to the result so it's available without deserialization
-            result._cache_object(obj)
+            result._cache_object(
+                obj, storage_block=storage_block, serializer=serializer
+            )
 
         object.__setattr__(result, "_should_cache_object", cache_object)
 
+        if not defer_persistence:
+            await result.write(obj=obj)
+        else:
+            # we must cache temporarily to allow for writing later
+            # the cache will be removed on write
+            result._cache_object(
+                obj, storage_block=storage_block, serializer=serializer
+            )
+
         return result
 
 
@@ -703,6 +769,9 @@ class PersistedResultBlob(BaseModel):
     prefect_version: str = Field(default=prefect.__version__)
     expiration: Optional[DateTime] = None
 
+    def load(self) -> Any:
+        return self.serializer.loads(self.data)
+
     def to_bytes(self) -> bytes:
         return self.model_dump_json(serialize_as_any=True).encode()
 
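The net effect of these changes is that `PersistedResult.create` no longer writes to storage as a side effect of creation; persistence moves into an idempotent `write` method that runs immediately by default or later when `defer_persistence=True`. A minimal, self-contained sketch of that create-now/write-later shape, using hypothetical names (`DeferredResult`, a plain `dict` as the storage backend) rather than Prefect's actual classes:

```python
import json
from typing import Any, Callable, Dict

_NOTSET = object()  # stand-in for Prefect's NotSet sentinel


class DeferredResult:
    """Hypothetical miniature of PersistedResult's create/write split."""

    def __init__(self, key: str, write_path: Callable[[str, bytes], None]):
        self.key = key
        self._write_path = write_path  # stand-in for storage_block.write_path
        self._cache: Any = _NOTSET
        self._persisted = False

    @classmethod
    def create(cls, obj: Any, key: str, write_path, defer_persistence: bool = False):
        result = cls(key, write_path)
        result._cache = obj  # always cache so a later write() has something to persist
        if not defer_persistence:
            result.write()  # eager path: behaves like the old create()
        return result

    def write(self) -> None:
        if self._persisted:
            return  # don't double write or overwrite
        if self._cache is _NOTSET:
            raise ValueError("Cannot write a result that has no object cached.")
        self._write_path(self.key, json.dumps(self._cache).encode())
        self._persisted = True


store: Dict[str, bytes] = {}
r = DeferredResult.create({"n": 42}, "my-key", store.__setitem__, defer_persistence=True)
assert "my-key" not in store  # nothing persisted at creation time
r.write()                     # e.g. invoked at transaction commit
assert "my-key" in store
```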
prefect/runner/runner.py CHANGED
@@ -45,7 +45,7 @@ import threading
 from copy import deepcopy
 from functools import partial
 from pathlib import Path
-from typing import Callable, Dict, Iterable, List, Optional, Set, Union
+from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Set, Union
 from uuid import UUID, uuid4
 
 import anyio
@@ -75,7 +75,6 @@ from prefect.deployments.runner import (
     EntrypointType,
     RunnerDeployment,
 )
-from prefect.deployments.schedules import FlexibleScheduleList
 from prefect.events import DeploymentTriggerTypes, TriggerTypes
 from prefect.exceptions import Abort, ObjectNotFound
 from prefect.flows import Flow, load_flow_from_flow_run
@@ -98,6 +97,9 @@ from prefect.utilities.engine import propose_state
 from prefect.utilities.processutils import _register_signal, run_process
 from prefect.utilities.services import critical_service_loop
 
+if TYPE_CHECKING:
+    from prefect.client.types.flexible_schedule_list import FlexibleScheduleList
+
 __all__ = ["Runner"]
 
 
@@ -221,7 +223,7 @@ class Runner:
         cron: Optional[Union[Iterable[str], str]] = None,
         rrule: Optional[Union[Iterable[str], str]] = None,
         paused: Optional[bool] = None,
-        schedules: Optional[FlexibleScheduleList] = None,
+        schedules: Optional["FlexibleScheduleList"] = None,
         schedule: Optional[SCHEDULE_TYPES] = None,
         is_schedule_active: Optional[bool] = None,
         parameters: Optional[dict] = None,
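The `FlexibleScheduleList` import moves under a `TYPE_CHECKING` guard: the name is only needed for annotations, so deferring the import keeps it out of the runtime import graph (the usual cure for circular imports) at the cost of quoting the annotation. The general pattern, sketched with the import from this diff and a stand-in function signature:

```python
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # evaluated by type checkers only; never executed at runtime,
    # so it cannot participate in an import cycle
    from prefect.client.types.flexible_schedule_list import FlexibleScheduleList


def to_deployment(schedules: Optional["FlexibleScheduleList"] = None) -> None:
    # the quoted annotation is resolved lazily, if at all
    ...
```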
prefect/runner/server.py CHANGED
@@ -10,7 +10,7 @@ from typing_extensions import Literal
 from prefect._internal.schemas.validators import validate_values_conform_to_schema
 from prefect.client.orchestration import get_client
 from prefect.exceptions import MissingFlowError, ScriptError
-from prefect.flows import Flow, load_flow_from_entrypoint, load_flows_from_script
+from prefect.flows import Flow, load_flow_from_entrypoint
 from prefect.logging import get_logger
 from prefect.runner.utils import (
     inject_schemas_into_openapi,
@@ -24,6 +24,7 @@ from prefect.settings import (
     PREFECT_RUNNER_SERVER_PORT,
 )
 from prefect.utilities.asyncutils import sync_compatible
+from prefect.utilities.importtools import load_script_as_module
 
 if TYPE_CHECKING:
     from prefect.client.schemas.responses import DeploymentResponse
@@ -155,7 +156,10 @@ async def get_subflow_schemas(runner: "Runner") -> Dict[str, Dict]:
             continue
 
         script = deployment.entrypoint.split(":")[0]
-        subflows = load_flows_from_script(script)
+        module = load_script_as_module(script)
+        subflows = [
+            obj for obj in module.__dict__.values() if isinstance(obj, Flow)
+        ]
         for flow in subflows:
             schemas[flow.name] = flow.parameters.model_dump()
 
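`load_flows_from_script` is gone; the server now loads the deployment's script as a module via `load_script_as_module` and picks out the `Flow` objects itself. For reference, a generic stdlib sketch of executing a script as a module and scanning its namespace, which is roughly what this pattern relies on (Prefect's helper adds its own error handling):

```python
import importlib.util
from pathlib import Path


def load_script(path: str):
    """Execute a Python script and return it as a module object."""
    spec = importlib.util.spec_from_file_location(Path(path).stem, path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)  # runs the script's top-level code
    return module


# module = load_script("flows.py")
# subflows = [obj for obj in module.__dict__.values() if isinstance(obj, Flow)]
```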
prefect/settings.py CHANGED
@@ -1541,7 +1541,7 @@ The maximum number of retries to queue for submission.
 
 PREFECT_TASK_SCHEDULING_PENDING_TASK_TIMEOUT = Setting(
     timedelta,
-    default=timedelta(seconds=30),
+    default=timedelta(0),
 )
 """
 How long before a PENDING task are made available to another task worker. In practice,
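With the default now `timedelta(0)`, PENDING task runs are immediately eligible to be picked up by another task worker rather than being held for 30 seconds. If the old behavior is needed, the setting can still be overridden; a sketch using `temporary_settings`, assuming the standard Prefect settings override mechanism applies here:

```python
from datetime import timedelta

from prefect.settings import (
    PREFECT_TASK_SCHEDULING_PENDING_TASK_TIMEOUT,
    temporary_settings,
)

# restore the previous 30-second hold on PENDING task runs for this process
with temporary_settings(
    {PREFECT_TASK_SCHEDULING_PENDING_TASK_TIMEOUT: timedelta(seconds=30)}
):
    ...
```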
prefect/states.py CHANGED
@@ -209,6 +209,7 @@ async def return_value_to_state(
     result_factory: ResultFactory,
     key: Optional[str] = None,
     expiration: Optional[datetime.datetime] = None,
+    defer_persistence: bool = False,
 ) -> State[R]:
     """
     Given a return value from a user's function, create a `State` the run should
@@ -242,7 +243,10 @@ async def return_value_to_state(
         # to update the data to the correct type
         if not isinstance(state.data, BaseResult):
             state.data = await result_factory.create_result(
-                state.data, key=key, expiration=expiration
+                state.data,
+                key=key,
+                expiration=expiration,
+                defer_persistence=defer_persistence,
             )
 
         return state
@@ -284,7 +288,10 @@ async def return_value_to_state(
             type=new_state_type,
             message=message,
             data=await result_factory.create_result(
-                retval, key=key, expiration=expiration
+                retval,
+                key=key,
+                expiration=expiration,
+                defer_persistence=defer_persistence,
             ),
         )
 
@@ -300,7 +307,10 @@ async def return_value_to_state(
     else:
         return Completed(
             data=await result_factory.create_result(
-                data, key=key, expiration=expiration
+                data,
+                key=key,
+                expiration=expiration,
+                defer_persistence=defer_persistence,
             )
         )
 
prefect/task_engine.py CHANGED
@@ -310,6 +310,8 @@ class TaskRunEngine(Generic[P, R]):
                 result_factory=result_factory,
                 key=transaction.key,
                 expiration=expiration,
+                # defer persistence to transaction commit
+                defer_persistence=True,
             )
         )
         transaction.stage(
@@ -417,9 +419,7 @@ class TaskRunEngine(Generic[P, R]):
                 log_prints=log_prints,
                 task_run=self.task_run,
                 parameters=self.parameters,
-                result_factory=run_coro_as_sync(
-                    ResultFactory.from_autonomous_task(self.task)
-                ),  # type: ignore
+                result_factory=run_coro_as_sync(ResultFactory.from_task(self.task)),  # type: ignore
                 client=client,
             )
         )
@@ -467,9 +467,6 @@ class TaskRunEngine(Generic[P, R]):
                     extra_task_inputs=dependencies,
                 )
             )
-            self.logger.info(
-                f"Created task run {self.task_run.name!r} for task {self.task.name!r}"
-            )
             # Emit an event to capture that the task run was in the `PENDING` state.
             self._last_event = emit_task_run_state_change_event(
                 task_run=self.task_run,
@@ -478,6 +475,10 @@ class TaskRunEngine(Generic[P, R]):
             )
 
             with self.setup_run_context():
+                # setup_run_context might update the task run name, so log creation here
+                self.logger.info(
+                    f"Created task run {self.task_run.name!r} for task {self.task.name!r}"
+                )
                 yield self
 
         except Exception:
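Together with the `results.py` changes, this is what `defer_persistence=True` exists for: the engine creates the result object up front but stages the actual write on the transaction, so storage is only touched when the transaction commits. In miniature (`Transaction` here is a toy stand-in, not Prefect's):

```python
from typing import Callable, List


class Transaction:
    """Toy transaction that runs deferred writes at commit time."""

    def __init__(self) -> None:
        self._on_commit: List[Callable[[], None]] = []

    def stage(self, on_commit: Callable[[], None]) -> None:
        self._on_commit.append(on_commit)  # remember the write, don't do it yet

    def commit(self) -> None:
        for hook in self._on_commit:
            hook()  # e.g. result.write() for a defer_persistence=True result


written: List[str] = []
txn = Transaction()
txn.stage(lambda: written.append("result-blob"))
assert not written  # nothing persisted while the task is still running
txn.commit()
assert written == ["result-blob"]
```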
prefect/task_runs.py CHANGED
@@ -92,13 +92,18 @@ class TaskRunWaiter:
             raise RuntimeError("TaskRunWaiter must run on the global loop thread.")
 
         self._loop = loop_thread._loop
-        self._consumer_task = self._loop.create_task(self._consume_events())
+
+        consumer_started = asyncio.Event()
+        self._consumer_task = self._loop.create_task(
+            self._consume_events(consumer_started)
+        )
+        asyncio.run_coroutine_threadsafe(consumer_started.wait(), self._loop)
 
         loop_thread.add_shutdown_call(create_call(self.stop))
         atexit.register(self.stop)
         self._started = True
 
-    async def _consume_events(self):
+    async def _consume_events(self, consumer_started: asyncio.Event):
         async with get_events_subscriber(
             filter=EventFilter(
                 event=EventNameFilter(
@@ -109,6 +114,7 @@ class TaskRunWaiter:
                 )
             )
         ) as subscriber:
+            consumer_started.set()
             async for event in subscriber:
                 try:
                     self.logger.debug(
@@ -119,6 +125,7 @@ class TaskRunWaiter:
                             "prefect.task-run.", ""
                         )
                     )
+
                     with self._observed_completed_task_runs_lock:
                         # Cache the task run ID for a short period of time to avoid
                         # unnecessary waits
@@ -172,14 +179,21 @@ class TaskRunWaiter:
             # when the event is received
             instance._completion_events[task_run_id] = finished_event
 
-        with anyio.move_on_after(delay=timeout):
-            await from_async.wait_for_call_in_loop_thread(
-                create_call(finished_event.wait)
-            )
+        try:
+            # Now check one more time whether the task run arrived before we start to
+            # wait on it, in case it came in while we were setting up the event above.
+            with instance._observed_completed_task_runs_lock:
+                if task_run_id in instance._observed_completed_task_runs:
+                    return
 
-        with instance._completion_events_lock:
-            # Remove the event from the cache after it has been waited on
-            instance._completion_events.pop(task_run_id, None)
+            with anyio.move_on_after(delay=timeout):
+                await from_async.wait_for_call_in_loop_thread(
+                    create_call(finished_event.wait)
+                )
+        finally:
+            with instance._completion_events_lock:
+                # Remove the event from the cache after it has been waited on
+                instance._completion_events.pop(task_run_id, None)
 
     @classmethod
     def instance(cls):
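Two race fixes are bundled here. First, `start()` now hands the consumer an `asyncio.Event` and blocks until the events subscription is actually open, so no completion events slip past before the waiter is listening. Second, the wait path re-checks the observed-completions cache after registering its event and moves cleanup into a `finally`. A self-contained sketch of the readiness handshake (the names are illustrative, not Prefect's):

```python
import asyncio


async def consume(started: asyncio.Event, queue: asyncio.Queue) -> None:
    # ...open the subscription here, then signal readiness...
    started.set()  # only now is it safe for producers to proceed
    while (item := await queue.get()) is not None:
        print("consumed", item)


async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue()
    started = asyncio.Event()
    consumer = asyncio.create_task(consume(started, queue))
    await started.wait()  # don't publish until the consumer is listening
    await queue.put("task-run-finished")
    await queue.put(None)  # sentinel: shut the consumer down
    await consumer


asyncio.run(main())
```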