prefect-client 3.0.0rc19__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in a supported public registry. It is provided for informational purposes only.
- prefect/__init__.py +0 -3
- prefect/_internal/compatibility/migration.py +1 -1
- prefect/artifacts.py +1 -1
- prefect/blocks/core.py +8 -5
- prefect/blocks/notifications.py +10 -10
- prefect/blocks/system.py +52 -16
- prefect/blocks/webhook.py +3 -1
- prefect/client/cloud.py +57 -7
- prefect/client/collections.py +1 -1
- prefect/client/orchestration.py +68 -7
- prefect/client/schemas/objects.py +40 -2
- prefect/concurrency/asyncio.py +8 -2
- prefect/concurrency/services.py +16 -6
- prefect/concurrency/sync.py +4 -1
- prefect/context.py +7 -9
- prefect/deployments/runner.py +3 -3
- prefect/exceptions.py +12 -0
- prefect/filesystems.py +5 -3
- prefect/flow_engine.py +16 -10
- prefect/flows.py +2 -4
- prefect/futures.py +2 -1
- prefect/locking/__init__.py +0 -0
- prefect/locking/memory.py +213 -0
- prefect/locking/protocol.py +122 -0
- prefect/logging/handlers.py +4 -1
- prefect/main.py +8 -6
- prefect/records/filesystem.py +4 -2
- prefect/records/result_store.py +12 -6
- prefect/results.py +768 -363
- prefect/settings.py +24 -10
- prefect/states.py +82 -27
- prefect/task_engine.py +51 -26
- prefect/task_worker.py +6 -4
- prefect/tasks.py +24 -6
- prefect/transactions.py +57 -36
- prefect/utilities/annotations.py +4 -3
- prefect/utilities/asyncutils.py +1 -1
- prefect/utilities/callables.py +1 -3
- prefect/utilities/dispatch.py +16 -11
- prefect/utilities/schema_tools/hydration.py +13 -0
- prefect/variables.py +34 -24
- prefect/workers/base.py +78 -18
- prefect/workers/process.py +1 -3
- {prefect_client-3.0.0rc19.dist-info → prefect_client-3.0.1.dist-info}/METADATA +2 -2
- {prefect_client-3.0.0rc19.dist-info → prefect_client-3.0.1.dist-info}/RECORD +48 -46
- prefect/manifests.py +0 -21
- {prefect_client-3.0.0rc19.dist-info → prefect_client-3.0.1.dist-info}/LICENSE +0 -0
- {prefect_client-3.0.0rc19.dist-info → prefect_client-3.0.1.dist-info}/WHEEL +0 -0
- {prefect_client-3.0.0rc19.dist-info → prefect_client-3.0.1.dist-info}/top_level.txt +0 -0
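The headline change in this release is the rewrite of `prefect/results.py` (detailed below): the internal `ResultFactory` is replaced by a public `ResultStore` model with explicit read/write, metadata, and locking surfaces, and the `UnpersistedResult`, `PersistedResultBlob`, and `UnknownResult` types are removed in favor of `ResultRecord`. As a minimal sketch of the new surface — inferred from the signatures visible in this diff, with an illustrative storage path and key, not an official usage guide:

from prefect.filesystems import LocalFileSystem
from prefect.results import ResultStore

# All fields are optional; omitted ones fall back to Prefect's defaults
# (default result storage, default serializer, etc.).
store = ResultStore(
    result_storage=LocalFileSystem(basepath="/tmp/prefect-results"),  # illustrative path
    persist_result=True,
)

store.write(key="demo-key", obj={"answer": 42})  # wraps obj in a ResultRecord and persists it
assert store.exists("demo-key")
record = store.read("demo-key")                  # returns a ResultRecord
assert record.result == {"answer": 42}

Async counterparts (`awrite`, `aexists`, `aread`) sit alongside each synchronous method, per the diff below.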
prefect/results.py
CHANGED
@@ -1,11 +1,13 @@
 import abc
 import inspect
+import os
+import socket
+import threading
 import uuid
 from functools import partial
 from typing import (
     TYPE_CHECKING,
     Any,
-    Awaitable,
     Callable,
     Dict,
     Generic,
@@ -17,7 +19,15 @@ from typing import (
 )
 from uuid import UUID
 
-from pydantic import
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    PrivateAttr,
+    ValidationError,
+    model_serializer,
+    model_validator,
+)
 from pydantic_core import PydanticUndefinedType
 from pydantic_extra_types.pendulum_dt import DateTime
 from typing_extensions import ParamSpec, Self
@@ -25,13 +35,18 @@ from typing_extensions import ParamSpec, Self
 import prefect
 from prefect.blocks.core import Block
 from prefect.client.utilities import inject_client
-from prefect.exceptions import
+from prefect.exceptions import (
+    ConfigurationError,
+    MissingContextError,
+    SerializationError,
+)
 from prefect.filesystems import (
     LocalFileSystem,
     WritableFileSystem,
 )
+from prefect.locking.protocol import LockManager
 from prefect.logging import get_logger
-from prefect.serializers import Serializer
+from prefect.serializers import PickleSerializer, Serializer
 from prefect.settings import (
     PREFECT_DEFAULT_RESULT_STORAGE_BLOCK,
     PREFECT_LOCAL_STORAGE_PATH,
@@ -46,6 +61,7 @@ from prefect.utilities.pydantic import get_dispatch_key, lookup_type, register_b
 if TYPE_CHECKING:
     from prefect import Flow, Task
     from prefect.client.orchestration import PrefectClient
+    from prefect.transactions import IsolationLevel
 
 
 ResultStorage = Union[WritableFileSystem, str]
@@ -65,18 +81,66 @@ _default_storages: Dict[Tuple[str, str], WritableFileSystem] = {}
 
 
 @sync_compatible
-async def get_default_result_storage() ->
+async def get_default_result_storage() -> WritableFileSystem:
     """
     Generate a default file system for result storage.
     """
     default_block = PREFECT_DEFAULT_RESULT_STORAGE_BLOCK.value()
 
     if default_block is not None:
-        return await
+        return await resolve_result_storage(default_block)
 
     # otherwise, use the local file system
     basepath = PREFECT_LOCAL_STORAGE_PATH.value()
-    return LocalFileSystem(basepath=basepath)
+    return LocalFileSystem(basepath=str(basepath))
+
+
+@sync_compatible
+async def resolve_result_storage(
+    result_storage: ResultStorage,
+) -> WritableFileSystem:
+    """
+    Resolve one of the valid `ResultStorage` input types into a saved block
+    document id and an instance of the block.
+    """
+    from prefect.client.orchestration import get_client
+
+    client = get_client()
+    if isinstance(result_storage, Block):
+        storage_block = result_storage
+
+        if storage_block._block_document_id is not None:
+            # Avoid saving the block if it already has an identifier assigned
+            storage_block_id = storage_block._block_document_id
+        else:
+            storage_block_id = None
+    elif isinstance(result_storage, str):
+        storage_block = await Block.load(result_storage, client=client)
+        storage_block_id = storage_block._block_document_id
+        assert storage_block_id is not None, "Loaded storage blocks must have ids"
+    else:
+        raise TypeError(
+            "Result storage must be one of the following types: 'UUID', 'Block', "
+            f"'str'. Got unsupported type {type(result_storage).__name__!r}."
+        )
+
+    return storage_block
+
+
+def resolve_serializer(serializer: ResultSerializer) -> Serializer:
+    """
+    Resolve one of the valid `ResultSerializer` input types into a serializer
+    instance.
+    """
+    if isinstance(serializer, Serializer):
+        return serializer
+    elif isinstance(serializer, str):
+        return Serializer(type=serializer)
+    else:
+        raise TypeError(
+            "Result serializer must be one of the following types: 'Serializer', "
+            f"'str'. Got unsupported type {type(serializer).__name__!r}."
        )
 
 
 async def get_or_create_default_task_scheduling_storage() -> ResultStorage:
@@ -93,11 +157,11 @@ async def get_or_create_default_task_scheduling_storage() -> ResultStorage:
     return LocalFileSystem(basepath=basepath)
 
 
-def get_default_result_serializer() ->
+def get_default_result_serializer() -> Serializer:
     """
     Generate a default file system for result storage.
     """
-    return PREFECT_RESULTS_DEFAULT_SERIALIZER.value()
+    return resolve_serializer(PREFECT_RESULTS_DEFAULT_SERIALIZER.value())
 
 
 def get_default_persist_setting() -> bool:
@@ -114,217 +178,498 @@ def _format_user_supplied_storage_key(key: str) -> str:
     return key.format(**runtime_vars, parameters=prefect.runtime.task_run.parameters)
 
 
-class
+class ResultStore(BaseModel):
     """
-
+    Manages the storage and retrieval of results.
+
+    Attributes:
+        result_storage: The storage for result records. If not provided, the default
+            result storage will be used.
+        metadata_storage: The storage for result record metadata. If not provided,
+            the metadata will be stored alongside the results.
+        lock_manager: The lock manager to use for locking result records. If not provided,
+            the store cannot be used in transactions with the SERIALIZABLE isolation level.
+        persist_result: Whether to persist results.
+        cache_result_in_memory: Whether to cache results in memory.
+        serializer: The serializer to use for results.
+        storage_key_fn: The function to generate storage keys.
     """
 
-
-    cache_result_in_memory: bool
-    serializer: Serializer
-    storage_block_id: Optional[uuid.UUID] = None
-    storage_block: WritableFileSystem
-    storage_key_fn: Callable[[], str]
+    model_config = ConfigDict(arbitrary_types_allowed=True)
 
-
-
-
+    result_storage: Optional[WritableFileSystem] = Field(default=None)
+    metadata_storage: Optional[WritableFileSystem] = Field(default=None)
+    lock_manager: Optional[LockManager] = Field(default=None)
+    persist_result: bool = Field(default_factory=get_default_persist_setting)
+    cache_result_in_memory: bool = Field(default=True)
+    serializer: Serializer = Field(default_factory=get_default_result_serializer)
+    storage_key_fn: Callable[[], str] = Field(default=DEFAULT_STORAGE_KEY_FN)
+
+    @property
+    def result_storage_block_id(self) -> Optional[UUID]:
+        if self.result_storage is None:
+            return None
+        return self.result_storage._block_document_id
+
+    @sync_compatible
+    async def update_for_flow(self, flow: "Flow") -> Self:
         """
-        Create a new result
+        Create a new result store for a flow with updated settings.
+
+        Args:
+            flow: The flow to update the result store for.
 
-
-
+        Returns:
+            An updated result store.
         """
-
-
-
-
+        update = {}
+        if flow.result_storage is not None:
+            update["result_storage"] = await resolve_result_storage(flow.result_storage)
+        if flow.result_serializer is not None:
+            update["serializer"] = resolve_serializer(flow.result_serializer)
+        if flow.persist_result is not None:
+            update["persist_result"] = flow.persist_result
+        if flow.cache_result_in_memory is not None:
+            update["cache_result_in_memory"] = flow.cache_result_in_memory
+        if self.result_storage is None and update.get("result_storage") is None:
+            update["result_storage"] = await get_default_result_storage()
+        return self.model_copy(update=update)
 
-
-
-
-
-        kwargs.setdefault("cache_result_in_memory", True)
-        kwargs.setdefault("storage_key_fn", DEFAULT_STORAGE_KEY_FN)
+    @sync_compatible
+    async def update_for_task(self: Self, task: "Task") -> Self:
+        """
+        Create a new result store for a task.
 
-
+        Args:
+            task: The task to update the result store for.
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            result_serializer=flow.result_serializer
-            or ctx.result_factory.serializer,
-            persist_result=flow.persist_result,
-            cache_result_in_memory=flow.cache_result_in_memory,
-            storage_key_fn=DEFAULT_STORAGE_KEY_FN,
-            client=client,
+        Returns:
+            An updated result store.
+        """
+        update = {}
+        if task.result_storage is not None:
+            update["result_storage"] = await resolve_result_storage(task.result_storage)
+        if task.result_serializer is not None:
+            update["serializer"] = resolve_serializer(task.result_serializer)
+        if task.persist_result is not None:
+            update["persist_result"] = task.persist_result
+        if task.cache_result_in_memory is not None:
+            update["cache_result_in_memory"] = task.cache_result_in_memory
+        if task.result_storage_key is not None:
+            update["storage_key_fn"] = partial(
+                _format_user_supplied_storage_key, task.result_storage_key
             )
+        if self.result_storage is None and update.get("result_storage") is None:
+            update["result_storage"] = await get_default_result_storage()
+        return self.model_copy(update=update)
+
+    @staticmethod
+    def generate_default_holder() -> str:
+        """
+        Generate a default holder string using hostname, PID, and thread ID.
+
+        Returns:
+            str: A unique identifier string.
+        """
+        hostname = socket.gethostname()
+        pid = os.getpid()
+        thread_name = threading.current_thread().name
+        thread_id = threading.get_ident()
+        return f"{hostname}:{pid}:{thread_id}:{thread_name}"
+
+    @sync_compatible
+    async def _exists(self, key: str) -> bool:
+        """
+        Check if a result record exists in storage.
+
+        Args:
+            key: The key to check for the existence of a result record.
+
+        Returns:
+            bool: True if the result record exists, False otherwise.
+        """
+        if self.metadata_storage is not None:
+            # TODO: Add an `exists` method to commonly used storage blocks
+            # so the entire payload doesn't need to be read
+            try:
+                metadata_content = await self.metadata_storage.read_path(key)
+                return metadata_content is not None
+            except Exception:
+                return False
         else:
-
-
-
-
-
-
-
-
-
-
+            try:
+                content = await self.result_storage.read_path(key)
+                return content is not None
+            except Exception:
+                return False
+
+    def exists(self, key: str) -> bool:
+        """
+        Check if a result record exists in storage.
+
+        Args:
+            key: The key to check for the existence of a result record.
+
+        Returns:
+            bool: True if the result record exists, False otherwise.
+        """
+        return self._exists(key=key, _sync=True)
+
+    async def aexists(self, key: str) -> bool:
+        """
+        Check if a result record exists in storage.
+
+        Args:
+            key: The key to check for the existence of a result record.
+
+        Returns:
+            bool: True if the result record exists, False otherwise.
+        """
+        return await self._exists(key=key, _sync=False)
+
+    @sync_compatible
+    async def _read(self, key: str, holder: str) -> "ResultRecord":
+        """
+        Read a result record from storage.
+
+        This is the internal implementation. Use `read` or `aread` for synchronous and
+        asynchronous result reading respectively.
+
+        Args:
+            key: The key to read the result record from.
+            holder: The holder of the lock if a lock was set on the record.
+
+        Returns:
+            A result record.
+        """
+        if self.lock_manager is not None and not self.is_lock_holder(key, holder):
+            await self.await_for_lock(key)
+
+        if self.result_storage is None:
+            self.result_storage = await get_default_result_storage()
+
+        if self.metadata_storage is not None:
+            metadata_content = await self.metadata_storage.read_path(key)
+            metadata = ResultRecordMetadata.load_bytes(metadata_content)
+            assert (
+                metadata.storage_key is not None
+            ), "Did not find storage key in metadata"
+            result_content = await self.result_storage.read_path(metadata.storage_key)
+            return ResultRecord.deserialize_from_result_and_metadata(
+                result=result_content, metadata=metadata_content
             )
+        else:
+            content = await self.result_storage.read_path(key)
+            return ResultRecord.deserialize(content)
 
-
-    @inject_client
-    async def from_task(
-        cls: Type[Self], task: "Task", client: "PrefectClient" = None
-    ) -> Self:
+    def read(self, key: str, holder: Optional[str] = None) -> "ResultRecord":
         """
-
+        Read a result record from storage.
+
+        Args:
+            key: The key to read the result record from.
+            holder: The holder of the lock if a lock was set on the record.
+        Returns:
+            A result record.
         """
-
+        holder = holder or self.generate_default_holder()
+        return self._read(key=key, holder=holder, _sync=True)
 
-
-    @inject_client
-    async def from_autonomous_task(
-        cls: Type[Self], task: "Task[P, R]", client: "PrefectClient" = None
-    ) -> Self:
+    async def aread(self, key: str, holder: Optional[str] = None) -> "ResultRecord":
         """
-
+        Read a result record from storage.
+
+        Args:
+            key: The key to read the result record from.
+            holder: The holder of the lock if a lock was set on the record.
+        Returns:
+            A result record.
         """
-
-
+        holder = holder or self.generate_default_holder()
+        return await self._read(key=key, holder=holder, _sync=False)
+
+    def create_result_record(
+        self,
+        key: str,
+        obj: Any,
+        expiration: Optional[DateTime] = None,
+    ):
+        """
+        Create a result record.
+
+        Args:
+            key: The key to create the result record for.
+            obj: The object to create the result record for.
+            expiration: The expiration time for the result record.
+        """
+        key = key or self.storage_key_fn()
+
+        return ResultRecord(
+            result=obj,
+            metadata=ResultRecordMetadata(
+                serializer=self.serializer,
+                expiration=expiration,
+                storage_key=key,
+                storage_block_id=self.result_storage_block_id,
+            ),
         )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def write(
+        self,
+        key: str,
+        obj: Any,
+        expiration: Optional[DateTime] = None,
+        holder: Optional[str] = None,
+    ):
+        """
+        Write a result to storage.
+
+        Handles the creation of a `ResultRecord` and its serialization to storage.
+
+        Args:
+            key: The key to write the result record to.
+            obj: The object to write to storage.
+            expiration: The expiration time for the result record.
+            holder: The holder of the lock if a lock was set on the record.
+        """
+        holder = holder or self.generate_default_holder()
+        return self.persist_result_record(
+            result_record=self.create_result_record(
+                key=key, obj=obj, expiration=expiration
+            ),
+            holder=holder,
        )
-
-
-
-
+
+    async def awrite(
+        self,
+        key: str,
+        obj: Any,
+        expiration: Optional[DateTime] = None,
+        holder: Optional[str] = None,
+    ):
+        """
+        Write a result to storage.
+
+        Args:
+            key: The key to write the result record to.
+            obj: The object to write to storage.
+            expiration: The expiration time for the result record.
+            holder: The holder of the lock if a lock was set on the record.
+        """
+        holder = holder or self.generate_default_holder()
+        return await self.apersist_result_record(
+            result_record=self.create_result_record(
+                key=key, obj=obj, expiration=expiration
+            ),
+            holder=holder,
         )
-
-
-
-
-
+
+    @sync_compatible
+    async def _persist_result_record(self, result_record: "ResultRecord", holder: str):
+        """
+        Persist a result record to storage.
+
+        Args:
+            result_record: The result record to persist.
+            holder: The holder of the lock if a lock was set on the record.
+        """
+        assert (
+            result_record.metadata.storage_key is not None
+        ), "Storage key is required on result record"
+
+        key = result_record.metadata.storage_key
+        if (
+            self.lock_manager is not None
+            and self.is_locked(key)
+            and not self.is_lock_holder(key, holder)
+        ):
+            raise RuntimeError(
+                f"Cannot write to result record with key {key} because it is locked by "
+                f"another holder."
+            )
+        if self.result_storage is None:
+            self.result_storage = await get_default_result_storage()
+
+        # If metadata storage is configured, write result and metadata separately
+        if self.metadata_storage is not None:
+            await self.result_storage.write_path(
+                result_record.metadata.storage_key,
+                content=result_record.serialize_result(),
             )
+            await self.metadata_storage.write_path(
+                result_record.metadata.storage_key,
+                content=result_record.serialize_metadata(),
+            )
+        # Otherwise, write the result metadata and result together
         else:
-
-
-
-
-        return await cls.from_settings(
-            result_storage=result_storage,
-            result_serializer=result_serializer,
-            persist_result=persist_result,
-            cache_result_in_memory=cache_result_in_memory,
-            client=client,
-            storage_key_fn=(
-                partial(_format_user_supplied_storage_key, task.result_storage_key)
-                if task.result_storage_key is not None
-                else DEFAULT_STORAGE_KEY_FN
-            ),
-        )
+            await self.result_storage.write_path(
+                result_record.metadata.storage_key, content=result_record.serialize()
+            )
 
-
-
-
-
-
-        result_serializer: ResultSerializer,
-        persist_result: Optional[bool],
-        cache_result_in_memory: bool,
-        storage_key_fn: Callable[[], str],
-        client: "PrefectClient",
-    ) -> Self:
-        if persist_result is None:
-            persist_result = get_default_persist_setting()
+    def persist_result_record(
+        self, result_record: "ResultRecord", holder: Optional[str] = None
+    ):
+        """
+        Persist a result record to storage.
 
-
-
+        Args:
+            result_record: The result record to persist.
+        """
+        holder = holder or self.generate_default_holder()
+        return self._persist_result_record(
+            result_record=result_record, holder=holder, _sync=True
         )
-        serializer = cls.resolve_serializer(result_serializer)
 
-
-
-
-
-
-
-
+    async def apersist_result_record(
+        self, result_record: "ResultRecord", holder: Optional[str] = None
+    ):
+        """
+        Persist a result record to storage.
+
+        Args:
+            result_record: The result record to persist.
+        """
+        holder = holder or self.generate_default_holder()
+        return await self._persist_result_record(
+            result_record=result_record, holder=holder, _sync=False
        )
 
-
-
-
-
-
-
-
-
-
-        """
-
-
-
-
-
-
-        else:
-            storage_block_id = None
-        elif isinstance(result_storage, str):
-            storage_block = await Block.load(result_storage, client=client)
-            storage_block_id = storage_block._block_document_id
-            assert storage_block_id is not None, "Loaded storage blocks must have ids"
+    def supports_isolation_level(self, level: "IsolationLevel") -> bool:
+        """
+        Check if the result store supports a given isolation level.
+
+        Args:
+            level: The isolation level to check.
+
+        Returns:
+            bool: True if the isolation level is supported, False otherwise.
+        """
+        from prefect.transactions import IsolationLevel
+
+        if level == IsolationLevel.READ_COMMITTED:
+            return True
+        elif level == IsolationLevel.SERIALIZABLE:
+            return self.lock_manager is not None
         else:
-            raise
-
-
+            raise ValueError(f"Unsupported isolation level: {level}")
+
+    def acquire_lock(
+        self, key: str, holder: Optional[str] = None, timeout: Optional[float] = None
+    ) -> bool:
+        """
+        Acquire a lock for a result record.
+
+        Args:
+            key: The key to acquire the lock for.
+            holder: The holder of the lock. If not provided, a default holder based on the
+                current host, process, and thread will be used.
+            timeout: The timeout for the lock.
+
+        Returns:
+            bool: True if the lock was successfully acquired; False otherwise.
+        """
+        holder = holder or self.generate_default_holder()
+        if self.lock_manager is None:
+            raise ConfigurationError(
+                "Result store is not configured with a lock manager. Please set"
+                " a lock manager when creating the result store to enable locking."
            )
+        return self.lock_manager.acquire_lock(key, holder, timeout)
 
-
+    async def aacquire_lock(
+        self, key: str, holder: Optional[str] = None, timeout: Optional[float] = None
+    ) -> bool:
+        """
+        Acquire a lock for a result record.
 
-
-
+        Args:
+            key: The key to acquire the lock for.
+            holder: The holder of the lock. If not provided, a default holder based on the
+                current host, process, and thread will be used.
+            timeout: The timeout for the lock.
+
+        Returns:
+            bool: True if the lock was successfully acquired; False otherwise.
         """
-
-
+        holder = holder or self.generate_default_holder()
+        if self.lock_manager is None:
+            raise ConfigurationError(
+                "Result store is not configured with a lock manager. Please set"
+                " a lock manager when creating the result store to enable locking."
+            )
+
+        return await self.lock_manager.aacquire_lock(key, holder, timeout)
+
+    def release_lock(self, key: str, holder: Optional[str] = None):
         """
-
-
-
-
-
-
-
-
+        Release a lock for a result record.
+
+        Args:
+            key: The key to release the lock for.
+            holder: The holder of the lock. Must match the holder that acquired the lock.
+                If not provided, a default holder based on the current host, process, and
+                thread will be used.
+        """
+        holder = holder or self.generate_default_holder()
+        if self.lock_manager is None:
+            raise ConfigurationError(
+                "Result store is not configured with a lock manager. Please set"
+                " a lock manager when creating the result store to enable locking."
+            )
+        return self.lock_manager.release_lock(key, holder)
+
+    def is_locked(self, key: str) -> bool:
+        """
+        Check if a result record is locked.
+        """
+        if self.lock_manager is None:
+            raise ConfigurationError(
+                "Result store is not configured with a lock manager. Please set"
+                " a lock manager when creating the result store to enable locking."
+            )
+        return self.lock_manager.is_locked(key)
+
+    def is_lock_holder(self, key: str, holder: Optional[str] = None) -> bool:
+        """
+        Check if the current holder is the lock holder for the result record.
+
+        Args:
+            key: The key to check the lock for.
+            holder: The holder of the lock. If not provided, a default holder based on the
+                current host, process, and thread will be used.
+
+        Returns:
+            bool: True if the current holder is the lock holder; False otherwise.
+        """
+        holder = holder or self.generate_default_holder()
+        if self.lock_manager is None:
+            raise ConfigurationError(
+                "Result store is not configured with a lock manager. Please set"
+                " a lock manager when creating the result store to enable locking."
+            )
+        return self.lock_manager.is_lock_holder(key, holder)
+
+    def wait_for_lock(self, key: str, timeout: Optional[float] = None) -> bool:
+        """
+        Wait for the corresponding transaction record to become free.
+        """
+        if self.lock_manager is None:
+            raise ConfigurationError(
+                "Result store is not configured with a lock manager. Please set"
+                " a lock manager when creating the result store to enable locking."
            )
+        return self.lock_manager.wait_for_lock(key, timeout)
+
+    async def await_for_lock(self, key: str, timeout: Optional[float] = None) -> bool:
+        """
+        Wait for the corresponding transaction record to become free.
+        """
+        if self.lock_manager is None:
+            raise ConfigurationError(
+                "Result store is not configured with a lock manager. Please set"
+                " a lock manager when creating the result store to enable locking."
+            )
+        return await self.lock_manager.await_for_lock(key, timeout)
 
     @sync_compatible
     async def create_result(
@@ -332,22 +677,13 @@ class ResultFactory(BaseModel):
         obj: R,
         key: Optional[str] = None,
         expiration: Optional[DateTime] = None,
-        defer_persistence: bool = False,
     ) -> Union[R, "BaseResult[R]"]:
         """
-        Create a
-
-        If persistence is disabled, the object is wrapped in an `UnpersistedResult` and
-        returned.
-
-        If persistence is enabled the object is serialized, persisted to storage, and a reference is returned.
+        Create a `PersistedResult` for the given object.
         """
         # Null objects are "cached" in memory at no cost
         should_cache_object = self.cache_result_in_memory or obj is None
 
-        if not self.persist_result:
-            return await UnpersistedResult.create(obj, cache_object=should_cache_object)
-
         if key:
 
             def key_fn():
@@ -357,31 +693,225 @@ class ResultFactory(BaseModel):
         else:
            storage_key_fn = self.storage_key_fn
 
+        if self.result_storage is None:
+            self.result_storage = await get_default_result_storage()
+
         return await PersistedResult.create(
             obj,
-            storage_block=self.
-            storage_block_id=self.
+            storage_block=self.result_storage,
+            storage_block_id=self.result_storage_block_id,
             storage_key_fn=storage_key_fn,
             serializer=self.serializer,
             cache_object=should_cache_object,
             expiration=expiration,
-
+            serialize_to_none=not self.persist_result,
         )
 
+    # TODO: These two methods need to find a new home
+
     @sync_compatible
     async def store_parameters(self, identifier: UUID, parameters: Dict[str, Any]):
-
-
-
-
+        record = ResultRecord(
+            result=parameters,
+            metadata=ResultRecordMetadata(
+                serializer=self.serializer, storage_key=str(identifier)
+            ),
+        )
+        await self.result_storage.write_path(
+            f"parameters/{identifier}", content=record.serialize()
         )
 
     @sync_compatible
     async def read_parameters(self, identifier: UUID) -> Dict[str, Any]:
-
-        await self.
+        record = ResultRecord.deserialize(
+            await self.result_storage.read_path(f"parameters/{identifier}")
+        )
+        return record.result
+
+
+def get_current_result_store() -> ResultStore:
+    """
+    Get the current result store.
+    """
+    from prefect.context import get_run_context
+
+    try:
+        run_context = get_run_context()
+    except MissingContextError:
+        result_store = ResultStore()
+    else:
+        result_store = run_context.result_store
+    return result_store
+
+
+class ResultRecordMetadata(BaseModel):
+    """
+    Metadata for a result record.
+    """
+
+    storage_key: Optional[str] = Field(
+        default=None
+    )  # optional for backwards compatibility
+    expiration: Optional[DateTime] = Field(default=None)
+    serializer: Serializer = Field(default_factory=PickleSerializer)
+    prefect_version: str = Field(default=prefect.__version__)
+    storage_block_id: Optional[uuid.UUID] = Field(default=None)
+
+    def dump_bytes(self) -> bytes:
+        """
+        Serialize the metadata to bytes.
+
+        Returns:
+            bytes: the serialized metadata
+        """
+        return self.model_dump_json(serialize_as_any=True).encode()
+
+    @classmethod
+    def load_bytes(cls, data: bytes) -> "ResultRecordMetadata":
+        """
+        Deserialize metadata from bytes.
+
+        Args:
+            data: the serialized metadata
+
+        Returns:
+            ResultRecordMetadata: the deserialized metadata
+        """
+        return cls.model_validate_json(data)
+
+
+class ResultRecord(BaseModel, Generic[R]):
+    """
+    A record of a result.
+    """
+
+    metadata: ResultRecordMetadata
+    result: R
+
+    @property
+    def expiration(self) -> Optional[DateTime]:
+        return self.metadata.expiration
+
+    @property
+    def serializer(self) -> Serializer:
+        return self.metadata.serializer
+
+    def serialize_result(self) -> bytes:
+        try:
+            data = self.serializer.dumps(self.result)
+        except Exception as exc:
+            extra_info = (
+                'You can try a different serializer (e.g. result_serializer="json") '
+                "or disabling persistence (persist_result=False) for this flow or task."
+            )
+            # check if this is a known issue with cloudpickle and pydantic
+            # and add extra information to help the user recover
+
+            if (
+                isinstance(exc, TypeError)
+                and isinstance(self.result, BaseModel)
+                and str(exc).startswith("cannot pickle")
+            ):
+                try:
+                    from IPython import get_ipython
+
+                    if get_ipython() is not None:
+                        extra_info = inspect.cleandoc(
+                            """
+                            This is a known issue in Pydantic that prevents
+                            locally-defined (non-imported) models from being
+                            serialized by cloudpickle in IPython/Jupyter
+                            environments. Please see
+                            https://github.com/pydantic/pydantic/issues/8232 for
+                            more information. To fix the issue, either: (1) move
+                            your Pydantic class definition to an importable
+                            location, (2) use the JSON serializer for your flow
+                            or task (`result_serializer="json"`), or (3)
+                            disable result persistence for your flow or task
+                            (`persist_result=False`).
+                            """
+                        ).replace("\n", " ")
+                except ImportError:
+                    pass
+            raise SerializationError(
+                f"Failed to serialize object of type {type(self.result).__name__!r} with "
+                f"serializer {self.serializer.type!r}. {extra_info}"
+            ) from exc
+
+        return data
+
+    @model_validator(mode="before")
+    @classmethod
+    def coerce_old_format(cls, value: Any):
+        if isinstance(value, dict):
+            if "data" in value:
+                value["result"] = value.pop("data")
+            if "metadata" not in value:
+                value["metadata"] = {}
+            if "expiration" in value:
+                value["metadata"]["expiration"] = value.pop("expiration")
+            if "serializer" in value:
+                value["metadata"]["serializer"] = value.pop("serializer")
+            if "prefect_version" in value:
+                value["metadata"]["prefect_version"] = value.pop("prefect_version")
+        return value
+
+    def serialize_metadata(self) -> bytes:
+        return self.metadata.dump_bytes()
+
+    def serialize(
+        self,
+    ) -> bytes:
+        """
+        Serialize the record to bytes.
+
+        Returns:
+            bytes: the serialized record
+
+        """
+        return (
+            self.model_copy(update={"result": self.serialize_result()})
+            .model_dump_json(serialize_as_any=True)
+            .encode()
+        )
+
+    @classmethod
+    def deserialize(cls, data: bytes) -> "ResultRecord[R]":
+        """
+        Deserialize a record from bytes.
+
+        Args:
+            data: the serialized record
+
+        Returns:
+            ResultRecord: the deserialized record
+        """
+        instance = cls.model_validate_json(data)
+        if isinstance(instance.result, bytes):
+            instance.result = instance.serializer.loads(instance.result)
+        elif isinstance(instance.result, str):
+            instance.result = instance.serializer.loads(instance.result.encode())
+        return instance
+
+    @classmethod
+    def deserialize_from_result_and_metadata(
+        cls, result: bytes, metadata: bytes
+    ) -> "ResultRecord[R]":
+        """
+        Deserialize a record from separate result and metadata bytes.
+
+        Args:
+            result: the result
+            metadata: the serialized metadata
+
+        Returns:
+            ResultRecord: the deserialized record
+        """
+        result_record_metadata = ResultRecordMetadata.load_bytes(metadata)
+        return cls(
+            metadata=result_record_metadata,
+            result=result_record_metadata.serializer.loads(result),
         )
-        return self.serializer.loads(blob.data)
 
 
 @register_base_type
@@ -390,7 +920,9 @@ class BaseResult(BaseModel, abc.ABC, Generic[R]):
     type: str
 
     def __init__(self, **data: Any) -> None:
-        type_string =
+        type_string = (
+            get_dispatch_key(self) if type(self) is not BaseResult else "__base__"
+        )
         data.setdefault("type", type_string)
         super().__init__(**data)
 
@@ -432,40 +964,12 @@ class BaseResult(BaseModel, abc.ABC, Generic[R]):
         return cls.__name__ if isinstance(default, PydanticUndefinedType) else default
 
 
-class UnpersistedResult(BaseResult):
-    """
-    Result type for results that are not persisted outside of local memory.
-    """
-
-    type: str = "unpersisted"
-
-    @sync_compatible
-    async def get(self) -> R:
-        if self.has_cached_object():
-            return self._cache
-
-        raise MissingResult("The result was not persisted and is no longer available.")
-
-    @classmethod
-    @sync_compatible
-    async def create(
-        cls: "Type[UnpersistedResult]",
-        obj: R,
-        cache_object: bool = True,
-    ) -> "UnpersistedResult[R]":
-        result = cls()
-        # Only store the object in local memory, it will not be sent to the API
-        if cache_object:
-            result._cache_object(obj)
-        return result
-
-
 class PersistedResult(BaseResult):
     """
     Result type which stores a reference to a persisted result.
 
     When created, the user's object is serialized and stored. The format for the content
-    is defined by `
+    is defined by `ResultRecord`. This reference contains metadata necessary for retrieval
     of the object, such as a reference to the storage block and the key where the
     content was written.
     """
@@ -476,12 +980,19 @@ class PersistedResult(BaseResult):
     storage_key: str
     storage_block_id: Optional[uuid.UUID] = None
     expiration: Optional[DateTime] = None
+    serialize_to_none: bool = False
 
-    _should_cache_object: bool = PrivateAttr(default=True)
     _persisted: bool = PrivateAttr(default=False)
+    _should_cache_object: bool = PrivateAttr(default=True)
     _storage_block: WritableFileSystem = PrivateAttr(default=None)
     _serializer: Serializer = PrivateAttr(default=None)
 
+    @model_serializer(mode="wrap")
+    def serialize_model(self, handler, info):
+        if self.serialize_to_none:
+            return None
+        return handler(self, info)
+
     def _cache_object(
         self,
         obj: Any,
@@ -505,28 +1016,28 @@ class PersistedResult(BaseResult):
 
     @sync_compatible
     @inject_client
-    async def get(
+    async def get(
+        self, ignore_cache: bool = False, client: "PrefectClient" = None
+    ) -> R:
         """
         Retrieve the data and deserialize it into the original object.
         """
-        if self.has_cached_object():
+        if self.has_cached_object() and not ignore_cache:
             return self._cache
 
-
-
-
+        result_store_kwargs = {}
+        if self._serializer:
+            result_store_kwargs["serializer"] = resolve_serializer(self._serializer)
+        storage_block = await self._get_storage_block(client=client)
+        result_store = ResultStore(result_storage=storage_block, **result_store_kwargs)
+
+        record = await result_store.aread(self.storage_key)
+        self.expiration = record.expiration
 
         if self._should_cache_object:
-            self._cache_object(
-
-        return obj
+            self._cache_object(record.result)
 
-
-    async def _read_blob(self, client: "PrefectClient") -> "PersistedResultBlob":
-        block = await self._get_storage_block(client=client)
-        content = await block.read_path(self.storage_key)
-        blob = PersistedResultBlob.model_validate_json(content)
-        return blob
+        return record.result
 
     @staticmethod
     def _infer_path(storage_block, key) -> str:
@@ -547,7 +1058,7 @@ class PersistedResult(BaseResult):
         Write the result to the storage block.
         """
 
-        if self._persisted:
+        if self._persisted or self.serialize_to_none:
             # don't double write or overwrite
             return
 
@@ -567,50 +1078,11 @@ class PersistedResult(BaseResult):
         # this could error if the serializer requires kwargs
         serializer = Serializer(type=self.serializer_type)
 
-
-
-
-            extra_info = (
-                'You can try a different serializer (e.g. result_serializer="json") '
-                "or disabling persistence (persist_result=False) for this flow or task."
-            )
-            # check if this is a known issue with cloudpickle and pydantic
-            # and add extra information to help the user recover
-
-            if (
-                isinstance(exc, TypeError)
-                and isinstance(obj, BaseModel)
-                and str(exc).startswith("cannot pickle")
-            ):
-                try:
-                    from IPython import get_ipython
-
-                    if get_ipython() is not None:
-                        extra_info = inspect.cleandoc(
-                            """
-                            This is a known issue in Pydantic that prevents
-                            locally-defined (non-imported) models from being
-                            serialized by cloudpickle in IPython/Jupyter
-                            environments. Please see
-                            https://github.com/pydantic/pydantic/issues/8232 for
-                            more information. To fix the issue, either: (1) move
-                            your Pydantic class definition to an importable
-                            location, (2) use the JSON serializer for your flow
-                            or task (`result_serializer="json"`), or (3)
-                            disable result persistence for your flow or task
-                            (`persist_result=False`).
-                            """
-                        ).replace("\n", " ")
-                except ImportError:
-                    pass
-            raise ValueError(
-                f"Failed to serialize object of type {type(obj).__name__!r} with "
-                f"serializer {serializer.type!r}. {extra_info}"
-            ) from exc
-        blob = PersistedResultBlob(
-            serializer=serializer, data=data, expiration=self.expiration
+        result_store = ResultStore(result_storage=storage_block, serializer=serializer)
+        await result_store.awrite(
+            obj=obj, key=self.storage_key, expiration=self.expiration
         )
-
+
         self._persisted = True
 
         if not self._should_cache_object:
@@ -627,7 +1099,7 @@ class PersistedResult(BaseResult):
         storage_block_id: Optional[uuid.UUID] = None,
         cache_object: bool = True,
         expiration: Optional[DateTime] = None,
-
+        serialize_to_none: bool = False,
     ) -> "PersistedResult[R]":
         """
         Create a new result reference from a user's object.
@@ -651,24 +1123,13 @@ class PersistedResult(BaseResult):
             storage_block_id=storage_block_id,
             storage_key=key,
             expiration=expiration,
+            serialize_to_none=serialize_to_none,
         )
 
-        if cache_object and not defer_persistence:
-            # Attach the object to the result so it's available without deserialization
-            result._cache_object(
-                obj, storage_block=storage_block, serializer=serializer
-            )
-
         object.__setattr__(result, "_should_cache_object", cache_object)
-
-
-
-        else:
-            # we must cache temporarily to allow for writing later
-            # the cache will be removed on write
-            result._cache_object(
-                obj, storage_block=storage_block, serializer=serializer
-            )
+        # we must cache temporarily to allow for writing later
+        # the cache will be removed on write
+        result._cache_object(obj, storage_block=storage_block, serializer=serializer)
 
         return result
 
@@ -682,59 +1143,3 @@ class PersistedResult(BaseResult):
         and self.storage_block_id == other.storage_block_id
         and self.expiration == other.expiration
     )
-
-
-class PersistedResultBlob(BaseModel):
-    """
-    The format of the content stored by a persisted result.
-
-    Typically, this is written to a file as bytes.
-    """
-
-    serializer: Serializer
-    data: bytes
-    prefect_version: str = Field(default=prefect.__version__)
-    expiration: Optional[DateTime] = None
-
-    def load(self) -> Any:
-        return self.serializer.loads(self.data)
-
-    def to_bytes(self) -> bytes:
-        return self.model_dump_json(serialize_as_any=True).encode()
-
-
-class UnknownResult(BaseResult):
-    """
-    Result type for unknown results. Typically used to represent the result
-    of tasks that were forced from a failure state into a completed state.
-
-    The value for this result is always None and is not persisted to external
-    result storage, but orchestration treats the result the same as persisted
-    results when determining orchestration rules, such as whether to rerun a
-    completed task.
-    """
-
-    type: str = "unknown"
-    value: None
-
-    def has_cached_object(self) -> bool:
-        # This result type always has the object cached in memory
-        return True
-
-    @sync_compatible
-    async def get(self) -> R:
-        return self.value
-
-    @classmethod
-    @sync_compatible
-    async def create(
-        cls: "Type[UnknownResult]",
-        obj: R = None,
-    ) -> "UnknownResult[R]":
-        if obj is not None:
-            raise TypeError(
-                f"Unsupported type {type(obj).__name__!r} for unknown result. "
-                "Only None is supported."
-            )
-
-        return cls(value=obj)