flyte-2.0.0b32-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flyte might be problematic.
- flyte/__init__.py +108 -0
- flyte/_bin/__init__.py +0 -0
- flyte/_bin/debug.py +38 -0
- flyte/_bin/runtime.py +195 -0
- flyte/_bin/serve.py +178 -0
- flyte/_build.py +26 -0
- flyte/_cache/__init__.py +12 -0
- flyte/_cache/cache.py +147 -0
- flyte/_cache/defaults.py +9 -0
- flyte/_cache/local_cache.py +216 -0
- flyte/_cache/policy_function_body.py +42 -0
- flyte/_code_bundle/__init__.py +8 -0
- flyte/_code_bundle/_ignore.py +121 -0
- flyte/_code_bundle/_packaging.py +218 -0
- flyte/_code_bundle/_utils.py +347 -0
- flyte/_code_bundle/bundle.py +266 -0
- flyte/_constants.py +1 -0
- flyte/_context.py +155 -0
- flyte/_custom_context.py +73 -0
- flyte/_debug/__init__.py +0 -0
- flyte/_debug/constants.py +38 -0
- flyte/_debug/utils.py +17 -0
- flyte/_debug/vscode.py +307 -0
- flyte/_deploy.py +408 -0
- flyte/_deployer.py +109 -0
- flyte/_doc.py +29 -0
- flyte/_docstring.py +32 -0
- flyte/_environment.py +122 -0
- flyte/_excepthook.py +37 -0
- flyte/_group.py +32 -0
- flyte/_hash.py +8 -0
- flyte/_image.py +1055 -0
- flyte/_initialize.py +628 -0
- flyte/_interface.py +119 -0
- flyte/_internal/__init__.py +3 -0
- flyte/_internal/controllers/__init__.py +129 -0
- flyte/_internal/controllers/_local_controller.py +239 -0
- flyte/_internal/controllers/_trace.py +48 -0
- flyte/_internal/controllers/remote/__init__.py +58 -0
- flyte/_internal/controllers/remote/_action.py +211 -0
- flyte/_internal/controllers/remote/_client.py +47 -0
- flyte/_internal/controllers/remote/_controller.py +583 -0
- flyte/_internal/controllers/remote/_core.py +465 -0
- flyte/_internal/controllers/remote/_informer.py +381 -0
- flyte/_internal/controllers/remote/_service_protocol.py +50 -0
- flyte/_internal/imagebuild/__init__.py +3 -0
- flyte/_internal/imagebuild/docker_builder.py +706 -0
- flyte/_internal/imagebuild/image_builder.py +277 -0
- flyte/_internal/imagebuild/remote_builder.py +386 -0
- flyte/_internal/imagebuild/utils.py +78 -0
- flyte/_internal/resolvers/__init__.py +0 -0
- flyte/_internal/resolvers/_task_module.py +21 -0
- flyte/_internal/resolvers/common.py +31 -0
- flyte/_internal/resolvers/default.py +28 -0
- flyte/_internal/runtime/__init__.py +0 -0
- flyte/_internal/runtime/convert.py +486 -0
- flyte/_internal/runtime/entrypoints.py +204 -0
- flyte/_internal/runtime/io.py +188 -0
- flyte/_internal/runtime/resources_serde.py +152 -0
- flyte/_internal/runtime/reuse.py +125 -0
- flyte/_internal/runtime/rusty.py +193 -0
- flyte/_internal/runtime/task_serde.py +362 -0
- flyte/_internal/runtime/taskrunner.py +209 -0
- flyte/_internal/runtime/trigger_serde.py +160 -0
- flyte/_internal/runtime/types_serde.py +54 -0
- flyte/_keyring/__init__.py +0 -0
- flyte/_keyring/file.py +115 -0
- flyte/_logging.py +300 -0
- flyte/_map.py +312 -0
- flyte/_module.py +72 -0
- flyte/_pod.py +30 -0
- flyte/_resources.py +473 -0
- flyte/_retry.py +32 -0
- flyte/_reusable_environment.py +102 -0
- flyte/_run.py +724 -0
- flyte/_secret.py +96 -0
- flyte/_task.py +550 -0
- flyte/_task_environment.py +316 -0
- flyte/_task_plugins.py +47 -0
- flyte/_timeout.py +47 -0
- flyte/_tools.py +27 -0
- flyte/_trace.py +119 -0
- flyte/_trigger.py +1000 -0
- flyte/_utils/__init__.py +30 -0
- flyte/_utils/asyn.py +121 -0
- flyte/_utils/async_cache.py +139 -0
- flyte/_utils/coro_management.py +27 -0
- flyte/_utils/docker_credentials.py +173 -0
- flyte/_utils/file_handling.py +72 -0
- flyte/_utils/helpers.py +134 -0
- flyte/_utils/lazy_module.py +54 -0
- flyte/_utils/module_loader.py +104 -0
- flyte/_utils/org_discovery.py +57 -0
- flyte/_utils/uv_script_parser.py +49 -0
- flyte/_version.py +34 -0
- flyte/app/__init__.py +22 -0
- flyte/app/_app_environment.py +157 -0
- flyte/app/_deploy.py +125 -0
- flyte/app/_input.py +160 -0
- flyte/app/_runtime/__init__.py +3 -0
- flyte/app/_runtime/app_serde.py +347 -0
- flyte/app/_types.py +101 -0
- flyte/app/extras/__init__.py +3 -0
- flyte/app/extras/_fastapi.py +151 -0
- flyte/cli/__init__.py +12 -0
- flyte/cli/_abort.py +28 -0
- flyte/cli/_build.py +114 -0
- flyte/cli/_common.py +468 -0
- flyte/cli/_create.py +371 -0
- flyte/cli/_delete.py +45 -0
- flyte/cli/_deploy.py +293 -0
- flyte/cli/_gen.py +176 -0
- flyte/cli/_get.py +370 -0
- flyte/cli/_option.py +33 -0
- flyte/cli/_params.py +554 -0
- flyte/cli/_plugins.py +209 -0
- flyte/cli/_run.py +597 -0
- flyte/cli/_serve.py +64 -0
- flyte/cli/_update.py +37 -0
- flyte/cli/_user.py +17 -0
- flyte/cli/main.py +221 -0
- flyte/config/__init__.py +3 -0
- flyte/config/_config.py +248 -0
- flyte/config/_internal.py +73 -0
- flyte/config/_reader.py +225 -0
- flyte/connectors/__init__.py +11 -0
- flyte/connectors/_connector.py +270 -0
- flyte/connectors/_server.py +197 -0
- flyte/connectors/utils.py +135 -0
- flyte/errors.py +243 -0
- flyte/extend.py +19 -0
- flyte/extras/__init__.py +5 -0
- flyte/extras/_container.py +286 -0
- flyte/git/__init__.py +3 -0
- flyte/git/_config.py +21 -0
- flyte/io/__init__.py +29 -0
- flyte/io/_dataframe/__init__.py +131 -0
- flyte/io/_dataframe/basic_dfs.py +223 -0
- flyte/io/_dataframe/dataframe.py +1026 -0
- flyte/io/_dir.py +910 -0
- flyte/io/_file.py +914 -0
- flyte/io/_hashing_io.py +342 -0
- flyte/models.py +479 -0
- flyte/py.typed +0 -0
- flyte/remote/__init__.py +35 -0
- flyte/remote/_action.py +738 -0
- flyte/remote/_app.py +57 -0
- flyte/remote/_client/__init__.py +0 -0
- flyte/remote/_client/_protocols.py +189 -0
- flyte/remote/_client/auth/__init__.py +12 -0
- flyte/remote/_client/auth/_auth_utils.py +14 -0
- flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
- flyte/remote/_client/auth/_authenticators/base.py +403 -0
- flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- flyte/remote/_client/auth/_authenticators/device_code.py +117 -0
- flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
- flyte/remote/_client/auth/_authenticators/factory.py +200 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
- flyte/remote/_client/auth/_channel.py +213 -0
- flyte/remote/_client/auth/_client_config.py +85 -0
- flyte/remote/_client/auth/_default_html.py +32 -0
- flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
- flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
- flyte/remote/_client/auth/_keyring.py +152 -0
- flyte/remote/_client/auth/_token_client.py +260 -0
- flyte/remote/_client/auth/errors.py +16 -0
- flyte/remote/_client/controlplane.py +128 -0
- flyte/remote/_common.py +30 -0
- flyte/remote/_console.py +19 -0
- flyte/remote/_data.py +161 -0
- flyte/remote/_logs.py +185 -0
- flyte/remote/_project.py +88 -0
- flyte/remote/_run.py +386 -0
- flyte/remote/_secret.py +142 -0
- flyte/remote/_task.py +527 -0
- flyte/remote/_trigger.py +306 -0
- flyte/remote/_user.py +33 -0
- flyte/report/__init__.py +3 -0
- flyte/report/_report.py +182 -0
- flyte/report/_template.html +124 -0
- flyte/storage/__init__.py +36 -0
- flyte/storage/_config.py +237 -0
- flyte/storage/_parallel_reader.py +274 -0
- flyte/storage/_remote_fs.py +34 -0
- flyte/storage/_storage.py +456 -0
- flyte/storage/_utils.py +5 -0
- flyte/syncify/__init__.py +56 -0
- flyte/syncify/_api.py +375 -0
- flyte/types/__init__.py +52 -0
- flyte/types/_interface.py +40 -0
- flyte/types/_pickle.py +145 -0
- flyte/types/_renderer.py +162 -0
- flyte/types/_string_literals.py +119 -0
- flyte/types/_type_engine.py +2254 -0
- flyte/types/_utils.py +80 -0
- flyte-2.0.0b32.data/scripts/debug.py +38 -0
- flyte-2.0.0b32.data/scripts/runtime.py +195 -0
- flyte-2.0.0b32.dist-info/METADATA +351 -0
- flyte-2.0.0b32.dist-info/RECORD +204 -0
- flyte-2.0.0b32.dist-info/WHEEL +5 -0
- flyte-2.0.0b32.dist-info/entry_points.txt +7 -0
- flyte-2.0.0b32.dist-info/licenses/LICENSE +201 -0
- flyte-2.0.0b32.dist-info/top_level.txt +1 -0
flyte/storage/_storage.py
ADDED

@@ -0,0 +1,456 @@
from __future__ import annotations

import os
import pathlib
import random
import tempfile
import typing
from typing import AsyncGenerator, Optional
from uuid import UUID

import fsspec
import obstore
from fsspec.asyn import AsyncFileSystem
from fsspec.utils import get_protocol
from obstore.exceptions import GenericError
from obstore.fsspec import register

from flyte._initialize import get_storage
from flyte._logging import logger
from flyte.errors import InitializationError, OnlyAsyncIOSupportedError

if typing.TYPE_CHECKING:
    from obstore import AsyncReadableFile, AsyncWritableFile

_OBSTORE_SUPPORTED_PROTOCOLS = ["s3", "gs", "abfs", "abfss"]


def _is_obstore_supported_protocol(protocol: str) -> bool:
    """
    Check if the given protocol is supported by obstore.
    :param protocol: Protocol to check.
    :return: True if the protocol is supported, False otherwise.
    """
    return protocol in _OBSTORE_SUPPORTED_PROTOCOLS


def is_remote(path: pathlib.Path | str) -> bool:
    """
    Return True if the path points to a remote location, i.e. its protocol is not ``file``.
    """
    protocol = get_protocol(str(path))
    if protocol is None:
        return False
    return protocol != "file"


def strip_file_header(path: str) -> str:
    """
    Drops file:// if it exists from the file
    """
    if path.startswith("file://"):
        return path.replace("file://", "", 1)
    return path


def get_random_local_path(file_path_or_file_name: pathlib.Path | str | None = None) -> pathlib.Path:
    """
    Use file_path_or_file_name, when you want a random directory, but want to preserve the leaf file name
    """
    local_tmp = pathlib.Path(tempfile.mkdtemp(prefix="flyte-tmp-"))
    key = UUID(int=random.getrandbits(128)).hex
    tmp_folder = local_tmp / key
    tail = ""
    if file_path_or_file_name:
        _, tail = os.path.split(file_path_or_file_name)
    if tail:
        tmp_folder.mkdir(parents=True, exist_ok=True)
        return tmp_folder / tail
    local_tmp.mkdir(parents=True, exist_ok=True)
    return tmp_folder


def get_random_local_directory() -> pathlib.Path:
    """
    :return: a random directory
    :rtype: pathlib.Path
    """
    _dir = get_random_local_path(None)
    pathlib.Path(_dir).mkdir(parents=True, exist_ok=True)
    return _dir


def get_configured_fsspec_kwargs(
    protocol: typing.Optional[str] = None, anonymous: bool = False
) -> typing.Dict[str, typing.Any]:
    if protocol:
        # Try to get storage config safely - may not be initialized for local operations
        try:
            storage_config = get_storage()
        except InitializationError:
            storage_config = None

        match protocol:
            case "s3":
                # If the protocol is s3, we can use the s3 filesystem
                from flyte.storage import S3

                if storage_config and isinstance(storage_config, S3):
                    return storage_config.get_fsspec_kwargs(anonymous=anonymous)

                return S3.auto().get_fsspec_kwargs(anonymous=anonymous)
            case "gs":
                # If the protocol is gs, we can use the gs filesystem
                from flyte.storage import GCS

                if storage_config and isinstance(storage_config, GCS):
                    return storage_config.get_fsspec_kwargs(anonymous=anonymous)

                return GCS.auto().get_fsspec_kwargs(anonymous=anonymous)
            case "abfs" | "abfss":
                # If the protocol is abfs or abfss, we can use the abfs filesystem
                from flyte.storage import ABFS

                if storage_config and isinstance(storage_config, ABFS):
                    return storage_config.get_fsspec_kwargs(anonymous=anonymous)

                return ABFS.auto().get_fsspec_kwargs(anonymous=anonymous)
            case _:
                return {}

    # If no protocol, return args from storage config if set
    storage_config = get_storage()
    if storage_config:
        return storage_config.get_fsspec_kwargs(anonymous)

    return {}


def get_underlying_filesystem(
    protocol: typing.Optional[str] = None,
    anonymous: bool = False,
    path: typing.Optional[str] = None,
    **kwargs,
) -> fsspec.AbstractFileSystem:
    if protocol is None:
        # If protocol is None, get it from the path
        protocol = get_protocol(path)

    configured_kwargs = get_configured_fsspec_kwargs(protocol, anonymous=anonymous)
    configured_kwargs.update(kwargs)

    return fsspec.filesystem(protocol, **configured_kwargs)


def _get_anonymous_filesystem(from_path):
    """Get the anonymous file system if needed."""
    return get_underlying_filesystem(get_protocol(from_path), anonymous=True, asynchronous=True)


async def _get_obstore_bypass(from_path: str, to_path: str | pathlib.Path, recursive: bool = False, **kwargs) -> str:
    from obstore.store import ObjectStore

    from flyte.storage._parallel_reader import ObstoreParallelReader

    fs = get_underlying_filesystem(path=from_path)
    bucket, prefix = fs._split_path(from_path)  # pylint: disable=W0212
    store: ObjectStore = fs._construct_store(bucket)

    download_kwargs = {}
    if "chunk_size" in kwargs:
        download_kwargs["chunk_size"] = kwargs["chunk_size"]
    if "max_concurrency" in kwargs:
        download_kwargs["max_concurrency"] = kwargs["max_concurrency"]

    reader = ObstoreParallelReader(store, **download_kwargs)
    target_path = pathlib.Path(to_path) if isinstance(to_path, str) else to_path

    # if recursive, just download the prefix to the target path
    if recursive:
        logger.debug(f"Downloading recursively {prefix=} to {target_path=}")
        await reader.download_files(
            prefix,
            target_path,
        )
        return str(to_path)

    # if not recursive, we need to split out the file name from the prefix
    else:
        path_for_reader = pathlib.Path(prefix).name
        final_prefix = pathlib.Path(prefix).parent
        logger.debug(f"Downloading single file {final_prefix=}, {path_for_reader=} to {target_path=}")
        await reader.download_files(
            final_prefix,
            target_path.parent,
            path_for_reader,
            destination_file_name=target_path.name,
        )
        return str(target_path)


async def get(from_path: str, to_path: Optional[str | pathlib.Path] = None, recursive: bool = False, **kwargs) -> str:
    if not to_path:
        name = pathlib.Path(from_path).name  # may need to be adjusted for windows
        to_path = get_random_local_path(file_path_or_file_name=name)
        logger.debug(f"Storing file from {from_path} to {to_path}")
    else:
        # Only apply directory logic for single files (not recursive)
        if not recursive:
            to_path_str = str(to_path)
            # Check for trailing separator BEFORE converting to Path (which normalizes and removes it)
            ends_with_sep = to_path_str.endswith(os.sep)
            to_path_obj = pathlib.Path(to_path)

            # If path ends with os.sep or is an existing directory, append source filename
            if ends_with_sep or (to_path_obj.exists() and to_path_obj.is_dir()):
                source_filename = pathlib.Path(from_path).name  # may need to be adjusted for windows
                to_path = to_path_obj / source_filename
        # For recursive=True, keep to_path as-is (it's the destination directory for contents)

    file_system = get_underlying_filesystem(path=from_path)

    # Check if we should use obstore bypass
    if (
        _is_obstore_supported_protocol(file_system.protocol)
        and hasattr(file_system, "_split_path")
        and hasattr(file_system, "_construct_store")
        and recursive
    ):
        return await _get_obstore_bypass(from_path, to_path, recursive, **kwargs)

    try:
        return await _get_from_filesystem(file_system, from_path, to_path, recursive=recursive, **kwargs)
    except (OSError, GenericError) as oe:
        logger.debug(f"Error in getting {from_path} to {to_path} rec {recursive} {oe}")
        if isinstance(file_system, AsyncFileSystem):
            try:
                exists = await file_system._exists(from_path)  # pylint: disable=W0212
            except GenericError:
                # for obstore, as it does not raise FileNotFoundError in fsspec but GenericError
                # force it to try get_filesystem(anonymous=True)
                exists = True
        else:
            exists = file_system.exists(from_path)
        if not exists:
            raise AssertionError(f"Unable to load data from {from_path}")
        file_system = _get_anonymous_filesystem(from_path)
        logger.debug(f"Attempting anonymous get with {file_system}")
        return await _get_from_filesystem(file_system, from_path, to_path, recursive=recursive, **kwargs)


async def _get_from_filesystem(
    file_system: fsspec.AbstractFileSystem,
    from_path: str | pathlib.Path,
    to_path: str | pathlib.Path,
    recursive: bool,
    **kwargs,
):
    if isinstance(file_system, AsyncFileSystem):
        dst = await file_system._get(str(from_path), str(to_path), recursive=recursive, **kwargs)  # pylint: disable=W0212
    else:
        dst = file_system.get(str(from_path), str(to_path), recursive=recursive, **kwargs)

    if isinstance(dst, (str, pathlib.Path)):
        return dst
    return str(to_path)


async def put(from_path: str, to_path: Optional[str] = None, recursive: bool = False, **kwargs) -> str:
    if not to_path:
        from flyte._context import internal_ctx

        ctx = internal_ctx()
        name = pathlib.Path(from_path).name
        to_path = ctx.raw_data.get_random_remote_path(file_name=name)

    file_system = get_underlying_filesystem(path=to_path)
    from_path = strip_file_header(from_path)
    if isinstance(file_system, AsyncFileSystem):
        dst = await file_system._put(from_path, to_path, recursive=recursive, **kwargs)  # pylint: disable=W0212
    else:
        dst = file_system.put(from_path, to_path, recursive=recursive, **kwargs)
    if isinstance(dst, (str, pathlib.Path)):
        return str(dst)
    else:
        return to_path


async def _open_obstore_bypass(path: str, mode: str = "rb", **kwargs) -> AsyncReadableFile | AsyncWritableFile:
    """
    Simple obstore bypass for opening files. No fallbacks, obstore only.
    """
    from obstore.store import ObjectStore

    fs = get_underlying_filesystem(path=path)
    bucket, file_path = fs._split_path(path)  # pylint: disable=W0212
    store: ObjectStore = fs._construct_store(bucket)

    file_handle: AsyncReadableFile | AsyncWritableFile

    if "w" in mode:
        attributes = kwargs.pop("attributes", {})
        file_handle = obstore.open_writer_async(store, file_path, attributes=attributes)
    else:  # read mode
        buffer_size = kwargs.pop("buffer_size", 10 * 2**20)
        file_handle = await obstore.open_reader_async(store, file_path, buffer_size=buffer_size)

    return file_handle


async def open(path: str, mode: str = "rb", **kwargs) -> AsyncReadableFile | AsyncWritableFile:
    """
    Asynchronously open a file and return an async file handle.
    This function checks if the underlying filesystem supports obstore bypass.
    If it does, it uses obstore to open the file. Otherwise, it falls back to
    the filesystem's native async open (AsyncFileSystem.open_async).

    It will raise OnlyAsyncIOSupportedError if neither obstore nor AsyncFileSystem is supported.
    """
    fs = get_underlying_filesystem(path=path)

    # Check if we should use obstore bypass
    if _is_obstore_supported_protocol(fs.protocol) and hasattr(fs, "_split_path") and hasattr(fs, "_construct_store"):
        return await _open_obstore_bypass(path, mode, **kwargs)

    # Fallback to normal open
    if isinstance(fs, AsyncFileSystem):
        return await fs.open_async(path, mode, **kwargs)

    raise OnlyAsyncIOSupportedError(f"Filesystem {fs} does not support async operations")


async def put_stream(
    data_iterable: typing.AsyncIterable[bytes] | bytes, *, name: str | None = None, to_path: str | None = None, **kwargs
) -> str:
    """
    Put a stream of data to a remote location. This is useful for streaming data to a remote location.
    Example usage:
    ```python
    import flyte.storage as storage
    storage.put_stream(iter([b'hello']), name="my_file.txt")
    OR
    storage.put_stream(iter([b'hello']), to_path="s3://my_bucket/my_file.txt")
    ```

    :param data_iterable: Iterable of bytes to be streamed.
    :param name: Name of the file to be created. If not provided, a random name will be generated.
    :param to_path: Path to the remote location where the data will be stored.
    :param kwargs: Additional arguments to be passed to the underlying filesystem.
    :rtype: str
    :return: The path to the remote location where the data was stored.
    """
    if not to_path:
        from flyte._context import internal_ctx

        ctx = internal_ctx()
        to_path = ctx.raw_data.get_random_remote_path(file_name=name)

    # Check if we should use obstore bypass
    fs = get_underlying_filesystem(path=to_path)
    try:
        file_handle = typing.cast("AsyncWritableFile", await open(to_path, "wb", **kwargs))
        if isinstance(data_iterable, bytes):
            await file_handle.write(data_iterable)
        else:
            async for data in data_iterable:
                await file_handle.write(data)
        await file_handle.close()
        return str(to_path)
    except OnlyAsyncIOSupportedError:
        pass

    # Fallback to normal open
    file_handle_io: typing.IO = fs.open(to_path, mode="wb", **kwargs)
    if isinstance(data_iterable, bytes):
        file_handle_io.write(data_iterable)
    else:
        async for data in data_iterable:
            file_handle_io.write(data)
    file_handle_io.close()

    return str(to_path)


async def get_stream(path: str, chunk_size=10 * 2**20, **kwargs) -> AsyncGenerator[bytes, None]:
    """
    Get a stream of data from a remote location.
    This is useful for downloading streaming data from a remote location.
    Example usage:
    ```python
    import flyte.storage as storage
    async for chunk in storage.get_stream(path="s3://my_bucket/my_file.txt"):
        process(chunk)
    ```

    :param path: Path to the remote location where the data will be downloaded.
    :param kwargs: Additional arguments to be passed to the underlying filesystem.
    :param chunk_size: Size of each chunk to be read from the file.
    :return: An async iterator that yields chunks of bytes.
    """
    # Check if we should use obstore bypass
    fs = get_underlying_filesystem(path=path)
    if _is_obstore_supported_protocol(fs.protocol) and hasattr(fs, "_split_path") and hasattr(fs, "_construct_store"):
        # Set buffer_size for obstore if chunk_size is provided
        if "buffer_size" not in kwargs:
            kwargs["buffer_size"] = chunk_size
        file_handle = typing.cast("AsyncReadableFile", await _open_obstore_bypass(path, "rb", **kwargs))
        while chunk := await file_handle.read():
            yield bytes(chunk)
        return

    # Fallback to normal open
    if "block_size" not in kwargs:
        kwargs["block_size"] = chunk_size

    if isinstance(fs, AsyncFileSystem):
        file_handle = await fs.open_async(path, "rb", **kwargs)
        while chunk := await file_handle.read():
            yield chunk
        await file_handle.close()
        return

    file_handle = fs.open(path, "rb", **kwargs)
    while chunk := file_handle.read():
        yield chunk
    file_handle.close()


def join(*paths: str) -> str:
    """
    Join multiple paths together. This is a wrapper around os.path.join.
    # TODO replace with proper join with fsspec root etc

    :param paths: Paths to be joined.
    """
    return str(os.path.join(*paths))


async def exists(path: str, **kwargs) -> bool:
    """
    Check if a path exists.

    :param path: Path to be checked.
    :param kwargs: Additional arguments to be passed to the underlying filesystem.
    :return: True if the path exists, False otherwise.
    """
    try:
        fs = get_underlying_filesystem(path=path, **kwargs)
        if isinstance(fs, AsyncFileSystem):
            _ = await fs._info(path)
            return True
        _ = fs.info(path)
        return True
    except FileNotFoundError:
        return False


def exists_sync(path: str, **kwargs) -> bool:
    try:
        fs = get_underlying_filesystem(path=path, **kwargs)
        _ = fs.info(path)
        return True
    except FileNotFoundError:
        return False


register(_OBSTORE_SUPPORTED_PROTOCOLS, asynchronous=True)
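The module above is flyte's async, fsspec-backed storage layer, with an obstore fast path for the s3/gs/abfs protocols. Below is a minimal usage sketch based on the docstrings above; it assumes that `get`, `put`, and `exists` are re-exported under `flyte.storage` alongside `get_stream`/`put_stream` (only the latter two are shown with that import style in the docstrings), and the bucket and key names are placeholders.

```python
import asyncio

import flyte.storage as storage


async def main() -> None:
    # Download a single object; with no to_path, a random local temp path is chosen.
    local_file = await storage.get("s3://my-bucket/inputs/data.csv")
    print("downloaded to", local_file)

    # Upload it back; put() returns the remote path that was written.
    remote_path = await storage.put(local_file, "s3://my-bucket/outputs/data.csv")

    # exists() resolves the filesystem from the path's protocol and probes it.
    print("exists:", await storage.exists(remote_path))

    # Stream the object back in chunks without writing it to disk.
    async for chunk in storage.get_stream(path=remote_path):
        print("read", len(chunk), "bytes")


asyncio.run(main())
```

Note that everything in this module is a coroutine or async generator, so it has to be driven from an event loop (here via `asyncio.run`).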
flyte/storage/_utils.py
ADDED

@@ -0,0 +1,5 @@
import os

# This is the default chunk size flyte will use for writing to S3 and GCS. This is set to 25MB by default and is
# configurable by the user if needed. This is used when put() is called on filesystems.
_WRITE_SIZE_CHUNK_BYTES = int(os.environ.get("_F_P_WRITE_CHUNK_SIZE", "26214400"))  # 25 * 2**20
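For reference, the default of 26214400 bytes is 25 * 2**20, i.e. 25 MiB. A minimal sketch of overriding it, assuming the value is read only once at import time (as the module-level `os.environ.get` above suggests), so the variable must be set before flyte is imported; the leading underscore also suggests this is an internal knob:

```python
import os

# Hypothetical override: raise the write chunk size from 25 MiB to 50 MiB.
# Must be set before flyte is imported, because the value is captured in a
# module-level constant via os.environ.get().
os.environ["_F_P_WRITE_CHUNK_SIZE"] = str(50 * 2**20)  # 52428800 bytes

import flyte.storage  # noqa: E402  (imported after the override on purpose)
```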
flyte/syncify/__init__.py
ADDED

@@ -0,0 +1,56 @@
"""
# Syncify Module
This module provides the `syncify` decorator and the `Syncify` class.
The decorator can be used to convert asynchronous functions or methods into synchronous ones.
This is useful for integrating async code into synchronous contexts.

Every asynchronous function or method wrapped with `syncify` can be called synchronously using the
parenthesis `()` operator, or asynchronously using the `.aio()` method.

Example::

```python
from flyte.syncify import syncify

@syncify
async def async_function(x: str) -> str:
    return f"Hello, Async World {x}!"


# now you can call it synchronously
result = async_function("Async World")  # Note: no .aio() needed for sync calls
print(result)
# Output: Hello, Async World Async World!

# or call it asynchronously
async def main():
    result = await async_function.aio("World")  # Note the use of .aio() for async calls
    print(result)
```

## Creating a Syncify Instance
```python
from flyte.syncify import Syncify

syncer = Syncify("my_syncer")

# Now you can use `syncer` to decorate your async functions or methods
```

## How does it work?
The Syncify class wraps asynchronous functions, classmethods, instance methods, and static methods to
provide a synchronous interface. The wrapped methods are always executed in the context of a background loop,
whether they are called synchronously or asynchronously. This allows for seamless integration with async
libraries that capture the event loop, such as grpc.aio. In such cases, the Syncify class ensures that the
async function is executed in the context of the same background loop.

To use it correctly with grpc.aio, you should wrap every grpc.aio channel creation and client invocation
with the same `Syncify` instance. This ensures that the async code runs in the correct event loop context.
"""

from flyte.syncify._api import Syncify

syncify = Syncify()

__all__ = ["Syncify", "syncify"]
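Beyond the plain-function examples in the docstring above, the "How does it work?" section states that instance methods, classmethods, and static methods can be wrapped as well. Below is a minimal sketch combining a named `Syncify` instance with an instance method; it assumes bound methods expose `.aio()` the same way plain functions do, and `example_syncer` / `Greeter` are illustrative names only.

```python
import asyncio

from flyte.syncify import Syncify

# One named instance shared by related calls, so they run on the same background loop.
syncer = Syncify("example_syncer")


class Greeter:
    @syncer
    async def greet(self, name: str) -> str:
        await asyncio.sleep(0)  # stand-in for real async work (e.g. a grpc.aio call)
        return f"Hello, {name}!"


# Synchronous call: executed on the Syncify background loop under the hood.
print(Greeter().greet("World"))


# Asynchronous call from existing async code, via .aio().
async def main() -> None:
    print(await Greeter().greet.aio("Async World"))


asyncio.run(main())
```

Sharing one `Syncify` instance this way mirrors the docstring's guidance for grpc.aio: everything wrapped by the same instance runs on the same background event loop.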