flyte 0.2.0b1__py3-none-any.whl → 2.0.0b46__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registries; it is provided for informational purposes only.
- flyte/__init__.py +83 -30
- flyte/_bin/connect.py +61 -0
- flyte/_bin/debug.py +38 -0
- flyte/_bin/runtime.py +87 -19
- flyte/_bin/serve.py +351 -0
- flyte/_build.py +3 -2
- flyte/_cache/cache.py +6 -5
- flyte/_cache/local_cache.py +216 -0
- flyte/_code_bundle/_ignore.py +31 -5
- flyte/_code_bundle/_packaging.py +42 -11
- flyte/_code_bundle/_utils.py +57 -34
- flyte/_code_bundle/bundle.py +130 -27
- flyte/_constants.py +1 -0
- flyte/_context.py +21 -5
- flyte/_custom_context.py +73 -0
- flyte/_debug/constants.py +37 -0
- flyte/_debug/utils.py +17 -0
- flyte/_debug/vscode.py +315 -0
- flyte/_deploy.py +396 -75
- flyte/_deployer.py +109 -0
- flyte/_environment.py +94 -11
- flyte/_excepthook.py +37 -0
- flyte/_group.py +2 -1
- flyte/_hash.py +1 -16
- flyte/_image.py +544 -231
- flyte/_initialize.py +456 -316
- flyte/_interface.py +40 -5
- flyte/_internal/controllers/__init__.py +22 -8
- flyte/_internal/controllers/_local_controller.py +159 -35
- flyte/_internal/controllers/_trace.py +18 -10
- flyte/_internal/controllers/remote/__init__.py +38 -9
- flyte/_internal/controllers/remote/_action.py +82 -12
- flyte/_internal/controllers/remote/_client.py +6 -2
- flyte/_internal/controllers/remote/_controller.py +290 -64
- flyte/_internal/controllers/remote/_core.py +155 -95
- flyte/_internal/controllers/remote/_informer.py +40 -20
- flyte/_internal/controllers/remote/_service_protocol.py +2 -2
- flyte/_internal/imagebuild/__init__.py +2 -10
- flyte/_internal/imagebuild/docker_builder.py +391 -84
- flyte/_internal/imagebuild/image_builder.py +111 -55
- flyte/_internal/imagebuild/remote_builder.py +409 -0
- flyte/_internal/imagebuild/utils.py +79 -0
- flyte/_internal/resolvers/_app_env_module.py +92 -0
- flyte/_internal/resolvers/_task_module.py +5 -38
- flyte/_internal/resolvers/app_env.py +26 -0
- flyte/_internal/resolvers/common.py +8 -1
- flyte/_internal/resolvers/default.py +2 -2
- flyte/_internal/runtime/convert.py +319 -36
- flyte/_internal/runtime/entrypoints.py +106 -18
- flyte/_internal/runtime/io.py +71 -23
- flyte/_internal/runtime/resources_serde.py +21 -7
- flyte/_internal/runtime/reuse.py +125 -0
- flyte/_internal/runtime/rusty.py +196 -0
- flyte/_internal/runtime/task_serde.py +239 -66
- flyte/_internal/runtime/taskrunner.py +48 -8
- flyte/_internal/runtime/trigger_serde.py +162 -0
- flyte/_internal/runtime/types_serde.py +7 -16
- flyte/_keyring/file.py +115 -0
- flyte/_link.py +30 -0
- flyte/_logging.py +241 -42
- flyte/_map.py +312 -0
- flyte/_metrics.py +59 -0
- flyte/_module.py +74 -0
- flyte/_pod.py +30 -0
- flyte/_resources.py +296 -33
- flyte/_retry.py +1 -7
- flyte/_reusable_environment.py +72 -7
- flyte/_run.py +462 -132
- flyte/_secret.py +47 -11
- flyte/_serve.py +333 -0
- flyte/_task.py +245 -56
- flyte/_task_environment.py +219 -97
- flyte/_task_plugins.py +47 -0
- flyte/_tools.py +8 -8
- flyte/_trace.py +15 -24
- flyte/_trigger.py +1027 -0
- flyte/_utils/__init__.py +12 -1
- flyte/_utils/asyn.py +3 -1
- flyte/_utils/async_cache.py +139 -0
- flyte/_utils/coro_management.py +5 -4
- flyte/_utils/description_parser.py +19 -0
- flyte/_utils/docker_credentials.py +173 -0
- flyte/_utils/helpers.py +45 -19
- flyte/_utils/module_loader.py +123 -0
- flyte/_utils/org_discovery.py +57 -0
- flyte/_utils/uv_script_parser.py +8 -1
- flyte/_version.py +16 -3
- flyte/app/__init__.py +27 -0
- flyte/app/_app_environment.py +362 -0
- flyte/app/_connector_environment.py +40 -0
- flyte/app/_deploy.py +130 -0
- flyte/app/_parameter.py +343 -0
- flyte/app/_runtime/__init__.py +3 -0
- flyte/app/_runtime/app_serde.py +383 -0
- flyte/app/_types.py +113 -0
- flyte/app/extras/__init__.py +9 -0
- flyte/app/extras/_auth_middleware.py +217 -0
- flyte/app/extras/_fastapi.py +93 -0
- flyte/app/extras/_model_loader/__init__.py +3 -0
- flyte/app/extras/_model_loader/config.py +7 -0
- flyte/app/extras/_model_loader/loader.py +288 -0
- flyte/cli/__init__.py +12 -0
- flyte/cli/_abort.py +28 -0
- flyte/cli/_build.py +114 -0
- flyte/cli/_common.py +493 -0
- flyte/cli/_create.py +371 -0
- flyte/cli/_delete.py +45 -0
- flyte/cli/_deploy.py +401 -0
- flyte/cli/_gen.py +316 -0
- flyte/cli/_get.py +446 -0
- flyte/cli/_option.py +33 -0
- flyte/{_cli → cli}/_params.py +57 -17
- flyte/cli/_plugins.py +209 -0
- flyte/cli/_prefetch.py +292 -0
- flyte/cli/_run.py +690 -0
- flyte/cli/_serve.py +338 -0
- flyte/cli/_update.py +86 -0
- flyte/cli/_user.py +20 -0
- flyte/cli/main.py +246 -0
- flyte/config/__init__.py +2 -167
- flyte/config/_config.py +215 -163
- flyte/config/_internal.py +10 -1
- flyte/config/_reader.py +225 -0
- flyte/connectors/__init__.py +11 -0
- flyte/connectors/_connector.py +330 -0
- flyte/connectors/_server.py +194 -0
- flyte/connectors/utils.py +159 -0
- flyte/errors.py +134 -2
- flyte/extend.py +24 -0
- flyte/extras/_container.py +69 -56
- flyte/git/__init__.py +3 -0
- flyte/git/_config.py +279 -0
- flyte/io/__init__.py +8 -1
- flyte/io/{structured_dataset → _dataframe}/__init__.py +32 -30
- flyte/io/{structured_dataset → _dataframe}/basic_dfs.py +75 -68
- flyte/io/{structured_dataset/structured_dataset.py → _dataframe/dataframe.py} +207 -242
- flyte/io/_dir.py +575 -113
- flyte/io/_file.py +587 -141
- flyte/io/_hashing_io.py +342 -0
- flyte/io/extend.py +7 -0
- flyte/models.py +635 -0
- flyte/prefetch/__init__.py +22 -0
- flyte/prefetch/_hf_model.py +563 -0
- flyte/remote/__init__.py +14 -3
- flyte/remote/_action.py +879 -0
- flyte/remote/_app.py +346 -0
- flyte/remote/_auth_metadata.py +42 -0
- flyte/remote/_client/_protocols.py +62 -4
- flyte/remote/_client/auth/_auth_utils.py +19 -0
- flyte/remote/_client/auth/_authenticators/base.py +8 -2
- flyte/remote/_client/auth/_authenticators/device_code.py +4 -5
- flyte/remote/_client/auth/_authenticators/factory.py +4 -0
- flyte/remote/_client/auth/_authenticators/passthrough.py +79 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +17 -18
- flyte/remote/_client/auth/_channel.py +47 -18
- flyte/remote/_client/auth/_client_config.py +5 -3
- flyte/remote/_client/auth/_keyring.py +15 -2
- flyte/remote/_client/auth/_token_client.py +3 -3
- flyte/remote/_client/controlplane.py +206 -18
- flyte/remote/_common.py +66 -0
- flyte/remote/_data.py +107 -22
- flyte/remote/_logs.py +116 -33
- flyte/remote/_project.py +21 -19
- flyte/remote/_run.py +164 -631
- flyte/remote/_secret.py +72 -29
- flyte/remote/_task.py +387 -46
- flyte/remote/_trigger.py +368 -0
- flyte/remote/_user.py +43 -0
- flyte/report/_report.py +10 -6
- flyte/storage/__init__.py +13 -1
- flyte/storage/_config.py +237 -0
- flyte/storage/_parallel_reader.py +289 -0
- flyte/storage/_storage.py +268 -59
- flyte/syncify/__init__.py +56 -0
- flyte/syncify/_api.py +414 -0
- flyte/types/__init__.py +39 -0
- flyte/types/_interface.py +22 -7
- flyte/{io/pickle/transformer.py → types/_pickle.py} +37 -9
- flyte/types/_string_literals.py +8 -9
- flyte/types/_type_engine.py +226 -126
- flyte/types/_utils.py +1 -1
- flyte-2.0.0b46.data/scripts/debug.py +38 -0
- flyte-2.0.0b46.data/scripts/runtime.py +194 -0
- flyte-2.0.0b46.dist-info/METADATA +352 -0
- flyte-2.0.0b46.dist-info/RECORD +221 -0
- flyte-2.0.0b46.dist-info/entry_points.txt +8 -0
- flyte-2.0.0b46.dist-info/licenses/LICENSE +201 -0
- flyte/_api_commons.py +0 -3
- flyte/_cli/_common.py +0 -299
- flyte/_cli/_create.py +0 -42
- flyte/_cli/_delete.py +0 -23
- flyte/_cli/_deploy.py +0 -140
- flyte/_cli/_get.py +0 -235
- flyte/_cli/_run.py +0 -174
- flyte/_cli/main.py +0 -98
- flyte/_datastructures.py +0 -342
- flyte/_internal/controllers/pbhash.py +0 -39
- flyte/_protos/common/authorization_pb2.py +0 -66
- flyte/_protos/common/authorization_pb2.pyi +0 -108
- flyte/_protos/common/authorization_pb2_grpc.py +0 -4
- flyte/_protos/common/identifier_pb2.py +0 -71
- flyte/_protos/common/identifier_pb2.pyi +0 -82
- flyte/_protos/common/identifier_pb2_grpc.py +0 -4
- flyte/_protos/common/identity_pb2.py +0 -48
- flyte/_protos/common/identity_pb2.pyi +0 -72
- flyte/_protos/common/identity_pb2_grpc.py +0 -4
- flyte/_protos/common/list_pb2.py +0 -36
- flyte/_protos/common/list_pb2.pyi +0 -69
- flyte/_protos/common/list_pb2_grpc.py +0 -4
- flyte/_protos/common/policy_pb2.py +0 -37
- flyte/_protos/common/policy_pb2.pyi +0 -27
- flyte/_protos/common/policy_pb2_grpc.py +0 -4
- flyte/_protos/common/role_pb2.py +0 -37
- flyte/_protos/common/role_pb2.pyi +0 -53
- flyte/_protos/common/role_pb2_grpc.py +0 -4
- flyte/_protos/common/runtime_version_pb2.py +0 -28
- flyte/_protos/common/runtime_version_pb2.pyi +0 -24
- flyte/_protos/common/runtime_version_pb2_grpc.py +0 -4
- flyte/_protos/logs/dataplane/payload_pb2.py +0 -96
- flyte/_protos/logs/dataplane/payload_pb2.pyi +0 -168
- flyte/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
- flyte/_protos/secret/definition_pb2.py +0 -49
- flyte/_protos/secret/definition_pb2.pyi +0 -93
- flyte/_protos/secret/definition_pb2_grpc.py +0 -4
- flyte/_protos/secret/payload_pb2.py +0 -62
- flyte/_protos/secret/payload_pb2.pyi +0 -94
- flyte/_protos/secret/payload_pb2_grpc.py +0 -4
- flyte/_protos/secret/secret_pb2.py +0 -38
- flyte/_protos/secret/secret_pb2.pyi +0 -6
- flyte/_protos/secret/secret_pb2_grpc.py +0 -198
- flyte/_protos/secret/secret_pb2_grpc_grpc.py +0 -198
- flyte/_protos/validate/validate/validate_pb2.py +0 -76
- flyte/_protos/workflow/node_execution_service_pb2.py +0 -26
- flyte/_protos/workflow/node_execution_service_pb2.pyi +0 -4
- flyte/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
- flyte/_protos/workflow/queue_service_pb2.py +0 -106
- flyte/_protos/workflow/queue_service_pb2.pyi +0 -141
- flyte/_protos/workflow/queue_service_pb2_grpc.py +0 -172
- flyte/_protos/workflow/run_definition_pb2.py +0 -128
- flyte/_protos/workflow/run_definition_pb2.pyi +0 -310
- flyte/_protos/workflow/run_definition_pb2_grpc.py +0 -4
- flyte/_protos/workflow/run_logs_service_pb2.py +0 -41
- flyte/_protos/workflow/run_logs_service_pb2.pyi +0 -28
- flyte/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
- flyte/_protos/workflow/run_service_pb2.py +0 -133
- flyte/_protos/workflow/run_service_pb2.pyi +0 -175
- flyte/_protos/workflow/run_service_pb2_grpc.py +0 -412
- flyte/_protos/workflow/state_service_pb2.py +0 -58
- flyte/_protos/workflow/state_service_pb2.pyi +0 -71
- flyte/_protos/workflow/state_service_pb2_grpc.py +0 -138
- flyte/_protos/workflow/task_definition_pb2.py +0 -72
- flyte/_protos/workflow/task_definition_pb2.pyi +0 -65
- flyte/_protos/workflow/task_definition_pb2_grpc.py +0 -4
- flyte/_protos/workflow/task_service_pb2.py +0 -44
- flyte/_protos/workflow/task_service_pb2.pyi +0 -31
- flyte/_protos/workflow/task_service_pb2_grpc.py +0 -104
- flyte/io/_dataframe.py +0 -0
- flyte/io/pickle/__init__.py +0 -0
- flyte/remote/_console.py +0 -18
- flyte-0.2.0b1.dist-info/METADATA +0 -179
- flyte-0.2.0b1.dist-info/RECORD +0 -204
- flyte-0.2.0b1.dist-info/entry_points.txt +0 -3
- /flyte/{_cli → _debug}/__init__.py +0 -0
- /flyte/{_protos → _keyring}/__init__.py +0 -0
- {flyte-0.2.0b1.dist-info → flyte-2.0.0b46.dist-info}/WHEEL +0 -0
- {flyte-0.2.0b1.dist-info → flyte-2.0.0b46.dist-info}/top_level.txt +0 -0
flyte/storage/_storage.py
CHANGED
@@ -1,19 +1,38 @@
+from __future__ import annotations
+
 import os
 import pathlib
 import random
 import tempfile
 import typing
-from typing import
+from typing import AsyncGenerator, Optional
 from uuid import UUID

 import fsspec
+import obstore
 from fsspec.asyn import AsyncFileSystem
 from fsspec.utils import get_protocol
 from obstore.exceptions import GenericError
 from obstore.fsspec import register
+from obstore.store import ObjectStore

 from flyte._initialize import get_storage
 from flyte._logging import logger
+from flyte.errors import InitializationError, OnlyAsyncIOSupportedError
+
+if typing.TYPE_CHECKING:
+    from obstore import AsyncReadableFile, AsyncWritableFile
+
+_OBSTORE_SUPPORTED_PROTOCOLS = ["s3", "gs", "abfs", "abfss"]
+
+
+def _is_obstore_supported_protocol(protocol: str) -> bool:
+    """
+    Check if the given protocol is supported by obstore.
+    :param protocol: Protocol to check.
+    :return: True if the protocol is supported, False otherwise.
+    """
+    return protocol in _OBSTORE_SUPPORTED_PROTOCOLS


 def is_remote(path: typing.Union[pathlib.Path | str]) -> bool:
@@ -62,6 +81,52 @@ def get_random_local_directory() -> pathlib.Path:
     return _dir


+def get_configured_fsspec_kwargs(
+    protocol: typing.Optional[str] = None, anonymous: bool = False
+) -> typing.Dict[str, typing.Any]:
+    if protocol:
+        # Try to get storage config safely - may not be initialized for local operations
+        try:
+            storage_config = get_storage()
+        except InitializationError:
+            storage_config = None
+
+        match protocol:
+            case "s3":
+                # If the protocol is s3, we can use the s3 filesystem
+                from flyte.storage import S3
+
+                if storage_config and isinstance(storage_config, S3):
+                    return storage_config.get_fsspec_kwargs(anonymous=anonymous)
+
+                return S3.auto().get_fsspec_kwargs(anonymous=anonymous)
+            case "gs":
+                # If the protocol is gs, we can use the gs filesystem
+                from flyte.storage import GCS
+
+                if storage_config and isinstance(storage_config, GCS):
+                    return storage_config.get_fsspec_kwargs(anonymous=anonymous)
+
+                return GCS.auto().get_fsspec_kwargs(anonymous=anonymous)
+            case "abfs" | "abfss":
+                # If the protocol is abfs or abfss, we can use the abfs filesystem
+                from flyte.storage import ABFS
+
+                if storage_config and isinstance(storage_config, ABFS):
+                    return storage_config.get_fsspec_kwargs(anonymous=anonymous)
+
+                return ABFS.auto().get_fsspec_kwargs(anonymous=anonymous)
+            case _:
+                return {}
+
+    # If no protocol, return args from storage config if set
+    storage_config = get_storage()
+    if storage_config:
+        return storage_config.get_fsspec_kwargs(anonymous)
+
+    return {}
+
+
 def get_underlying_filesystem(
     protocol: typing.Optional[str] = None,
     anonymous: bool = False,
@@ -72,10 +137,10 @@ def get_underlying_filesystem(
         # If protocol is None, get it from the path
         protocol = get_protocol(path)

-
-
-
-    return fsspec.filesystem(protocol, **
+    configured_kwargs = get_configured_fsspec_kwargs(protocol, anonymous=anonymous)
+    configured_kwargs.update(kwargs)
+
+    return fsspec.filesystem(protocol, **configured_kwargs)


 def _get_anonymous_filesystem(from_path):
@@ -83,16 +148,86 @@ def _get_anonymous_filesystem(from_path):
     return get_underlying_filesystem(get_protocol(from_path), anonymous=True, asynchronous=True)


+async def _get_obstore_bypass(
+    from_path: str,
+    to_path: str | pathlib.Path,
+    recursive: bool = False,
+    exclude: list[str] | None = None,
+    **kwargs,
+) -> str:
+    from flyte.storage._parallel_reader import ObstoreParallelReader
+
+    fs = get_underlying_filesystem(path=from_path)
+    bucket, prefix = fs._split_path(from_path)  # pylint: disable=W0212
+    store: ObjectStore = fs._construct_store(bucket)
+
+    download_kwargs = {}
+    if "chunk_size" in kwargs:
+        download_kwargs["chunk_size"] = kwargs["chunk_size"]
+    if "max_concurrency" in kwargs:
+        download_kwargs["max_concurrency"] = kwargs["max_concurrency"]
+
+    reader = ObstoreParallelReader(store, **download_kwargs)
+    target_path = pathlib.Path(to_path) if isinstance(to_path, str) else to_path
+
+    # if recursive, just download the prefix to the target path
+    if recursive:
+        logger.debug(f"Downloading recursively {prefix=} to {target_path=}")
+        await reader.download_files(
+            prefix,
+            target_path,
+            exclude=exclude,
+        )
+        return str(to_path)
+
+    # if not recursive, we need to split out the file name from the prefix
+    else:
+        path_for_reader = pathlib.Path(prefix).name
+        final_prefix = pathlib.Path(prefix).parent
+        logger.debug(f"Downloading single file {final_prefix=}, {path_for_reader=} to {target_path=}")
+        await reader.download_files(
+            final_prefix,
+            target_path.parent,
+            path_for_reader,
+            destination_file_name=target_path.name,
+        )
+        return str(target_path)
+
+
 async def get(from_path: str, to_path: Optional[str | pathlib.Path] = None, recursive: bool = False, **kwargs) -> str:
     if not to_path:
-        name = pathlib.Path(from_path).name
+        name = pathlib.Path(from_path).name  # may need to be adjusted for windows
         to_path = get_random_local_path(file_path_or_file_name=name)
         logger.debug(f"Storing file from {from_path} to {to_path}")
+    else:
+        # Only apply directory logic for single files (not recursive)
+        if not recursive:
+            to_path_str = str(to_path)
+            # Check for trailing separator BEFORE converting to Path (which normalizes and removes it)
+            ends_with_sep = to_path_str.endswith(os.sep)
+            to_path_obj = pathlib.Path(to_path)
+
+            # If path ends with os.sep or is an existing directory, append source filename
+            if ends_with_sep or (to_path_obj.exists() and to_path_obj.is_dir()):
+                source_filename = pathlib.Path(from_path).name  # may need to be adjusted for windows
+                to_path = to_path_obj / source_filename
+        # For recursive=True, keep to_path as-is (it's the destination directory for contents)
+
     file_system = get_underlying_filesystem(path=from_path)
+
+    # Check if we should use obstore bypass
+    if (
+        _is_obstore_supported_protocol(file_system.protocol)
+        and hasattr(file_system, "_split_path")
+        and hasattr(file_system, "_construct_store")
+        and recursive
+    ):
+        return await _get_obstore_bypass(from_path, to_path, recursive, **kwargs)
+
     try:
         return await _get_from_filesystem(file_system, from_path, to_path, recursive=recursive, **kwargs)
     except (OSError, GenericError) as oe:
-        logger.debug(f"Error in getting {from_path} to {to_path}
+        logger.debug(f"Error in getting {from_path} to {to_path}, recursive: {recursive}, error: {oe}")
         if isinstance(file_system, AsyncFileSystem):
             try:
                 exists = await file_system._exists(from_path)  # pylint: disable=W0212
@@ -103,7 +238,6 @@ async def get(from_path: str, to_path: Optional[str | pathlib.Path] = None, recu
         else:
             exists = file_system.exists(from_path)
         if not exists:
-            # TODO: update exception to be more specific
             raise AssertionError(f"Unable to load data from {from_path}")
         file_system = _get_anonymous_filesystem(from_path)
         logger.debug(f"Attempting anonymous get with {file_system}")
@@ -118,21 +252,21 @@ async def _get_from_filesystem(
     **kwargs,
 ):
     if isinstance(file_system, AsyncFileSystem):
-        dst = await file_system._get(from_path, to_path, recursive=recursive, **kwargs)  # pylint: disable=W0212
+        dst = await file_system._get(str(from_path), str(to_path), recursive=recursive, **kwargs)  # pylint: disable=W0212
     else:
-        dst = file_system.get(from_path, to_path, recursive=recursive, **kwargs)
+        dst = file_system.get(str(from_path), str(to_path), recursive=recursive, **kwargs)

     if isinstance(dst, (str, pathlib.Path)):
         return dst
-    return to_path
+    return str(to_path)


-async def put(from_path: str, to_path: Optional[str] = None, recursive: bool = False, **kwargs):
+async def put(from_path: str, to_path: Optional[str] = None, recursive: bool = False, **kwargs) -> str:
     if not to_path:
         from flyte._context import internal_ctx

         ctx = internal_ctx()
-        name = pathlib.Path(from_path).name
+        name = pathlib.Path(from_path).name
         to_path = ctx.raw_data.get_random_remote_path(file_name=name)

     file_system = get_underlying_filesystem(path=to_path)
@@ -142,11 +276,53 @@ async def put(from_path: str, to_path: Optional[str] = None, recursive: bool = F
     else:
         dst = file_system.put(from_path, to_path, recursive=recursive, **kwargs)
     if isinstance(dst, (str, pathlib.Path)):
-        return dst
+        return str(dst)
     else:
         return to_path


+async def _open_obstore_bypass(path: str, mode: str = "rb", **kwargs) -> AsyncReadableFile | AsyncWritableFile:
+    """
+    Simple obstore bypass for opening files. No fallbacks, obstore only.
+    """
+
+    fs = get_underlying_filesystem(path=path)
+    bucket, file_path = fs._split_path(path)  # pylint: disable=W0212
+    store: ObjectStore = fs._construct_store(bucket)
+
+    file_handle: AsyncReadableFile | AsyncWritableFile
+
+    if "w" in mode:
+        attributes = kwargs.pop("attributes", {})
+        file_handle = obstore.open_writer_async(store, file_path, attributes=attributes)
+    else:  # read mode
+        buffer_size = kwargs.pop("buffer_size", 10 * 2**20)
+        file_handle = await obstore.open_reader_async(store, file_path, buffer_size=buffer_size)
+    return file_handle
+
+
+async def open(path: str, mode: str = "rb", **kwargs) -> AsyncReadableFile | AsyncWritableFile:
+    """
+    Asynchronously open a file and return an async context manager.
+    This function checks if the underlying filesystem supports obstore bypass.
+    If it does, it uses obstore to open the file. Otherwise, it falls back to
+    the standard _open function which uses AsyncFileSystem.
+
+    It will raise NotImplementedError if neither obstore nor AsyncFileSystem is supported.
+    """
+    fs = get_underlying_filesystem(path=path)
+
+    # Check if we should use obstore bypass
+    if _is_obstore_supported_protocol(fs.protocol) and hasattr(fs, "_split_path") and hasattr(fs, "_construct_store"):
+        return await _open_obstore_bypass(path, mode, **kwargs)
+
+    # Fallback to normal open
+    if isinstance(fs, AsyncFileSystem):
+        return await fs.open_async(path, mode, **kwargs)
+
+    raise OnlyAsyncIOSupportedError(f"Filesystem {fs} does not support async operations")
+
+
 async def put_stream(
     data_iterable: typing.AsyncIterable[bytes] | bytes, *, name: str | None = None, to_path: str | None = None, **kwargs
 ) -> str:
@@ -172,70 +348,75 @@ async def put_stream(

         ctx = internal_ctx()
         to_path = ctx.raw_data.get_random_remote_path(file_name=name)
+
+    # Check if we should use obstore bypass
     fs = get_underlying_filesystem(path=to_path)
-
-
-    try:
-        file_handle = await fs.open_async(to_path, "wb", **kwargs)
-        if isinstance(data_iterable, bytes):
-            await file_handle.write(data_iterable)
-        else:
-            async for data in data_iterable:
-                await file_handle.write(data)
-        return str(to_path)
-    except NotImplementedError:
-        logger.debug(f"{fs} doesn't implement 'open_async', falling back to sync")
-    finally:
-        if file_handle is not None:
-            await file_handle.close()
-
-    with fs.open(to_path, "wb", **kwargs) as f:
+    try:
+        file_handle = typing.cast("AsyncWritableFile", await open(to_path, "wb", **kwargs))
         if isinstance(data_iterable, bytes):
-
+            await file_handle.write(data_iterable)
         else:
-            # If data_iterable is async iterable, iterate over it and write each chunk to the file
             async for data in data_iterable:
-
+                await file_handle.write(data)
+        await file_handle.close()
+        return str(to_path)
+    except OnlyAsyncIOSupportedError:
+        pass
+
+    # Fallback to normal open
+    file_handle_io: typing.IO = fs.open(to_path, mode="wb", **kwargs)
+    if isinstance(data_iterable, bytes):
+        file_handle_io.write(data_iterable)
+    else:
+        async for data in data_iterable:
+            file_handle_io.write(data)
+    file_handle_io.close()
+
     return str(to_path)


-async def get_stream(path: str, chunk_size=10 * 2**20, **kwargs) ->
+async def get_stream(path: str, chunk_size=10 * 2**20, **kwargs) -> AsyncGenerator[bytes, None]:
     """
     Get a stream of data from a remote location.
     This is useful for downloading streaming data from a remote location.
     Example usage:
     ```python
     import flyte.storage as storage
-
+    async for chunk in storage.get_stream(path="s3://my_bucket/my_file.txt"):
+        process(chunk)
     ```

     :param path: Path to the remote location where the data will be downloaded.
     :param kwargs: Additional arguments to be passed to the underlying filesystem.
     :param chunk_size: Size of each chunk to be read from the file.
-    :return: An async iterator that yields chunks of
+    :return: An async iterator that yields chunks of bytes.
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    with fs.open(path, "rb") as file_handle:
-        while chunk := file_handle.read(min(chunk_size, file_size - total_read)):
-            total_read += len(chunk)
+    # Check if we should use obstore bypass
+    fs = get_underlying_filesystem(path=path)
+    if _is_obstore_supported_protocol(fs.protocol) and hasattr(fs, "_split_path") and hasattr(fs, "_construct_store"):
+        # Set buffer_size for obstore if chunk_size is provided
+        if "buffer_size" not in kwargs:
+            kwargs["buffer_size"] = chunk_size
+        file_handle = typing.cast("AsyncReadableFile", await _open_obstore_bypass(path, "rb", **kwargs))
+        while chunk := await file_handle.read():
+            yield bytes(chunk)
+        return
+
+    # Fallback to normal open
+    if "block_size" not in kwargs:
+        kwargs["block_size"] = chunk_size
+
+    if isinstance(fs, AsyncFileSystem):
+        file_handle = await fs.open_async(path, "rb", **kwargs)
+        while chunk := await file_handle.read():
             yield chunk
+        await file_handle.close()
+        return
+
+    file_handle = fs.open(path, "rb", **kwargs)
+    while chunk := file_handle.read():
+        yield chunk
+    file_handle.close()


 def join(*paths: str) -> str:
@@ -248,4 +429,32 @@ def join(*paths: str) -> str:
     return str(os.path.join(*paths))


-
+async def exists(path: str, **kwargs) -> bool:
+    """
+    Check if a path exists.
+
+    :param path: Path to be checked.
+    :param kwargs: Additional arguments to be passed to the underlying filesystem.
+    :return: True if the path exists, False otherwise.
+    """
+    try:
+        fs = get_underlying_filesystem(path=path, **kwargs)
+        if isinstance(fs, AsyncFileSystem):
+            _ = await fs._info(path)
+            return True
+        _ = fs.info(path)
+        return True
+    except FileNotFoundError:
+        return False
+
+
+def exists_sync(path: str, **kwargs) -> bool:
+    try:
+        fs = get_underlying_filesystem(path=path, **kwargs)
+        _ = fs.info(path)
+        return True
+    except FileNotFoundError:
+        return False
+
+
+register(_OBSTORE_SUPPORTED_PROTOCOLS, asynchronous=True)
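Taken together, the `_storage.py` changes add an async convenience API (`get`, `put`, `put_stream`, `get_stream`, `open`, `exists`, `exists_sync`) that prefers an obstore-backed fast path for `s3`, `gs`, `abfs`, and `abfss` URLs and falls back to fsspec otherwise. A minimal usage sketch follows, assuming these helpers are importable via `flyte.storage` (as the `get_stream` docstring above suggests), that storage credentials are already configured, and with hypothetical bucket and object names:

```python
import asyncio

import flyte.storage as storage  # helpers from the diff above; assumes they are re-exported here


def process(chunk: bytes) -> None:
    # Hypothetical consumer for streamed chunks.
    print(f"got {len(chunk)} bytes")


async def main() -> None:
    # Download a single object; with no to_path, a random local path is chosen.
    local = await storage.get("s3://my-bucket/data/input.csv")
    print("downloaded to", local)

    # Stream a large object chunk by chunk instead of materializing it in memory.
    async for chunk in storage.get_stream(path="s3://my-bucket/data/big.bin"):
        process(chunk)

    # Existence check, then upload the local copy back to a new key.
    if await storage.exists("s3://my-bucket/data/input.csv"):
        await storage.put(local, "s3://my-bucket/data/copy-of-input.csv")


if __name__ == "__main__":
    asyncio.run(main())
```

Note that in `get` the obstore bypass is only taken for recursive downloads of supported protocols; single-file gets and other protocols still go through the regular fsspec path shown in `_get_from_filesystem`.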
flyte/syncify/__init__.py
ADDED
@@ -0,0 +1,56 @@
+"""
+# Syncify Module
+This module provides the `syncify` decorator and the `Syncify` class.
+The decorator can be used to convert asynchronous functions or methods into synchronous ones.
+This is useful for integrating async code into synchronous contexts.
+
+Every asynchronous function or method wrapped with `syncify` can be called synchronously using the
+parenthesis `()` operator, or asynchronously using the `.aio()` method.
+
+Example::
+
+```python
+from flyte.syncify import syncify
+
+@syncify
+async def async_function(x: str) -> str:
+    return f"Hello, Async World {x}!"
+
+
+# now you can call it synchronously
+result = async_function("Async World")  # Note: no .aio() needed for sync calls
+print(result)
+# Output: Hello, Async World Async World!
+
+# or call it asynchronously
+async def main():
+    result = await async_function.aio("World")  # Note the use of .aio() for async calls
+    print(result)
+```
+
+## Creating a Syncify Instance
+```python
+from flyte.syncify import Syncify
+
+syncer = Syncify("my_syncer")
+
+# Now you can use `syncer` to decorate your async functions or methods
+
+```
+
+## How does it work?
+The Syncify class wraps asynchronous functions, classmethods, instance methods, and static methods to
+provide a synchronous interface. The wrapped methods are always executed in the context of a background loop,
+whether they are called synchronously or asynchronously. This allows for seamless integration of async code, as
+certain async libraries capture the event loop. An example is grpc.aio, which captures the event loop.
+In such a case, the Syncify class ensures that the async function is executed in the context of the background loop.
+
+To use it correctly with grpc.aio, you should wrap every grpc.aio channel creation, and client invocation
+with the same `Syncify` instance. This ensures that the async code runs in the correct event loop context.
+"""
+
+from flyte.syncify._api import Syncify
+
+syncify = Syncify()
+
+__all__ = ["Syncify", "syncify"]