flyte 0.1.0__py3-none-any.whl → 0.2.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flyte might be problematic. Click here for more details.
- flyte/__init__.py +78 -2
- flyte/_bin/__init__.py +0 -0
- flyte/_bin/runtime.py +152 -0
- flyte/_build.py +26 -0
- flyte/_cache/__init__.py +12 -0
- flyte/_cache/cache.py +145 -0
- flyte/_cache/defaults.py +9 -0
- flyte/_cache/policy_function_body.py +42 -0
- flyte/_code_bundle/__init__.py +8 -0
- flyte/_code_bundle/_ignore.py +113 -0
- flyte/_code_bundle/_packaging.py +187 -0
- flyte/_code_bundle/_utils.py +323 -0
- flyte/_code_bundle/bundle.py +209 -0
- flyte/_context.py +152 -0
- flyte/_deploy.py +243 -0
- flyte/_doc.py +29 -0
- flyte/_docstring.py +32 -0
- flyte/_environment.py +84 -0
- flyte/_excepthook.py +37 -0
- flyte/_group.py +32 -0
- flyte/_hash.py +23 -0
- flyte/_image.py +762 -0
- flyte/_initialize.py +492 -0
- flyte/_interface.py +84 -0
- flyte/_internal/__init__.py +3 -0
- flyte/_internal/controllers/__init__.py +128 -0
- flyte/_internal/controllers/_local_controller.py +193 -0
- flyte/_internal/controllers/_trace.py +41 -0
- flyte/_internal/controllers/remote/__init__.py +60 -0
- flyte/_internal/controllers/remote/_action.py +146 -0
- flyte/_internal/controllers/remote/_client.py +47 -0
- flyte/_internal/controllers/remote/_controller.py +494 -0
- flyte/_internal/controllers/remote/_core.py +410 -0
- flyte/_internal/controllers/remote/_informer.py +361 -0
- flyte/_internal/controllers/remote/_service_protocol.py +50 -0
- flyte/_internal/imagebuild/__init__.py +11 -0
- flyte/_internal/imagebuild/docker_builder.py +427 -0
- flyte/_internal/imagebuild/image_builder.py +246 -0
- flyte/_internal/imagebuild/remote_builder.py +0 -0
- flyte/_internal/resolvers/__init__.py +0 -0
- flyte/_internal/resolvers/_task_module.py +54 -0
- flyte/_internal/resolvers/common.py +31 -0
- flyte/_internal/resolvers/default.py +28 -0
- flyte/_internal/runtime/__init__.py +0 -0
- flyte/_internal/runtime/convert.py +342 -0
- flyte/_internal/runtime/entrypoints.py +135 -0
- flyte/_internal/runtime/io.py +136 -0
- flyte/_internal/runtime/resources_serde.py +138 -0
- flyte/_internal/runtime/task_serde.py +330 -0
- flyte/_internal/runtime/taskrunner.py +191 -0
- flyte/_internal/runtime/types_serde.py +54 -0
- flyte/_logging.py +135 -0
- flyte/_map.py +215 -0
- flyte/_pod.py +19 -0
- flyte/_protos/__init__.py +0 -0
- flyte/_protos/common/authorization_pb2.py +66 -0
- flyte/_protos/common/authorization_pb2.pyi +108 -0
- flyte/_protos/common/authorization_pb2_grpc.py +4 -0
- flyte/_protos/common/identifier_pb2.py +71 -0
- flyte/_protos/common/identifier_pb2.pyi +82 -0
- flyte/_protos/common/identifier_pb2_grpc.py +4 -0
- flyte/_protos/common/identity_pb2.py +48 -0
- flyte/_protos/common/identity_pb2.pyi +72 -0
- flyte/_protos/common/identity_pb2_grpc.py +4 -0
- flyte/_protos/common/list_pb2.py +36 -0
- flyte/_protos/common/list_pb2.pyi +71 -0
- flyte/_protos/common/list_pb2_grpc.py +4 -0
- flyte/_protos/common/policy_pb2.py +37 -0
- flyte/_protos/common/policy_pb2.pyi +27 -0
- flyte/_protos/common/policy_pb2_grpc.py +4 -0
- flyte/_protos/common/role_pb2.py +37 -0
- flyte/_protos/common/role_pb2.pyi +53 -0
- flyte/_protos/common/role_pb2_grpc.py +4 -0
- flyte/_protos/common/runtime_version_pb2.py +28 -0
- flyte/_protos/common/runtime_version_pb2.pyi +24 -0
- flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
- flyte/_protos/logs/dataplane/payload_pb2.py +100 -0
- flyte/_protos/logs/dataplane/payload_pb2.pyi +177 -0
- flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/definition_pb2.py +49 -0
- flyte/_protos/secret/definition_pb2.pyi +93 -0
- flyte/_protos/secret/definition_pb2_grpc.py +4 -0
- flyte/_protos/secret/payload_pb2.py +62 -0
- flyte/_protos/secret/payload_pb2.pyi +94 -0
- flyte/_protos/secret/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/secret_pb2.py +38 -0
- flyte/_protos/secret/secret_pb2.pyi +6 -0
- flyte/_protos/secret/secret_pb2_grpc.py +198 -0
- flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
- flyte/_protos/validate/validate/validate_pb2.py +76 -0
- flyte/_protos/workflow/common_pb2.py +27 -0
- flyte/_protos/workflow/common_pb2.pyi +14 -0
- flyte/_protos/workflow/common_pb2_grpc.py +4 -0
- flyte/_protos/workflow/environment_pb2.py +29 -0
- flyte/_protos/workflow/environment_pb2.pyi +12 -0
- flyte/_protos/workflow/environment_pb2_grpc.py +4 -0
- flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
- flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
- flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
- flyte/_protos/workflow/queue_service_pb2.py +105 -0
- flyte/_protos/workflow/queue_service_pb2.pyi +146 -0
- flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
- flyte/_protos/workflow/run_definition_pb2.py +128 -0
- flyte/_protos/workflow/run_definition_pb2.pyi +314 -0
- flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
- flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
- flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
- flyte/_protos/workflow/run_service_pb2.py +129 -0
- flyte/_protos/workflow/run_service_pb2.pyi +171 -0
- flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
- flyte/_protos/workflow/state_service_pb2.py +66 -0
- flyte/_protos/workflow/state_service_pb2.pyi +75 -0
- flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
- flyte/_protos/workflow/task_definition_pb2.py +79 -0
- flyte/_protos/workflow/task_definition_pb2.pyi +81 -0
- flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/task_service_pb2.py +60 -0
- flyte/_protos/workflow/task_service_pb2.pyi +59 -0
- flyte/_protos/workflow/task_service_pb2_grpc.py +138 -0
- flyte/_resources.py +226 -0
- flyte/_retry.py +32 -0
- flyte/_reusable_environment.py +25 -0
- flyte/_run.py +482 -0
- flyte/_secret.py +61 -0
- flyte/_task.py +449 -0
- flyte/_task_environment.py +183 -0
- flyte/_timeout.py +47 -0
- flyte/_tools.py +27 -0
- flyte/_trace.py +120 -0
- flyte/_utils/__init__.py +26 -0
- flyte/_utils/asyn.py +119 -0
- flyte/_utils/async_cache.py +139 -0
- flyte/_utils/coro_management.py +23 -0
- flyte/_utils/file_handling.py +72 -0
- flyte/_utils/helpers.py +134 -0
- flyte/_utils/lazy_module.py +54 -0
- flyte/_utils/org_discovery.py +57 -0
- flyte/_utils/uv_script_parser.py +49 -0
- flyte/_version.py +21 -0
- flyte/cli/__init__.py +3 -0
- flyte/cli/_abort.py +28 -0
- flyte/cli/_common.py +337 -0
- flyte/cli/_create.py +145 -0
- flyte/cli/_delete.py +23 -0
- flyte/cli/_deploy.py +152 -0
- flyte/cli/_gen.py +163 -0
- flyte/cli/_get.py +310 -0
- flyte/cli/_params.py +538 -0
- flyte/cli/_run.py +231 -0
- flyte/cli/main.py +166 -0
- flyte/config/__init__.py +3 -0
- flyte/config/_config.py +216 -0
- flyte/config/_internal.py +64 -0
- flyte/config/_reader.py +207 -0
- flyte/connectors/__init__.py +0 -0
- flyte/errors.py +172 -0
- flyte/extras/__init__.py +5 -0
- flyte/extras/_container.py +263 -0
- flyte/io/__init__.py +27 -0
- flyte/io/_dir.py +448 -0
- flyte/io/_file.py +467 -0
- flyte/io/_structured_dataset/__init__.py +129 -0
- flyte/io/_structured_dataset/basic_dfs.py +219 -0
- flyte/io/_structured_dataset/structured_dataset.py +1061 -0
- flyte/models.py +391 -0
- flyte/remote/__init__.py +26 -0
- flyte/remote/_client/__init__.py +0 -0
- flyte/remote/_client/_protocols.py +133 -0
- flyte/remote/_client/auth/__init__.py +12 -0
- flyte/remote/_client/auth/_auth_utils.py +14 -0
- flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
- flyte/remote/_client/auth/_authenticators/base.py +397 -0
- flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
- flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
- flyte/remote/_client/auth/_authenticators/factory.py +200 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
- flyte/remote/_client/auth/_channel.py +215 -0
- flyte/remote/_client/auth/_client_config.py +83 -0
- flyte/remote/_client/auth/_default_html.py +32 -0
- flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
- flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
- flyte/remote/_client/auth/_keyring.py +143 -0
- flyte/remote/_client/auth/_token_client.py +260 -0
- flyte/remote/_client/auth/errors.py +16 -0
- flyte/remote/_client/controlplane.py +95 -0
- flyte/remote/_console.py +18 -0
- flyte/remote/_data.py +159 -0
- flyte/remote/_logs.py +176 -0
- flyte/remote/_project.py +85 -0
- flyte/remote/_run.py +970 -0
- flyte/remote/_secret.py +132 -0
- flyte/remote/_task.py +391 -0
- flyte/report/__init__.py +3 -0
- flyte/report/_report.py +178 -0
- flyte/report/_template.html +124 -0
- flyte/storage/__init__.py +29 -0
- flyte/storage/_config.py +233 -0
- flyte/storage/_remote_fs.py +34 -0
- flyte/storage/_storage.py +271 -0
- flyte/storage/_utils.py +5 -0
- flyte/syncify/__init__.py +56 -0
- flyte/syncify/_api.py +371 -0
- flyte/types/__init__.py +36 -0
- flyte/types/_interface.py +40 -0
- flyte/types/_pickle.py +118 -0
- flyte/types/_renderer.py +162 -0
- flyte/types/_string_literals.py +120 -0
- flyte/types/_type_engine.py +2287 -0
- flyte/types/_utils.py +80 -0
- flyte-0.2.0a0.dist-info/METADATA +249 -0
- flyte-0.2.0a0.dist-info/RECORD +218 -0
- {flyte-0.1.0.dist-info → flyte-0.2.0a0.dist-info}/WHEEL +2 -1
- flyte-0.2.0a0.dist-info/entry_points.txt +3 -0
- flyte-0.2.0a0.dist-info/top_level.txt +1 -0
- flyte-0.1.0.dist-info/METADATA +0 -6
- flyte-0.1.0.dist-info/RECORD +0 -5
|
<!doctype html>
<html lang="">
<head>
    <meta charset="utf-8">
    <title>User Content</title>
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <link href="https://fonts.googleapis.com/css?family=Lato:300,400,700%7COpen+Sans:400,700" rel="stylesheet">
    <style>
        /* Reset list/table chrome so embedded report content renders flat. */
        ol, ul {
            list-style: none;
        }

        table {
            border-collapse: collapse;
            border-spacing: 0;
        }

        #flyte-frame-nav {
            display: flex;
            width: 100%;
        }

        /* Horizontal, centered tab strip. */
        #flyte-frame-tabs {
            display: flex;
            width: 100%;
            justify-content: center;
            margin-block: 0;
            padding-inline-start: 0;
        }

        #flyte-frame-tabs li {
            cursor: pointer;
            padding: 8px;
            margin: 0;
            margin-right: 12px;
            font-size: 14px;
            line-height: 20px;
            font-weight: 700;
            font-style: normal;
            font-family: Open Sans, helvetica, arial, sans-serif;
            color: #666666;
            width: 126px;
            text-align: center;
        }

        #flyte-frame-tabs li:last-child {
            margin-right: 0;
        }

        /* The selected tab gets an accent underline and darker text. */
        #flyte-frame-tabs li.active {
            border-bottom: 4px solid rgb(163, 26, 255);
            color: #333333;
        }

        #flyte-frame-container {
            width: auto;
        }

        /* Panels are hidden unless marked active. (CSS value keywords are
           ASCII case-insensitive, so `None` behaves as `none`.) */
        #flyte-frame-container > div {
            display: None;
        }

        #flyte-frame-container > div.active {
            display: block;
            padding: 2rem 2rem;
        }

    </style>

</head>
<body>
<!-- $NAV_HTML and $BODY_HTML are placeholders, presumably substituted via a
     string.Template-style render on the Python side — TODO confirm. -->
<nav id="flyte-frame-nav">
    <ul id="flyte-frame-tabs">
        $NAV_HTML
    </ul>
</nav>
<div id="flyte-frame-container">
    $BODY_HTML
</div>
</body>
<script>
    // Mark the tab whose link_index matches `index` as active; every other
    // tab has its class list cleared entirely (not just 'active' removed).
    // link_index is an attribute string, so strict equality only matches when
    // callers pass back the string read via getAttribute (see handleLinkClick).
    const setTabs = index => {
        const container = document.getElementById('flyte-frame-tabs')
        for (let i = 0; i < container.children.length; i++) {
            const tabIndex = container.children[i].getAttribute('link_index')
            if (tabIndex === index) {
                container.children[i].classList.add('active')
            } else {
                container.children[i].className = ''
            }
        }
    }
    // Same selection logic for the content panels.
    const setContent = index => {
        const container = document.getElementById('flyte-frame-container')
        for (let i = 0; i < container.children.length; i++) {
            const tabIndex = container.children[i].getAttribute('link_index')
            if (tabIndex === index) {
                container.children[i].classList.add('active')
            } else {
                container.children[i].className = ''
            }
        }
    }
    // Activate the tab/panel pair identified by `index`.
    const setLinkIndex = index => {
        setTabs(index)
        setContent(index)
    }
    // Intended as the onclick handler for each tab element ($NAV_HTML is
    // expected to wire it up — not visible here).
    const handleLinkClick = e => {
        const linkIndex = e.getAttribute('link_index');
        setLinkIndex(linkIndex)
    }

    // On load: activate the first tab/panel pair and assign matching 1-based
    // link_index attributes that tie each tab to its panel by position.
    const tabs = document.getElementById('flyte-frame-tabs');
    const containers = document.getElementById('flyte-frame-container');
    for(var i = 0; i < tabs.children.length; i++) {
        if (i === 0) {
            tabs.children[i].classList.add('active')
            containers.children[i].classList.add('active')
        }
        tabs.children[i].setAttribute("link_index", i+1)
        containers.children[i].setAttribute("link_index", i+1)
    }
</script>
</html>
|
# Public API of the flyte.storage package.
# Fix: "put_stream" was listed twice in __all__; the duplicate is removed.
__all__ = [
    "ABFS",
    "GCS",
    "S3",
    "Storage",
    "get",
    "get_random_local_directory",
    "get_random_local_path",
    "get_stream",
    "get_underlying_filesystem",
    "is_remote",
    "join",
    "put",
    "put_stream",
]

from ._config import ABFS, GCS, S3, Storage
from ._storage import (
    get,
    get_random_local_directory,
    get_random_local_path,
    get_stream,
    get_underlying_filesystem,
    is_remote,
    join,
    put,
    put_stream,
)
flyte/storage/_config.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import datetime
|
|
4
|
+
import os
|
|
5
|
+
import typing
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import ClassVar
|
|
8
|
+
|
|
9
|
+
from flyte.config import set_if_exists
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(init=True, repr=True, eq=True, frozen=True)
class Storage(object):
    """
    Data storage configuration that applies across any provider.

    Provider-specific subclasses (S3, GCS, ABFS) extend this with their own
    fields and environment-variable mappings.
    """

    # Number of times a failed storage operation is retried.
    retries: int = 3
    # Initial delay between retries.
    backoff: datetime.timedelta = datetime.timedelta(seconds=5)
    # Enables storage-layer debug behavior.
    enable_debug: bool = False
    attach_execution_metadata: bool = True

    # Maps dataclass field names to the environment variables auto() reads.
    _KEY_ENV_VAR_MAPPING: ClassVar[typing.Dict[str, str]] = {
        "enable_debug": "UNION_STORAGE_DEBUG",
        "retries": "UNION_STORAGE_RETRIES",
        "backoff": "UNION_STORAGE_BACKOFF_SECONDS",
    }

    def get_fsspec_kwargs(self, anonymous: bool = False, **kwargs) -> typing.Dict[str, typing.Any]:
        """
        Returns the configuration as kwargs for constructing an fsspec filesystem.

        The base class carries no provider-specific configuration; subclasses
        override this.
        """
        return {}

    @classmethod
    def _auto_as_kwargs(cls) -> typing.Dict[str, typing.Any]:
        # os.getenv returns raw strings (or None when unset).
        # NOTE(review): no int/timedelta coercion is visible here — presumably
        # set_if_exists or a downstream consumer converts; confirm upstream.
        retries = os.getenv(cls._KEY_ENV_VAR_MAPPING["retries"])
        backoff = os.getenv(cls._KEY_ENV_VAR_MAPPING["backoff"])
        enable_debug = os.getenv(cls._KEY_ENV_VAR_MAPPING["enable_debug"])

        # set_if_exists presumably only records keys whose value is present,
        # so unset env vars fall back to the dataclass defaults.
        kwargs: typing.Dict[str, typing.Any] = {}
        kwargs = set_if_exists(kwargs, "enable_debug", enable_debug)
        kwargs = set_if_exists(kwargs, "retries", retries)
        kwargs = set_if_exists(kwargs, "backoff", backoff)
        return kwargs

    @classmethod
    def auto(cls) -> Storage:
        """
        Construct the config object automatically from environment variables.
        """
        return cls(**cls._auto_as_kwargs())
55
|
+
@dataclass(init=True, repr=True, eq=True, frozen=True)
class S3(Storage):
    """
    S3 specific configuration
    """

    endpoint: typing.Optional[str] = None
    access_key_id: typing.Optional[str] = None
    secret_access_key: typing.Optional[str] = None

    # Merged with the base Storage mapping so retries/backoff/debug env vars
    # stay readable through S3 (key sets are disjoint, so merge order does not
    # change any value).
    _KEY_ENV_VAR_MAPPING: ClassVar[typing.Dict[str, str]] = {
        "endpoint": "FLYTE_AWS_ENDPOINT",
        "access_key_id": "FLYTE_AWS_ACCESS_KEY_ID",
        "secret_access_key": "FLYTE_AWS_SECRET_ACCESS_KEY",
    } | Storage._KEY_ENV_VAR_MAPPING

    # Refer to https://github.com/developmentseed/obstore/blob/33654fc37f19a657689eb93327b621e9f9e01494/obstore/python/obstore/store/_aws.pyi#L11
    # for key and secret
    _CONFIG_KEY_FSSPEC_S3_KEY_ID: ClassVar = "access_key_id"
    _CONFIG_KEY_FSSPEC_S3_SECRET: ClassVar = "secret_access_key"
    _CONFIG_KEY_ENDPOINT: ClassVar = "endpoint_url"
    _KEY_SKIP_SIGNATURE: ClassVar = "skip_signature"

    @classmethod
    def auto(cls) -> S3:
        """
        Construct an S3 config from FLYTE_AWS_* (plus base storage) env vars.

        :return: Config
        """
        endpoint = os.getenv(cls._KEY_ENV_VAR_MAPPING["endpoint"], None)
        access_key_id = os.getenv(cls._KEY_ENV_VAR_MAPPING["access_key_id"], None)
        secret_access_key = os.getenv(cls._KEY_ENV_VAR_MAPPING["secret_access_key"], None)

        kwargs = super()._auto_as_kwargs()
        kwargs = set_if_exists(kwargs, "endpoint", endpoint)
        kwargs = set_if_exists(kwargs, "access_key_id", access_key_id)
        kwargs = set_if_exists(kwargs, "secret_access_key", secret_access_key)

        return S3(**kwargs)

    @classmethod
    def for_sandbox(cls) -> S3:
        """
        S3 config pointing at the local sandbox object store
        (http://localhost:4566 with fixed minio credentials).

        :return:
        """
        kwargs = super()._auto_as_kwargs()
        final_kwargs = kwargs | {
            "endpoint": "http://localhost:4566",
            "access_key_id": "minio",
            "secret_access_key": "miniostorage",
        }
        return S3(**final_kwargs)

    def get_fsspec_kwargs(self, anonymous: bool = False, **kwargs) -> typing.Dict[str, typing.Any]:
        """
        Build kwargs for an obstore-backed fsspec S3 filesystem.

        Explicit overrides supplied via **kwargs take precedence over this
        config's fields; credential/endpoint values are popped into the nested
        ``config`` dict so they are not passed twice.
        """
        # Construct the config object
        kwargs.pop("anonymous", None)  # Remove anonymous if it exists, as we handle it separately
        config: typing.Dict[str, typing.Any] = {}
        if self._CONFIG_KEY_FSSPEC_S3_KEY_ID in kwargs or self.access_key_id:
            config[self._CONFIG_KEY_FSSPEC_S3_KEY_ID] = kwargs.pop(
                self._CONFIG_KEY_FSSPEC_S3_KEY_ID, self.access_key_id
            )
        if self._CONFIG_KEY_FSSPEC_S3_SECRET in kwargs or self.secret_access_key:
            config[self._CONFIG_KEY_FSSPEC_S3_SECRET] = kwargs.pop(
                self._CONFIG_KEY_FSSPEC_S3_SECRET, self.secret_access_key
            )
        if self._CONFIG_KEY_ENDPOINT in kwargs or self.endpoint:
            config["endpoint_url"] = kwargs.pop(self._CONFIG_KEY_ENDPOINT, self.endpoint)

        retries = kwargs.pop("retries", self.retries)
        backoff = kwargs.pop("backoff", self.backoff)

        if anonymous:
            # skip_signature asks obstore to issue unsigned (anonymous) requests.
            config[self._KEY_SKIP_SIGNATURE] = True

        retry_config = {
            "max_retries": retries,
            "backoff": {
                "base": 2,
                "init_backoff": backoff,
                "max_backoff": datetime.timedelta(seconds=16),
            },
            "retry_timeout": datetime.timedelta(minutes=3),
        }

        client_options = {"timeout": "99999s", "allow_http": True}

        # Only attach the obstore-specific keys when explicit configuration
        # exists; otherwise fall back to ambient credentials/defaults.
        if config:
            kwargs["config"] = config
            kwargs["client_options"] = client_options or None
            kwargs["retry_config"] = retry_config or None

        return kwargs
|
|
148
|
+
@dataclass(init=True, repr=True, eq=True, frozen=True)
class GCS(Storage):
    """
    Any GCS specific configuration.
    """

    gsutil_parallelism: bool = False

    # Field-name -> environment-variable mapping used by auto().
    _KEY_ENV_VAR_MAPPING: ClassVar[dict[str, str]] = {
        "gsutil_parallelism": "GCP_GSUTIL_PARALLELISM",
    }

    @classmethod
    def auto(cls) -> GCS:
        """Construct a GCS config from environment variables."""
        env_parallelism = os.getenv(cls._KEY_ENV_VAR_MAPPING["gsutil_parallelism"], None)

        init_kwargs: typing.Dict[str, typing.Any] = {}
        init_kwargs = set_if_exists(init_kwargs, "gsutil_parallelism", env_parallelism)
        return GCS(**init_kwargs)

    def get_fsspec_kwargs(self, anonymous: bool = False, **kwargs) -> typing.Dict[str, typing.Any]:
        """Return fsspec kwargs; GCS passes callers' kwargs through unchanged."""
        kwargs.pop("anonymous", None)
        return kwargs
172
|
+
|
|
173
|
+
@dataclass(init=True, repr=True, eq=True, frozen=True)
class ABFS(Storage):
    """
    Any Azure Blob Storage specific configuration.
    """

    account_name: typing.Optional[str] = None
    account_key: typing.Optional[str] = None
    tenant_id: typing.Optional[str] = None
    client_id: typing.Optional[str] = None
    client_secret: typing.Optional[str] = None

    # NOTE(review): unlike S3, this mapping is not merged with
    # Storage._KEY_ENV_VAR_MAPPING, so the base retries/backoff/debug env vars
    # are not readable through ABFS — confirm whether that is intentional.
    _KEY_ENV_VAR_MAPPING: ClassVar[dict[str, str]] = {
        "account_name": "AZURE_STORAGE_ACCOUNT_NAME",
        "account_key": "AZURE_STORAGE_ACCOUNT_KEY",
        "tenant_id": "AZURE_TENANT_ID",
        "client_id": "AZURE_CLIENT_ID",
        "client_secret": "AZURE_CLIENT_SECRET",
    }
    _KEY_SKIP_SIGNATURE: ClassVar = "skip_signature"

    # Credential fields forwarded into the nested obstore ``config`` dict.
    _CONFIG_FIELDS: ClassVar[typing.Tuple[str, ...]] = (
        "account_name",
        "account_key",
        "client_id",
        "client_secret",
        "tenant_id",
    )

    @classmethod
    def auto(cls) -> ABFS:
        """
        Construct an ABFS config automatically from AZURE_* environment variables.
        """
        account_name = os.getenv(cls._KEY_ENV_VAR_MAPPING["account_name"], None)
        account_key = os.getenv(cls._KEY_ENV_VAR_MAPPING["account_key"], None)
        tenant_id = os.getenv(cls._KEY_ENV_VAR_MAPPING["tenant_id"], None)
        client_id = os.getenv(cls._KEY_ENV_VAR_MAPPING["client_id"], None)
        client_secret = os.getenv(cls._KEY_ENV_VAR_MAPPING["client_secret"], None)

        kwargs: typing.Dict[str, typing.Any] = {}
        kwargs = set_if_exists(kwargs, "account_name", account_name)
        kwargs = set_if_exists(kwargs, "account_key", account_key)
        kwargs = set_if_exists(kwargs, "tenant_id", tenant_id)
        kwargs = set_if_exists(kwargs, "client_id", client_id)
        kwargs = set_if_exists(kwargs, "client_secret", client_secret)
        return ABFS(**kwargs)

    def get_fsspec_kwargs(self, anonymous: bool = False, **kwargs) -> typing.Dict[str, typing.Any]:
        """
        Build kwargs for an obstore-backed fsspec Azure filesystem.

        Explicit overrides supplied via **kwargs take precedence over this
        config's fields. Fix: overrides are popped (the original used
        ``kwargs.get``) so a credential is no longer forwarded both at the top
        level of kwargs AND inside the nested ``config`` dict — matching the
        S3 implementation's behavior.
        """
        kwargs.pop("anonymous", None)  # handled explicitly via the `anonymous` parameter
        config: typing.Dict[str, typing.Any] = {}
        for field_name in self._CONFIG_FIELDS:
            # Preserve original precedence: an explicit kwarg wins over the field.
            if field_name in kwargs or getattr(self, field_name):
                config[field_name] = kwargs.pop(field_name, getattr(self, field_name))

        if anonymous:
            # skip_signature asks obstore to issue unsigned (anonymous) requests.
            config[self._KEY_SKIP_SIGNATURE] = True

        client_options = {"timeout": "99999s", "allow_http": "true"}

        # Only attach obstore-specific keys when there is actual configuration.
        if config:
            kwargs["config"] = config
            kwargs["client_options"] = client_options

        return kwargs
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import threading
|
|
4
|
+
import typing
|
|
5
|
+
|
|
6
|
+
# This file system is not really a filesystem, so users aren't really able to specify the remote path,
# at least not yet.
REMOTE_PLACEHOLDER = "flyte://data"

# Type alias: maps a string key to a (hash-bytes, size) pair.
# NOTE(review): no usage is visible in this chunk — confirm intended semantics.
HashStructure = typing.Dict[str, typing.Tuple[bytes, int]]
+
|
|
12
|
+
|
|
13
|
+
class RemoteFSPathResolver:
    """Process-wide registry mapping ``flyte://`` URIs to their real remote paths."""

    protocol = "flyte://"
    # Shared across the process; every access goes through _lock.
    _flyte_path_to_remote_map: typing.ClassVar[typing.Dict[str, str]] = {}
    _lock = threading.Lock()

    @classmethod
    def resolve_remote_path(cls, flyte_uri: str) -> typing.Optional[str]:
        """
        Given a flyte uri, return the remote path if it was registered in the
        current session, otherwise return None.
        """
        with cls._lock:
            return cls._flyte_path_to_remote_map.get(flyte_uri)

    @classmethod
    def add_mapping(cls, flyte_uri: str, remote_path: str):
        """
        Thread-safe method to add a mapping from a flyte uri to a remote path.
        """
        with cls._lock:
            cls._flyte_path_to_remote_map[flyte_uri] = remote_path
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pathlib
|
|
3
|
+
import random
|
|
4
|
+
import tempfile
|
|
5
|
+
import typing
|
|
6
|
+
from typing import AsyncIterator, Optional
|
|
7
|
+
from uuid import UUID
|
|
8
|
+
|
|
9
|
+
import fsspec
|
|
10
|
+
from fsspec.asyn import AsyncFileSystem
|
|
11
|
+
from fsspec.utils import get_protocol
|
|
12
|
+
from obstore.exceptions import GenericError
|
|
13
|
+
from obstore.fsspec import register
|
|
14
|
+
|
|
15
|
+
from flyte._initialize import get_storage
|
|
16
|
+
from flyte._logging import logger
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def is_remote(path: typing.Union[pathlib.Path, str]) -> bool:
    """
    Return True when *path* refers to a non-local filesystem.

    Fix: the annotation was the malformed single-argument form
    ``typing.Union[pathlib.Path | str]`` and the docstring was a placeholder.

    :param path: A local path or a URI such as ``s3://bucket/key``.
    :return: True if the fsspec protocol of *path* is anything other than
        the local ``file`` protocol.
    """
    protocol = get_protocol(str(path))
    if protocol is None:
        return False
    return protocol != "file"
+
|
|
28
|
+
|
|
29
|
+
def strip_file_header(path: str) -> str:
    """
    Drop a leading ``file://`` scheme from *path*, if present; otherwise
    return *path* unchanged.
    """
    prefix = "file://"
    if not path.startswith(prefix):
        return path
    return path.replace(prefix, "", 1)
+
|
|
37
|
+
|
|
38
|
+
def get_random_local_path(file_path_or_file_name: pathlib.Path | str | None = None) -> pathlib.Path:
|
|
39
|
+
"""
|
|
40
|
+
Use file_path_or_file_name, when you want a random directory, but want to preserve the leaf file name
|
|
41
|
+
"""
|
|
42
|
+
local_tmp = pathlib.Path(tempfile.mkdtemp(prefix="flyte-tmp-"))
|
|
43
|
+
key = UUID(int=random.getrandbits(128)).hex
|
|
44
|
+
tmp_folder = local_tmp / key
|
|
45
|
+
tail = ""
|
|
46
|
+
if file_path_or_file_name:
|
|
47
|
+
_, tail = os.path.split(file_path_or_file_name)
|
|
48
|
+
if tail:
|
|
49
|
+
tmp_folder.mkdir(parents=True, exist_ok=True)
|
|
50
|
+
return tmp_folder / tail
|
|
51
|
+
local_tmp.mkdir(parents=True, exist_ok=True)
|
|
52
|
+
return tmp_folder
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_random_local_directory() -> pathlib.Path:
    """
    Create and return a fresh random local directory.

    :return: a random directory
    :rtype: pathlib.Path
    """
    directory = get_random_local_path(None)
    # get_random_local_path returns a not-yet-created dir path; create it here.
    directory.mkdir(parents=True, exist_ok=True)
    return directory
+
|
|
64
|
+
|
|
65
|
+
def get_underlying_filesystem(
    protocol: typing.Optional[str] = None,
    anonymous: bool = False,
    path: typing.Optional[str] = None,
    **kwargs,
) -> fsspec.AbstractFileSystem:
    """
    Construct an fsspec filesystem for *protocol* (inferred from *path* when
    omitted).

    Kwargs are derived from the globally initialized storage config when one
    exists; otherwise from environment-variable auto-configuration for the
    matching provider (s3 / gs / abfs[s]). Unrecognized protocols fall through
    with the caller's kwargs unchanged.
    """
    if protocol is None:
        # If protocol is None, get it from the path
        protocol = get_protocol(path)

    # Explicitly initialized storage configuration wins over env-var defaults.
    storage_config = get_storage()
    if storage_config:
        kwargs = storage_config.get_fsspec_kwargs(anonymous, **kwargs)
    elif protocol:
        match protocol:
            case "s3":
                # If the protocol is s3, we can use the s3 filesystem
                from flyte.storage import S3

                kwargs = S3.auto().get_fsspec_kwargs(anonymous=anonymous, **kwargs)
            case "gs":
                # If the protocol is gs, we can use the gs filesystem
                from flyte.storage import GCS

                kwargs = GCS.auto().get_fsspec_kwargs(anonymous=anonymous, **kwargs)
            case "abfs" | "abfss":
                # If the protocol is abfs or abfss, we can use the abfs filesystem
                from flyte.storage import ABFS

                kwargs = ABFS.auto().get_fsspec_kwargs(anonymous=anonymous, **kwargs)
            case _:
                # Unknown protocol: let fsspec handle it with raw kwargs.
                pass

    return fsspec.filesystem(protocol, **kwargs)
+
|
|
100
|
+
|
|
101
|
+
def _get_anonymous_filesystem(from_path):
    """Get the anonymous file system if needed."""
    detected_protocol = get_protocol(from_path)
    return get_underlying_filesystem(detected_protocol, anonymous=True, asynchronous=True)
+
|
|
105
|
+
|
|
106
|
+
async def get(from_path: str, to_path: Optional[str | pathlib.Path] = None, recursive: bool = False, **kwargs) -> str:
    """
    Download *from_path* to a local *to_path* (a fresh random local path is
    chosen when omitted) and return the destination.

    On an OSError/GenericError, retries once through an anonymous filesystem —
    but only if the source appears to exist (a missing source raises
    AssertionError instead).
    """
    if not to_path:
        name = pathlib.Path(from_path).name
        to_path = get_random_local_path(file_path_or_file_name=name)
    logger.debug(f"Storing file from {from_path} to {to_path}")
    file_system = get_underlying_filesystem(path=from_path)
    try:
        return await _get_from_filesystem(file_system, from_path, to_path, recursive=recursive, **kwargs)
    except (OSError, GenericError) as oe:
        logger.debug(f"Error in getting {from_path} to {to_path} rec {recursive} {oe}")
        if isinstance(file_system, AsyncFileSystem):
            try:
                exists = await file_system._exists(from_path)  # pylint: disable=W0212
            except GenericError:
                # for obstore, as it does not raise FileNotFoundError in fsspec but GenericError
                # force it to try get_filesystem(anonymous=True)
                exists = True
        else:
            exists = file_system.exists(from_path)
        if not exists:
            # TODO: update exception to be more specific
            raise AssertionError(f"Unable to load data from {from_path}")
        # Source exists but the authenticated read failed: retry anonymously.
        file_system = _get_anonymous_filesystem(from_path)
        logger.debug(f"Attempting anonymous get with {file_system}")
        return await _get_from_filesystem(file_system, from_path, to_path, recursive=recursive, **kwargs)
+
|
|
132
|
+
|
|
133
|
+
async def _get_from_filesystem(
    file_system: fsspec.AbstractFileSystem,
    from_path: str | pathlib.Path,
    to_path: str | pathlib.Path,
    recursive: bool,
    **kwargs,
):
    """Run a (possibly async) fsspec get and normalize the returned destination."""
    if isinstance(file_system, AsyncFileSystem):
        result = await file_system._get(from_path, to_path, recursive=recursive, **kwargs)  # pylint: disable=W0212
    else:
        result = file_system.get(from_path, to_path, recursive=recursive, **kwargs)

    # Some filesystems return the destination path; otherwise report to_path.
    if isinstance(result, (str, pathlib.Path)):
        return result
    return to_path
+
|
|
149
|
+
|
|
150
|
+
async def put(from_path: str, to_path: Optional[str] = None, recursive: bool = False, **kwargs) -> str:
    """
    Upload a local file or directory to a remote location.

    :param from_path: Local path to upload.
    :param to_path: Remote destination. When omitted, a random remote path is
        generated from the current raw-data context.
    :param recursive: Set to upload the path as a directory tree.
    :param kwargs: Additional arguments forwarded to the underlying filesystem.
    :return: The remote path where the data was stored.
    """
    if not to_path:
        from flyte._context import internal_ctx

        ctx = internal_ctx()
        # Directories get a fully random destination; files keep their base name.
        file_name = None if recursive else pathlib.Path(from_path).name
        to_path = ctx.raw_data.get_random_remote_path(file_name=file_name)

    fs = get_underlying_filesystem(path=to_path)
    source = strip_file_header(from_path)
    if isinstance(fs, AsyncFileSystem):
        result = await fs._put(source, to_path, recursive=recursive, **kwargs)  # pylint: disable=W0212
    else:
        result = fs.put(source, to_path, recursive=recursive, **kwargs)
    # Prefer the destination reported by the filesystem when available.
    return str(result) if isinstance(result, (str, pathlib.Path)) else to_path
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
async def put_stream(
    data_iterable: typing.AsyncIterable[bytes] | bytes, *, name: str | None = None, to_path: str | None = None, **kwargs
) -> str:
    """
    Put a stream of data to a remote location. This is useful for streaming data to a remote location.
    Example usage:
    ```python
    import flyte.storage as storage

    async def chunks() -> typing.AsyncIterable[bytes]:
        yield b'hello'

    await storage.put_stream(chunks(), name="my_file.txt")
    OR
    await storage.put_stream(b'hello', to_path="s3://my_bucket/my_file.txt")
    ```

    :param data_iterable: Async iterable of byte chunks, or a single ``bytes`` object, to be streamed.
        Note: a plain (sync) iterator is not supported — the data is consumed with ``async for``.
    :param name: Name of the file to be created. If not provided, a random name will be generated.
    :param to_path: Path to the remote location where the data will be stored.
    :param kwargs: Additional arguments to be passed to the underlying filesystem.
    :rtype: str
    :return: The path to the remote location where the data was stored.
    """
    if not to_path:
        from flyte._context import internal_ctx

        ctx = internal_ctx()
        to_path = ctx.raw_data.get_random_remote_path(file_name=name)
    fs = get_underlying_filesystem(path=to_path)
    file_handle = None
    if isinstance(fs, AsyncFileSystem):
        try:
            file_handle = await fs.open_async(to_path, "wb", **kwargs)
            if isinstance(data_iterable, bytes):
                await file_handle.write(data_iterable)
            else:
                async for data in data_iterable:
                    await file_handle.write(data)
            return str(to_path)
        except NotImplementedError:
            # Not every async filesystem implements open_async; fall back to sync below.
            logger.debug(f"{fs} doesn't implement 'open_async', falling back to sync")
        finally:
            if file_handle is not None:
                await file_handle.close()

    with fs.open(to_path, "wb", **kwargs) as f:
        if isinstance(data_iterable, bytes):
            f.write(data_iterable)
        else:
            # If data_iterable is async iterable, iterate over it and write each chunk to the file
            async for data in data_iterable:
                f.write(data)
    return str(to_path)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
async def get_stream(path: str, chunk_size=10 * 2**20, **kwargs) -> AsyncIterator[bytes]:
    """
    Get a stream of data from a remote location.
    This is useful for downloading streaming data from a remote location.
    Example usage:
    ```python
    import flyte.storage as storage

    obj = storage.get_stream(path="s3://my_bucket/my_file.txt")
    ```

    :param path: Path to the remote location where the data will be downloaded.
    :param kwargs: Additional arguments to be passed to the underlying filesystem.
    :param chunk_size: Size of each chunk to be read from the file (default 10 MiB).
    :return: An async iterator that yields chunks of data.
    """
    fs = get_underlying_filesystem(path=path, **kwargs)
    file_size = fs.info(path)["size"]
    total_read = 0
    file_handle = None
    try:
        if isinstance(fs, AsyncFileSystem):
            file_handle = await fs.open_async(path, "rb")
            # Never request more bytes than remain in the file.
            while chunk := await file_handle.read(min(chunk_size, file_size - total_read)):
                total_read += len(chunk)
                yield chunk
            return
    except NotImplementedError:
        logger.debug(f"{fs} doesn't implement 'open_async', falling back to sync")
    finally:
        if file_handle is not None:
            # open_async yields an async streamed file whose close() is a
            # coroutine — it must be awaited (mirrors put_stream); calling it
            # without await would leave the handle open.
            await file_handle.close()

    # Sync fallback
    with fs.open(path, "rb") as file_handle:
        while chunk := file_handle.read(min(chunk_size, file_size - total_read)):
            total_read += len(chunk)
            yield chunk
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def join(*paths: str) -> str:
    """
    Join multiple path components into a single path string.
    Thin wrapper around os.path.join.
    # TODO replace with proper join with fsspec root etc

    :param paths: Paths to be joined.
    """
    combined = os.path.join(*paths)
    return str(combined)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
# Pre-register the supported object-store schemes in async mode at import
# time. NOTE(review): `register` is defined elsewhere in this package —
# presumably it installs the async fsspec/obstore implementations for these
# schemes; confirm against flyte.storage internals.
register(["s3", "gs", "abfs", "abfss"], asynchronous=True)
|
flyte/storage/_utils.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import os

# This is the default chunk size flyte will use for writing to S3 and GCS. This is set to 25MB by default and is
# configurable by the user if needed. This is used when put() is called on filesystems.
# Override via the _F_P_WRITE_CHUNK_SIZE environment variable (value in bytes).
_WRITE_SIZE_CHUNK_BYTES = int(os.environ.get("_F_P_WRITE_CHUNK_SIZE", "26214400"))  # 25 * 2**20
|