flyte 0.1.0__py3-none-any.whl → 0.2.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flyte might be problematic. Click here for more details.

Files changed (219) hide show
  1. flyte/__init__.py +78 -2
  2. flyte/_bin/__init__.py +0 -0
  3. flyte/_bin/runtime.py +152 -0
  4. flyte/_build.py +26 -0
  5. flyte/_cache/__init__.py +12 -0
  6. flyte/_cache/cache.py +145 -0
  7. flyte/_cache/defaults.py +9 -0
  8. flyte/_cache/policy_function_body.py +42 -0
  9. flyte/_code_bundle/__init__.py +8 -0
  10. flyte/_code_bundle/_ignore.py +113 -0
  11. flyte/_code_bundle/_packaging.py +187 -0
  12. flyte/_code_bundle/_utils.py +323 -0
  13. flyte/_code_bundle/bundle.py +209 -0
  14. flyte/_context.py +152 -0
  15. flyte/_deploy.py +243 -0
  16. flyte/_doc.py +29 -0
  17. flyte/_docstring.py +32 -0
  18. flyte/_environment.py +84 -0
  19. flyte/_excepthook.py +37 -0
  20. flyte/_group.py +32 -0
  21. flyte/_hash.py +23 -0
  22. flyte/_image.py +762 -0
  23. flyte/_initialize.py +492 -0
  24. flyte/_interface.py +84 -0
  25. flyte/_internal/__init__.py +3 -0
  26. flyte/_internal/controllers/__init__.py +128 -0
  27. flyte/_internal/controllers/_local_controller.py +193 -0
  28. flyte/_internal/controllers/_trace.py +41 -0
  29. flyte/_internal/controllers/remote/__init__.py +60 -0
  30. flyte/_internal/controllers/remote/_action.py +146 -0
  31. flyte/_internal/controllers/remote/_client.py +47 -0
  32. flyte/_internal/controllers/remote/_controller.py +494 -0
  33. flyte/_internal/controllers/remote/_core.py +410 -0
  34. flyte/_internal/controllers/remote/_informer.py +361 -0
  35. flyte/_internal/controllers/remote/_service_protocol.py +50 -0
  36. flyte/_internal/imagebuild/__init__.py +11 -0
  37. flyte/_internal/imagebuild/docker_builder.py +427 -0
  38. flyte/_internal/imagebuild/image_builder.py +246 -0
  39. flyte/_internal/imagebuild/remote_builder.py +0 -0
  40. flyte/_internal/resolvers/__init__.py +0 -0
  41. flyte/_internal/resolvers/_task_module.py +54 -0
  42. flyte/_internal/resolvers/common.py +31 -0
  43. flyte/_internal/resolvers/default.py +28 -0
  44. flyte/_internal/runtime/__init__.py +0 -0
  45. flyte/_internal/runtime/convert.py +342 -0
  46. flyte/_internal/runtime/entrypoints.py +135 -0
  47. flyte/_internal/runtime/io.py +136 -0
  48. flyte/_internal/runtime/resources_serde.py +138 -0
  49. flyte/_internal/runtime/task_serde.py +330 -0
  50. flyte/_internal/runtime/taskrunner.py +191 -0
  51. flyte/_internal/runtime/types_serde.py +54 -0
  52. flyte/_logging.py +135 -0
  53. flyte/_map.py +215 -0
  54. flyte/_pod.py +19 -0
  55. flyte/_protos/__init__.py +0 -0
  56. flyte/_protos/common/authorization_pb2.py +66 -0
  57. flyte/_protos/common/authorization_pb2.pyi +108 -0
  58. flyte/_protos/common/authorization_pb2_grpc.py +4 -0
  59. flyte/_protos/common/identifier_pb2.py +71 -0
  60. flyte/_protos/common/identifier_pb2.pyi +82 -0
  61. flyte/_protos/common/identifier_pb2_grpc.py +4 -0
  62. flyte/_protos/common/identity_pb2.py +48 -0
  63. flyte/_protos/common/identity_pb2.pyi +72 -0
  64. flyte/_protos/common/identity_pb2_grpc.py +4 -0
  65. flyte/_protos/common/list_pb2.py +36 -0
  66. flyte/_protos/common/list_pb2.pyi +71 -0
  67. flyte/_protos/common/list_pb2_grpc.py +4 -0
  68. flyte/_protos/common/policy_pb2.py +37 -0
  69. flyte/_protos/common/policy_pb2.pyi +27 -0
  70. flyte/_protos/common/policy_pb2_grpc.py +4 -0
  71. flyte/_protos/common/role_pb2.py +37 -0
  72. flyte/_protos/common/role_pb2.pyi +53 -0
  73. flyte/_protos/common/role_pb2_grpc.py +4 -0
  74. flyte/_protos/common/runtime_version_pb2.py +28 -0
  75. flyte/_protos/common/runtime_version_pb2.pyi +24 -0
  76. flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
  77. flyte/_protos/logs/dataplane/payload_pb2.py +100 -0
  78. flyte/_protos/logs/dataplane/payload_pb2.pyi +177 -0
  79. flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
  80. flyte/_protos/secret/definition_pb2.py +49 -0
  81. flyte/_protos/secret/definition_pb2.pyi +93 -0
  82. flyte/_protos/secret/definition_pb2_grpc.py +4 -0
  83. flyte/_protos/secret/payload_pb2.py +62 -0
  84. flyte/_protos/secret/payload_pb2.pyi +94 -0
  85. flyte/_protos/secret/payload_pb2_grpc.py +4 -0
  86. flyte/_protos/secret/secret_pb2.py +38 -0
  87. flyte/_protos/secret/secret_pb2.pyi +6 -0
  88. flyte/_protos/secret/secret_pb2_grpc.py +198 -0
  89. flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
  90. flyte/_protos/validate/validate/validate_pb2.py +76 -0
  91. flyte/_protos/workflow/common_pb2.py +27 -0
  92. flyte/_protos/workflow/common_pb2.pyi +14 -0
  93. flyte/_protos/workflow/common_pb2_grpc.py +4 -0
  94. flyte/_protos/workflow/environment_pb2.py +29 -0
  95. flyte/_protos/workflow/environment_pb2.pyi +12 -0
  96. flyte/_protos/workflow/environment_pb2_grpc.py +4 -0
  97. flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
  98. flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
  99. flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
  100. flyte/_protos/workflow/queue_service_pb2.py +105 -0
  101. flyte/_protos/workflow/queue_service_pb2.pyi +146 -0
  102. flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
  103. flyte/_protos/workflow/run_definition_pb2.py +128 -0
  104. flyte/_protos/workflow/run_definition_pb2.pyi +314 -0
  105. flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
  106. flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
  107. flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
  108. flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
  109. flyte/_protos/workflow/run_service_pb2.py +129 -0
  110. flyte/_protos/workflow/run_service_pb2.pyi +171 -0
  111. flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
  112. flyte/_protos/workflow/state_service_pb2.py +66 -0
  113. flyte/_protos/workflow/state_service_pb2.pyi +75 -0
  114. flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
  115. flyte/_protos/workflow/task_definition_pb2.py +79 -0
  116. flyte/_protos/workflow/task_definition_pb2.pyi +81 -0
  117. flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
  118. flyte/_protos/workflow/task_service_pb2.py +60 -0
  119. flyte/_protos/workflow/task_service_pb2.pyi +59 -0
  120. flyte/_protos/workflow/task_service_pb2_grpc.py +138 -0
  121. flyte/_resources.py +226 -0
  122. flyte/_retry.py +32 -0
  123. flyte/_reusable_environment.py +25 -0
  124. flyte/_run.py +482 -0
  125. flyte/_secret.py +61 -0
  126. flyte/_task.py +449 -0
  127. flyte/_task_environment.py +183 -0
  128. flyte/_timeout.py +47 -0
  129. flyte/_tools.py +27 -0
  130. flyte/_trace.py +120 -0
  131. flyte/_utils/__init__.py +26 -0
  132. flyte/_utils/asyn.py +119 -0
  133. flyte/_utils/async_cache.py +139 -0
  134. flyte/_utils/coro_management.py +23 -0
  135. flyte/_utils/file_handling.py +72 -0
  136. flyte/_utils/helpers.py +134 -0
  137. flyte/_utils/lazy_module.py +54 -0
  138. flyte/_utils/org_discovery.py +57 -0
  139. flyte/_utils/uv_script_parser.py +49 -0
  140. flyte/_version.py +21 -0
  141. flyte/cli/__init__.py +3 -0
  142. flyte/cli/_abort.py +28 -0
  143. flyte/cli/_common.py +337 -0
  144. flyte/cli/_create.py +145 -0
  145. flyte/cli/_delete.py +23 -0
  146. flyte/cli/_deploy.py +152 -0
  147. flyte/cli/_gen.py +163 -0
  148. flyte/cli/_get.py +310 -0
  149. flyte/cli/_params.py +538 -0
  150. flyte/cli/_run.py +231 -0
  151. flyte/cli/main.py +166 -0
  152. flyte/config/__init__.py +3 -0
  153. flyte/config/_config.py +216 -0
  154. flyte/config/_internal.py +64 -0
  155. flyte/config/_reader.py +207 -0
  156. flyte/connectors/__init__.py +0 -0
  157. flyte/errors.py +172 -0
  158. flyte/extras/__init__.py +5 -0
  159. flyte/extras/_container.py +263 -0
  160. flyte/io/__init__.py +27 -0
  161. flyte/io/_dir.py +448 -0
  162. flyte/io/_file.py +467 -0
  163. flyte/io/_structured_dataset/__init__.py +129 -0
  164. flyte/io/_structured_dataset/basic_dfs.py +219 -0
  165. flyte/io/_structured_dataset/structured_dataset.py +1061 -0
  166. flyte/models.py +391 -0
  167. flyte/remote/__init__.py +26 -0
  168. flyte/remote/_client/__init__.py +0 -0
  169. flyte/remote/_client/_protocols.py +133 -0
  170. flyte/remote/_client/auth/__init__.py +12 -0
  171. flyte/remote/_client/auth/_auth_utils.py +14 -0
  172. flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
  173. flyte/remote/_client/auth/_authenticators/base.py +397 -0
  174. flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  175. flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
  176. flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
  177. flyte/remote/_client/auth/_authenticators/factory.py +200 -0
  178. flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
  179. flyte/remote/_client/auth/_channel.py +215 -0
  180. flyte/remote/_client/auth/_client_config.py +83 -0
  181. flyte/remote/_client/auth/_default_html.py +32 -0
  182. flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  183. flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
  184. flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
  185. flyte/remote/_client/auth/_keyring.py +143 -0
  186. flyte/remote/_client/auth/_token_client.py +260 -0
  187. flyte/remote/_client/auth/errors.py +16 -0
  188. flyte/remote/_client/controlplane.py +95 -0
  189. flyte/remote/_console.py +18 -0
  190. flyte/remote/_data.py +159 -0
  191. flyte/remote/_logs.py +176 -0
  192. flyte/remote/_project.py +85 -0
  193. flyte/remote/_run.py +970 -0
  194. flyte/remote/_secret.py +132 -0
  195. flyte/remote/_task.py +391 -0
  196. flyte/report/__init__.py +3 -0
  197. flyte/report/_report.py +178 -0
  198. flyte/report/_template.html +124 -0
  199. flyte/storage/__init__.py +29 -0
  200. flyte/storage/_config.py +233 -0
  201. flyte/storage/_remote_fs.py +34 -0
  202. flyte/storage/_storage.py +271 -0
  203. flyte/storage/_utils.py +5 -0
  204. flyte/syncify/__init__.py +56 -0
  205. flyte/syncify/_api.py +371 -0
  206. flyte/types/__init__.py +36 -0
  207. flyte/types/_interface.py +40 -0
  208. flyte/types/_pickle.py +118 -0
  209. flyte/types/_renderer.py +162 -0
  210. flyte/types/_string_literals.py +120 -0
  211. flyte/types/_type_engine.py +2287 -0
  212. flyte/types/_utils.py +80 -0
  213. flyte-0.2.0a0.dist-info/METADATA +249 -0
  214. flyte-0.2.0a0.dist-info/RECORD +218 -0
  215. {flyte-0.1.0.dist-info → flyte-0.2.0a0.dist-info}/WHEEL +2 -1
  216. flyte-0.2.0a0.dist-info/entry_points.txt +3 -0
  217. flyte-0.2.0a0.dist-info/top_level.txt +1 -0
  218. flyte-0.1.0.dist-info/METADATA +0 -6
  219. flyte-0.1.0.dist-info/RECORD +0 -5
@@ -0,0 +1,124 @@
1
+ <!doctype html>
2
+ <html lang="">
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <title>User Content</title>
6
+ <meta name="viewport" content="width=device-width, initial-scale=1">
7
+ <link href="https://fonts.googleapis.com/css?family=Lato:300,400,700%7COpen+Sans:400,700" rel="stylesheet">
8
+ <style>
9
+ ol, ul {
10
+ list-style: none;
11
+ }
12
+
13
+ table {
14
+ border-collapse: collapse;
15
+ border-spacing: 0;
16
+ }
17
+
18
+ #flyte-frame-nav {
19
+ display: flex;
20
+ width: 100%;
21
+ }
22
+
23
+ #flyte-frame-tabs {
24
+ display: flex;
25
+ width: 100%;
26
+ justify-content: center;
27
+ margin-block: 0;
28
+ padding-inline-start: 0;
29
+ }
30
+
31
+ #flyte-frame-tabs li {
32
+ cursor: pointer;
33
+ padding: 8px;
34
+ margin: 0;
35
+ margin-right: 12px;
36
+ font-size: 14px;
37
+ line-height: 20px;
38
+ font-weight: 700;
39
+ font-style: normal;
40
+ font-family: Open Sans, helvetica, arial, sans-serif;
41
+ color: #666666;
42
+ width: 126px;
43
+ text-align: center;
44
+ }
45
+
46
+ #flyte-frame-tabs li:last-child {
47
+ margin-right: 0;
48
+ }
49
+
50
+ #flyte-frame-tabs li.active {
51
+ border-bottom: 4px solid rgb(163, 26, 255);
52
+ color: #333333;
53
+ }
54
+
55
+ #flyte-frame-container {
56
+ width: auto;
57
+ }
58
+
59
+ #flyte-frame-container > div {
60
+ display: None;
61
+ }
62
+
63
+ #flyte-frame-container > div.active {
64
+ display: block;
65
+ padding: 2rem 2rem;
66
+ }
67
+
68
+ </style>
69
+
70
+ </head>
71
+ <body>
72
+ <nav id="flyte-frame-nav">
73
+ <ul id="flyte-frame-tabs">
74
+ $NAV_HTML
75
+ </ul>
76
+ </nav>
77
+ <div id="flyte-frame-container">
78
+ $BODY_HTML
79
+ </div>
80
+ </body>
81
+ <script>
82
+ const setTabs = index => { // Mark the tab whose link_index equals `index` as active and reset the others.
83
+ const container = document.getElementById('flyte-frame-tabs')
84
+ for (let i = 0; i < container.children.length; i++) {
85
+ const tabIndex = container.children[i].getAttribute('link_index')
86
+ if (tabIndex === index) { // strict comparison: `index` must have the same type as the attribute value
87
+ container.children[i].classList.add('active')
88
+ } else {
89
+ container.children[i].className = '' // clears every class on the element, not only 'active'
90
+ }
91
+ }
92
+ }
93
+ const setContent = index => { // Same logic as setTabs, applied to the children of #flyte-frame-container.
94
+ const container = document.getElementById('flyte-frame-container')
95
+ for (let i = 0; i < container.children.length; i++) {
96
+ const tabIndex = container.children[i].getAttribute('link_index')
97
+ if (tabIndex === index) {
98
+ container.children[i].classList.add('active')
99
+ } else {
100
+ container.children[i].className = '' // clears every class on the element, not only 'active'
101
+ }
102
+ }
103
+ }
104
+ const setLinkIndex = index => { // Switch both the tab strip and the content panels to `index`.
105
+ setTabs(index)
106
+ setContent(index)
107
+ }
108
+ const handleLinkClick = e => { // NOTE(review): `e` is used as an element (getAttribute is called on it), not as an event; no caller is visible in this template - presumably the markup substituted for $NAV_HTML passes the clicked element; confirm.
109
+ const linkIndex = e.getAttribute('link_index');
110
+ setLinkIndex(linkIndex)
111
+ }
112
+
113
+ const tabs = document.getElementById('flyte-frame-tabs');
114
+ const containers = document.getElementById('flyte-frame-container');
115
+ for(var i = 0; i < tabs.children.length; i++) { // Initial state: number each tab and the panel at the same position. NOTE(review): assumes the container has at least as many children as the tab list - confirm.
116
+ if (i === 0) { // the first tab and first panel start out active
117
+ tabs.children[i].classList.add('active')
118
+ containers.children[i].classList.add('active')
119
+ }
120
+ tabs.children[i].setAttribute("link_index", i+1) // link_index is 1-based
121
+ containers.children[i].setAttribute("link_index", i+1)
122
+ }
123
+ </script>
124
+ </html>
@@ -0,0 +1,29 @@
1
# Public API of the storage package. Each name appears exactly once
# ("put_stream" was previously listed twice).
__all__ = [
    "ABFS",
    "GCS",
    "S3",
    "Storage",
    "get",
    "get_random_local_directory",
    "get_random_local_path",
    "get_stream",
    "get_underlying_filesystem",
    "is_remote",
    "join",
    "put",
    "put_stream",
]
17
+
18
+ from ._config import ABFS, GCS, S3, Storage
19
+ from ._storage import (
20
+ get,
21
+ get_random_local_directory,
22
+ get_random_local_path,
23
+ get_stream,
24
+ get_underlying_filesystem,
25
+ is_remote,
26
+ join,
27
+ put,
28
+ put_stream,
29
+ )
@@ -0,0 +1,233 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ import os
5
+ import typing
6
+ from dataclasses import dataclass
7
+ from typing import ClassVar
8
+
9
+ from flyte.config import set_if_exists
10
+
11
+
12
@dataclass(init=True, repr=True, eq=True, frozen=True)
class Storage(object):
    """
    Data storage configuration that applies across any provider.

    Fields:
        retries: retry count (defaults to 3).
        backoff: initial backoff between retries (defaults to 5 seconds).
        enable_debug: debug flag (defaults to False).
        attach_execution_metadata: defaults to True; not read anywhere in this class.

    ``retries``, ``backoff`` and ``enable_debug`` can be supplied through the
    environment variables named in ``_KEY_ENV_VAR_MAPPING``.
    """

    retries: int = 3
    backoff: datetime.timedelta = datetime.timedelta(seconds=5)
    enable_debug: bool = False
    attach_execution_metadata: bool = True

    _KEY_ENV_VAR_MAPPING: ClassVar[typing.Dict[str, str]] = {
        "enable_debug": "UNION_STORAGE_DEBUG",
        "retries": "UNION_STORAGE_RETRIES",
        "backoff": "UNION_STORAGE_BACKOFF_SECONDS",
    }

    def get_fsspec_kwargs(self, anonymous: bool = False, **kwargs) -> typing.Dict[str, typing.Any]:
        """
        Returns the configuration as kwargs for constructing an fsspec filesystem.

        The base class contributes nothing and always returns an empty dict.
        """
        return {}

    @staticmethod
    def _coerce_env_values(kwargs: typing.Dict[str, typing.Any]) -> typing.Dict[str, typing.Any]:
        """
        Convert string values read from the environment to the declared field types.

        Environment variables are always strings, while the dataclass does no
        conversion of its own. Only ``str`` values are touched, so values that
        already have the right type pass through unchanged.

        :param kwargs: Constructor kwargs, possibly holding raw strings.
        :return: A new dict with ``retries`` as int, ``backoff`` as a timedelta
            (the string is read as seconds) and ``enable_debug`` as bool.
        :raises ValueError: If ``retries`` or ``backoff`` is not numeric.
        """
        coerced = dict(kwargs)

        retries = coerced.get("retries")
        if isinstance(retries, str):
            coerced["retries"] = int(retries)

        backoff = coerced.get("backoff")
        if isinstance(backoff, str):
            # The variable is named *_BACKOFF_SECONDS, so the value is in seconds.
            coerced["backoff"] = datetime.timedelta(seconds=float(backoff))

        enable_debug = coerced.get("enable_debug")
        if isinstance(enable_debug, str):
            # Any other non-empty string (e.g. "false", "0") means disabled.
            coerced["enable_debug"] = enable_debug.strip().lower() in ("1", "true", "t", "yes", "y", "on")

        return coerced

    @classmethod
    def _auto_as_kwargs(cls) -> typing.Dict[str, typing.Any]:
        """
        Read the provider-independent settings from the environment.

        :return: Constructor kwargs for the settings that were set, already
            converted to the field types.
        """
        retries = os.getenv(cls._KEY_ENV_VAR_MAPPING["retries"])
        backoff = os.getenv(cls._KEY_ENV_VAR_MAPPING["backoff"])
        enable_debug = os.getenv(cls._KEY_ENV_VAR_MAPPING["enable_debug"])

        kwargs: typing.Dict[str, typing.Any] = {}
        kwargs = set_if_exists(kwargs, "enable_debug", enable_debug)
        kwargs = set_if_exists(kwargs, "retries", retries)
        kwargs = set_if_exists(kwargs, "backoff", backoff)
        # set_if_exists decides which keys are present; coercion only fixes their types.
        return cls._coerce_env_values(kwargs)

    @classmethod
    def auto(cls) -> Storage:
        """
        Construct the config object automatically from environment variables.
        """
        return cls(**cls._auto_as_kwargs())
53
+
54
+
55
+ @dataclass(init=True, repr=True, eq=True, frozen=True)
56
+ class S3(Storage):
57
+ """
58
+ S3 specific configuration
+
+ Adds an optional endpoint and access key pair to the shared Storage settings.
+ Each of the three fields can be supplied through the FLYTE_AWS_* environment
+ variables named in ``_KEY_ENV_VAR_MAPPING``.
59
+ """
60
+
61
+ endpoint: typing.Optional[str] = None
62
+ access_key_id: typing.Optional[str] = None
63
+ secret_access_key: typing.Optional[str] = None
64
+
65
+ _KEY_ENV_VAR_MAPPING: ClassVar[typing.Dict[str, str]] = {
66
+ "endpoint": "FLYTE_AWS_ENDPOINT",
67
+ "access_key_id": "FLYTE_AWS_ACCESS_KEY_ID",
68
+ "secret_access_key": "FLYTE_AWS_SECRET_ACCESS_KEY",
69
+ } | Storage._KEY_ENV_VAR_MAPPING  # merged with the base mapping so Storage._auto_as_kwargs still finds its keys
70
+
71
+ # Refer to https://github.com/developmentseed/obstore/blob/33654fc37f19a657689eb93327b621e9f9e01494/obstore/python/obstore/store/_aws.pyi#L11
72
+ # for key and secret
73
+ _CONFIG_KEY_FSSPEC_S3_KEY_ID: ClassVar = "access_key_id"
74
+ _CONFIG_KEY_FSSPEC_S3_SECRET: ClassVar = "secret_access_key"
75
+ _CONFIG_KEY_ENDPOINT: ClassVar = "endpoint_url"
76
+ _KEY_SKIP_SIGNATURE: ClassVar = "skip_signature"
77
+
78
+ @classmethod
79
+ def auto(cls) -> S3:
80
+ """
+ Build the config from environment variables: the shared Storage settings plus
+ the endpoint and access key pair.
+
81
+ :return: Config
82
+ """
83
+ endpoint = os.getenv(cls._KEY_ENV_VAR_MAPPING["endpoint"], None)
84
+ access_key_id = os.getenv(cls._KEY_ENV_VAR_MAPPING["access_key_id"], None)
85
+ secret_access_key = os.getenv(cls._KEY_ENV_VAR_MAPPING["secret_access_key"], None)
86
+
87
+ kwargs = super()._auto_as_kwargs()
88
+ kwargs = set_if_exists(kwargs, "endpoint", endpoint)
89
+ kwargs = set_if_exists(kwargs, "access_key_id", access_key_id)
90
+ kwargs = set_if_exists(kwargs, "secret_access_key", secret_access_key)
91
+
92
+ return S3(**kwargs)  # always builds S3 itself, not cls
93
+
94
+ @classmethod
95
+ def for_sandbox(cls) -> S3:
96
+ """
+ Build a config with a hard-coded local endpoint and credentials, on top of the
+ shared Storage settings read from the environment.
+
97
+ :return: S3 config pointing at http://localhost:4566
98
+ """
99
+ kwargs = super()._auto_as_kwargs()
100
+ final_kwargs = kwargs | {  # the hard-coded values win over anything in kwargs
101
+ "endpoint": "http://localhost:4566",
102
+ "access_key_id": "minio",
103
+ "secret_access_key": "miniostorage",
104
+ }
105
+ return S3(**final_kwargs)
106
+
107
+ def get_fsspec_kwargs(self, anonymous: bool = False, **kwargs) -> typing.Dict[str, typing.Any]:  # returns the caller's kwargs plus "config" (when non-empty), "client_options" and "retry_config"
108
+ # Construct the config object
109
+ kwargs.pop("anonymous", None) # Remove anonymous if it exists, as we handle it separately
110
+ config: typing.Dict[str, typing.Any] = {}
111
+ if self._CONFIG_KEY_FSSPEC_S3_KEY_ID in kwargs or self.access_key_id:  # an explicit kwarg takes precedence over the instance field
112
+ config[self._CONFIG_KEY_FSSPEC_S3_KEY_ID] = kwargs.pop(
113
+ self._CONFIG_KEY_FSSPEC_S3_KEY_ID, self.access_key_id
114
+ )
115
+ if self._CONFIG_KEY_FSSPEC_S3_SECRET in kwargs or self.secret_access_key:
116
+ config[self._CONFIG_KEY_FSSPEC_S3_SECRET] = kwargs.pop(
117
+ self._CONFIG_KEY_FSSPEC_S3_SECRET, self.secret_access_key
118
+ )
119
+ if self._CONFIG_KEY_ENDPOINT in kwargs or self.endpoint:
120
+ config["endpoint_url"] = kwargs.pop(self._CONFIG_KEY_ENDPOINT, self.endpoint)  # the literal equals _CONFIG_KEY_ENDPOINT
121
+
122
+ retries = kwargs.pop("retries", self.retries)
123
+ backoff = kwargs.pop("backoff", self.backoff)
124
+
125
+ if anonymous:
126
+ config[self._KEY_SKIP_SIGNATURE] = True
127
+
128
+ retry_config = {
129
+ "max_retries": retries,
130
+ "backoff": {
131
+ "base": 2,
132
+ "init_backoff": backoff,
133
+ "max_backoff": datetime.timedelta(seconds=16),
134
+ },
135
+ "retry_timeout": datetime.timedelta(minutes=3),
136
+ }
137
+
138
+ client_options = {"timeout": "99999s", "allow_http": True}
139
+
140
+ if config:  # "config" is only set when at least one entry was collected
141
+ kwargs["config"] = config
142
+ kwargs["client_options"] = client_options or None  # the dict literal above is never empty, so "or None" has no effect
143
+ kwargs["retry_config"] = retry_config or None  # likewise never empty
144
+
145
+ return kwargs
146
+
147
+
148
+ @dataclass(init=True, repr=True, eq=True, frozen=True)
149
+ class GCS(Storage):
150
+ """
151
+ Any GCS specific configuration.
+
+ NOTE(review): ``_KEY_ENV_VAR_MAPPING`` below replaces the Storage mapping instead of
+ extending it (S3 merges the two). ``Storage._auto_as_kwargs`` looks up "retries",
+ "backoff" and "enable_debug" in this mapping, so it cannot be used on GCS as is;
+ ``auto`` here does not call it. Confirm whether ignoring the shared settings is intended.
152
+ """
153
+
154
+ gsutil_parallelism: bool = False
155
+
156
+ _KEY_ENV_VAR_MAPPING: ClassVar[dict[str, str]] = {
157
+ "gsutil_parallelism": "GCP_GSUTIL_PARALLELISM",
158
+ }
159
+
160
+ @classmethod
161
+ def auto(cls) -> GCS:  # build the config from the GCP_GSUTIL_PARALLELISM environment variable only
162
+ gsutil_parallelism = os.getenv(cls._KEY_ENV_VAR_MAPPING["gsutil_parallelism"], None)  # str or None
163
+
164
+ kwargs: typing.Dict[str, typing.Any] = {}
165
+ kwargs = set_if_exists(kwargs, "gsutil_parallelism", gsutil_parallelism)  # NOTE(review): no conversion to bool is visible here; set_if_exists lives in flyte.config - confirm the field does not end up holding a string
166
+ return GCS(**kwargs)
167
+
168
+ def get_fsspec_kwargs(self, anonymous: bool = False, **kwargs) -> typing.Dict[str, typing.Any]:  # passes the caller's kwargs through; the anonymous flag and the instance fields are not used
169
+ kwargs.pop("anonymous", None)
170
+ return kwargs
171
+
172
+
173
+ @dataclass(init=True, repr=True, eq=True, frozen=True)
174
+ class ABFS(Storage):
175
+ """
176
+ Any Azure Blob Storage specific configuration.
+
+ Holds an optional account name/key and an optional tenant/client id/client secret,
+ each readable from the AZURE_* environment variables in ``_KEY_ENV_VAR_MAPPING``.
+
+ NOTE(review): as in GCS, the mapping below does not include the Storage keys and
+ ``auto`` does not call ``Storage._auto_as_kwargs`` - confirm that is intended.
177
+ """
178
+
179
+ account_name: typing.Optional[str] = None
180
+ account_key: typing.Optional[str] = None
181
+ tenant_id: typing.Optional[str] = None
182
+ client_id: typing.Optional[str] = None
183
+ client_secret: typing.Optional[str] = None
184
+
185
+ _KEY_ENV_VAR_MAPPING: ClassVar[dict[str, str]] = {
186
+ "account_name": "AZURE_STORAGE_ACCOUNT_NAME",
187
+ "account_key": "AZURE_STORAGE_ACCOUNT_KEY",
188
+ "tenant_id": "AZURE_TENANT_ID",
189
+ "client_id": "AZURE_CLIENT_ID",
190
+ "client_secret": "AZURE_CLIENT_SECRET",
191
+ }
192
+ _KEY_SKIP_SIGNATURE: ClassVar = "skip_signature"
193
+
194
+ @classmethod
195
+ def auto(cls) -> ABFS:  # build the config from the five AZURE_* environment variables
196
+ account_name = os.getenv(cls._KEY_ENV_VAR_MAPPING["account_name"], None)
197
+ account_key = os.getenv(cls._KEY_ENV_VAR_MAPPING["account_key"], None)
198
+ tenant_id = os.getenv(cls._KEY_ENV_VAR_MAPPING["tenant_id"], None)
199
+ client_id = os.getenv(cls._KEY_ENV_VAR_MAPPING["client_id"], None)
200
+ client_secret = os.getenv(cls._KEY_ENV_VAR_MAPPING["client_secret"], None)
201
+
202
+ kwargs: typing.Dict[str, typing.Any] = {}
203
+ kwargs = set_if_exists(kwargs, "account_name", account_name)
204
+ kwargs = set_if_exists(kwargs, "account_key", account_key)
205
+ kwargs = set_if_exists(kwargs, "tenant_id", tenant_id)
206
+ kwargs = set_if_exists(kwargs, "client_id", client_id)
207
+ kwargs = set_if_exists(kwargs, "client_secret", client_secret)
208
+ return ABFS(**kwargs)  # always builds ABFS itself, not cls
209
+
210
+ def get_fsspec_kwargs(self, anonymous: bool = False, **kwargs) -> typing.Dict[str, typing.Any]:  # returns the caller's kwargs plus "config" (when non-empty) and "client_options"
211
+ kwargs.pop("anonymous", None)
212
+ config: typing.Dict[str, typing.Any] = {}
213
+ if "account_name" in kwargs or self.account_name:  # an explicit kwarg takes precedence over the instance field
214
+ config["account_name"] = kwargs.get("account_name", self.account_name)  # NOTE(review): get(), not pop() as in S3 - an overriding kwarg stays in the returned kwargs as well as in config; confirm intended (same for the four keys below)
215
+ if "account_key" in kwargs or self.account_key:
216
+ config["account_key"] = kwargs.get("account_key", self.account_key)
217
+ if "client_id" in kwargs or self.client_id:
218
+ config["client_id"] = kwargs.get("client_id", self.client_id)
219
+ if "client_secret" in kwargs or self.client_secret:
220
+ config["client_secret"] = kwargs.get("client_secret", self.client_secret)
221
+ if "tenant_id" in kwargs or self.tenant_id:
222
+ config["tenant_id"] = kwargs.get("tenant_id", self.tenant_id)
223
+
224
+ if anonymous:
225
+ config[self._KEY_SKIP_SIGNATURE] = True
226
+
227
+ client_options = {"timeout": "99999s", "allow_http": "true"}  # NOTE(review): allow_http is the string "true" here but the boolean True in S3 - confirm both are accepted
228
+
229
+ if config:  # "config" is only set when at least one entry was collected
230
+ kwargs["config"] = config
231
+ kwargs["client_options"] = client_options
232
+
233
+ return kwargs
@@ -0,0 +1,34 @@
1
+ from __future__ import annotations
2
+
3
+ import threading
4
+ import typing
5
+
6
+ # This file system is not really a filesystem, so users aren't really able to specify the remote path,
7
+ # at least not yet.
8
+ REMOTE_PLACEHOLDER = "flyte://data"
9
+
10
+ HashStructure = typing.Dict[str, typing.Tuple[bytes, int]]
11
+
12
+
13
class RemoteFSPathResolver:
    """
    Process-wide registry that maps flyte URIs to remote paths.

    The mapping is stored on the class, and all access goes through
    ``_lock``, so the two class methods can be called from multiple threads.
    """

    protocol = "flyte://"
    _flyte_path_to_remote_map: typing.ClassVar[typing.Dict[str, str]] = {}
    _lock = threading.Lock()

    @classmethod
    def resolve_remote_path(cls, flyte_uri: str) -> typing.Optional[str]:
        """
        Given a flyte uri, return the remote path if it exists or was created in current session, otherwise return None
        """
        with cls._lock:
            # A single lookup; dict.get returns None for unknown URIs.
            return cls._flyte_path_to_remote_map.get(flyte_uri)

    @classmethod
    def add_mapping(cls, flyte_uri: str, remote_path: str):
        """
        Thread safe method to add a mapping from a flyte uri to a remote path

        An existing mapping for the same flyte uri is overwritten.
        """
        with cls._lock:
            cls._flyte_path_to_remote_map[flyte_uri] = remote_path
@@ -0,0 +1,271 @@
1
+ import os
2
+ import pathlib
3
+ import random
4
+ import tempfile
5
+ import typing
6
+ from typing import AsyncIterator, Optional
7
+ from uuid import UUID
8
+
9
+ import fsspec
10
+ from fsspec.asyn import AsyncFileSystem
11
+ from fsspec.utils import get_protocol
12
+ from obstore.exceptions import GenericError
13
+ from obstore.fsspec import register
14
+
15
+ from flyte._initialize import get_storage
16
+ from flyte._logging import logger
17
+
18
+
19
+ def is_remote(path: typing.Union[pathlib.Path | str]) -> bool:
20
+ """
21
+ Return True when the protocol detected for ``path`` is anything other than "file".
+
+ The path is converted to a string and handed to fsspec's ``get_protocol``. A ``None``
+ protocol is treated as local.
+
+ :param path: Local path or URI to classify.
+ :return: False for "file" (or no) protocol, True otherwise.
22
+ """
23
+ protocol = get_protocol(str(path))
24
+ if protocol is None:
25
+ return False
26
+ return protocol != "file"
27
+
28
+
29
def strip_file_header(path: str) -> str:
    """
    Drops file:// if it exists from the file

    Only a leading ``file://`` is removed, and only once; any other path is
    returned unchanged.

    :param path: Path or URI that may start with ``file://``.
    :return: The path without the leading ``file://``.
    """
    return path.removeprefix("file://")
36
+
37
+
38
def get_random_local_path(file_path_or_file_name: pathlib.Path | str | None = None) -> pathlib.Path:
    """
    Use file_path_or_file_name, when you want a random directory, but want to preserve the leaf file name

    A fresh ``flyte-tmp-*`` directory is created on every call, and a randomly
    named folder inside it is chosen.

    :param file_path_or_file_name: Optional path or file name; only its last
        component is kept.
    :return: ``<tmp>/<random>/<leaf>`` with ``<tmp>/<random>`` created when a
        leaf name is available, otherwise ``<tmp>/<random>``, which is NOT created.
    """
    # mkdtemp creates the directory itself, so no further mkdir is needed for it.
    local_tmp = pathlib.Path(tempfile.mkdtemp(prefix="flyte-tmp-"))
    tmp_folder = local_tmp / UUID(int=random.getrandbits(128)).hex

    # basename is empty for a missing argument or a path ending in a separator.
    tail = os.path.basename(file_path_or_file_name) if file_path_or_file_name else ""
    if not tail:
        return tmp_folder

    tmp_folder.mkdir(parents=True, exist_ok=True)
    return tmp_folder / tail
53
+
54
+
55
+ def get_random_local_directory() -> pathlib.Path:
56
+ """
+ Create and return a new, randomly named local directory.
+
57
+ :return: a random directory
58
+ :rtype: pathlib.Path
59
+ """
60
+ _dir = get_random_local_path(None)  # with no file name the returned path has not been created yet
61
+ pathlib.Path(_dir).mkdir(parents=True, exist_ok=True)  # _dir is already a pathlib.Path (see the return annotation of get_random_local_path)
62
+ return _dir
63
+
64
+
65
+ def get_underlying_filesystem(
66
+ protocol: typing.Optional[str] = None,
67
+ anonymous: bool = False,
68
+ path: typing.Optional[str] = None,
69
+ **kwargs,
70
+ ) -> fsspec.AbstractFileSystem:  # build the fsspec filesystem for `protocol` (taken from `path` when omitted), with kwargs from the storage configuration
71
+ if protocol is None:
72
+ # If protocol is None, get it from the path
73
+ protocol = get_protocol(path)  # NOTE(review): when `path` is also None this passes None to fsspec - confirm callers always give one of the two
74
+
75
+ storage_config = get_storage()  # get_storage comes from flyte._initialize (not shown here)
76
+ if storage_config:  # a configured storage object supplies the kwargs, whatever the protocol
77
+ kwargs = storage_config.get_fsspec_kwargs(anonymous, **kwargs)
78
+ elif protocol:  # otherwise fall back to provider settings built by <Provider>.auto()
79
+ match protocol:
80
+ case "s3":
81
+ # If the protocol is s3, we can use the s3 filesystem
82
+ from flyte.storage import S3
83
+
84
+ kwargs = S3.auto().get_fsspec_kwargs(anonymous=anonymous, **kwargs)
85
+ case "gs":
86
+ # If the protocol is gs, we can use the gs filesystem
87
+ from flyte.storage import GCS
88
+
89
+ kwargs = GCS.auto().get_fsspec_kwargs(anonymous=anonymous, **kwargs)
90
+ case "abfs" | "abfss":
91
+ # If the protocol is abfs or abfss, we can use the abfs filesystem
92
+ from flyte.storage import ABFS
93
+
94
+ kwargs = ABFS.auto().get_fsspec_kwargs(anonymous=anonymous, **kwargs)
95
+ case _:
96
+ pass  # any other protocol: the caller's kwargs are passed through unchanged
97
+
98
+ return fsspec.filesystem(protocol, **kwargs)
99
+
100
+
101
+ def _get_anonymous_filesystem(from_path):
102
+ """Get the anonymous file system if needed."""
103
+ return get_underlying_filesystem(get_protocol(from_path), anonymous=True, asynchronous=True)  # the protocol comes from from_path; asynchronous=True is forwarded as a filesystem kwarg
104
+
105
+
106
+ async def get(from_path: str, to_path: Optional[str | pathlib.Path] = None, recursive: bool = False, **kwargs) -> str:  # copy from_path to to_path; on OSError/GenericError retry once with an anonymous filesystem
107
+ if not to_path:  # no destination given: use a random local path that keeps the source file name
108
+ name = pathlib.Path(from_path).name
109
+ to_path = get_random_local_path(file_path_or_file_name=name)
110
+ logger.debug(f"Storing file from {from_path} to {to_path}")
111
+ file_system = get_underlying_filesystem(path=from_path)
112
+ try:
113
+ return await _get_from_filesystem(file_system, from_path, to_path, recursive=recursive, **kwargs)  # NOTE(review): this can hand back to_path as a pathlib.Path although the annotation says str - confirm
114
+ except (OSError, GenericError) as oe:
115
+ logger.debug(f"Error in getting {from_path} to {to_path} rec {recursive} {oe}")
116
+ if isinstance(file_system, AsyncFileSystem):
117
+ try:
118
+ exists = await file_system._exists(from_path) # pylint: disable=W0212
119
+ except GenericError:
120
+ # for obstore, as it does not raise FileNotFoundError in fsspec but GenericError
121
+ # force it to try get_filesystem(anonymous=True)
122
+ exists = True
123
+ else:
124
+ exists = file_system.exists(from_path)
125
+ if not exists:  # the source is missing, so an anonymous retry cannot help
126
+ # TODO: update exception to be more specific
127
+ raise AssertionError(f"Unable to load data from {from_path}")
128
+ file_system = _get_anonymous_filesystem(from_path)
129
+ logger.debug(f"Attempting anonymous get with {file_system}")
130
+ return await _get_from_filesystem(file_system, from_path, to_path, recursive=recursive, **kwargs)  # errors from this second attempt propagate to the caller
131
+
132
+
133
+ async def _get_from_filesystem(
134
+ file_system: fsspec.AbstractFileSystem,
135
+ from_path: str | pathlib.Path,
136
+ to_path: str | pathlib.Path,
137
+ recursive: bool,
138
+ **kwargs,
139
+ ):  # run the filesystem's get (awaited for async filesystems) and return the destination path
140
+ if isinstance(file_system, AsyncFileSystem):
141
+ dst = await file_system._get(from_path, to_path, recursive=recursive, **kwargs) # pylint: disable=W0212
142
+ else:
143
+ dst = file_system.get(from_path, to_path, recursive=recursive, **kwargs)
144
+
145
+ if isinstance(dst, (str, pathlib.Path)):  # use the filesystem's own result only when it is a path
146
+ return dst
147
+ return to_path
148
+
149
+
150
+ async def put(from_path: str, to_path: Optional[str] = None, recursive: bool = False, **kwargs) -> str:  # upload from_path to to_path and return the destination path
151
+ if not to_path:  # no destination given: ask the current context for a random remote path
152
+ from flyte._context import internal_ctx
153
+
154
+ ctx = internal_ctx()
155
+ name = pathlib.Path(from_path).name if not recursive else None # don't pass a name for folders
156
+ to_path = ctx.raw_data.get_random_remote_path(file_name=name)
157
+
158
+ file_system = get_underlying_filesystem(path=to_path)  # the filesystem is chosen from the destination
159
+ from_path = strip_file_header(from_path)  # drop a leading file:// from the source
160
+ if isinstance(file_system, AsyncFileSystem):
161
+ dst = await file_system._put(from_path, to_path, recursive=recursive, **kwargs) # pylint: disable=W0212
162
+ else:
163
+ dst = file_system.put(from_path, to_path, recursive=recursive, **kwargs)
164
+ if isinstance(dst, (str, pathlib.Path)):  # use the filesystem's own result only when it is a path
165
+ return str(dst)
166
+ else:
167
+ return to_path
168
+
169
+
170
+ async def put_stream(
171
+ data_iterable: typing.AsyncIterable[bytes] | bytes, *, name: str | None = None, to_path: str | None = None, **kwargs
172
+ ) -> str:
173
+ """
174
+ Put a stream of data to a remote location. This is useful for streaming data to a remote location.
+ This is a coroutine and must be awaited. The data is either a single bytes object or an
+ async iterable of bytes (it is consumed with ``async for``).
175
+ Example usage:
176
+ ```python
177
+ import flyte.storage as storage
+
+ async def chunks():
+     yield b'hello'
+
178
+ await storage.put_stream(chunks(), name="my_file.txt")
179
+ OR
180
+ await storage.put_stream(b'hello', to_path="s3://my_bucket/my_file.txt")
181
+ ```
182
+
183
+ :param data_iterable: Bytes, or an async iterable of bytes, to be streamed.
184
+ :param name: Name of the file to be created. If not provided, a random name will be generated.
185
+ :param to_path: Path to the remote location where the data will be stored.
186
+ :param kwargs: Additional arguments to be passed to the underlying filesystem.
187
+ :rtype: str
188
+ :return: The path to the remote location where the data was stored.
189
+ """
190
+ if not to_path:  # no destination given: ask the current context for a random remote path
191
+ from flyte._context import internal_ctx
192
+
193
+ ctx = internal_ctx()
194
+ to_path = ctx.raw_data.get_random_remote_path(file_name=name)
195
+ fs = get_underlying_filesystem(path=to_path)
196
+ file_handle = None
197
+ if isinstance(fs, AsyncFileSystem):
198
+ try:
199
+ file_handle = await fs.open_async(to_path, "wb", **kwargs)
200
+ if isinstance(data_iterable, bytes):
201
+ await file_handle.write(data_iterable)
202
+ else:
203
+ async for data in data_iterable:
204
+ await file_handle.write(data)
205
+ return str(to_path)
206
+ except NotImplementedError:
207
+ logger.debug(f"{fs} doesn't implement 'open_async', falling back to sync")
208
+ finally:
209
+ if file_handle is not None:  # close the async handle on success and on error alike
210
+ await file_handle.close()
211
+
212
+ with fs.open(to_path, "wb", **kwargs) as f:  # sync path: non-async filesystems, or open_async not implemented
213
+ if isinstance(data_iterable, bytes):
214
+ f.write(data_iterable)
215
+ else:
216
+ # If data_iterable is async iterable, iterate over it and write each chunk to the file
217
+ async for data in data_iterable:
218
+ f.write(data)
219
+ return str(to_path)
220
+
221
+
222
async def get_stream(path: str, chunk_size=10 * 2**20, **kwargs) -> AsyncIterator[bytes]:
    """
    Get a stream of data from a remote location.
    This is useful for downloading streaming data from a remote location.
    This is an async generator, so consume it with ``async for``.
    Example usage:
    ```python
    import flyte.storage as storage
    async for chunk in storage.get_stream(path="s3://my_bucket/my_file.txt"):
        ...
    ```

    :param path: Path to the remote location where the data will be downloaded.
    :param kwargs: Additional arguments to be passed to the underlying filesystem.
    :param chunk_size: Size of each chunk to be read from the file.
    :return: An async iterator that yields chunks of data.
    """
    import inspect

    fs = get_underlying_filesystem(path=path, **kwargs)
    file_size = fs.info(path)["size"]
    total_read = 0
    file_handle = None
    try:
        if isinstance(fs, AsyncFileSystem):
            file_handle = await fs.open_async(path, "rb")
            # Never ask for more than what is left; a zero-length read ends the loop.
            while chunk := await file_handle.read(min(chunk_size, file_size - total_read)):
                total_read += len(chunk)
                yield chunk
            return
    except NotImplementedError:
        logger.debug(f"{fs} doesn't implement 'open_async', falling back to sync")
    finally:
        if file_handle is not None:
            # close() on an async handle is a coroutine and must be awaited
            # (put_stream already does so); a plain call would never run it.
            # A synchronous close() is tolerated as well.
            closing = file_handle.close()
            if inspect.isawaitable(closing):
                await closing

    # Sync fallback
    with fs.open(path, "rb") as file_handle:
        while chunk := file_handle.read(min(chunk_size, file_size - total_read)):
            total_read += len(chunk)
            yield chunk
259
+
260
+
261
+ def join(*paths: str) -> str:
262
+ """
263
+ Join multiple paths together. This is a wrapper around os.path.join.
264
+ # TODO replace with proper join with fsspec root etc
265
+
266
+ :param paths: Paths to be joined.
+ :return: The joined path as a string.
267
+ """
268
+ return str(os.path.join(*paths))
269
+
270
+
271
+ register(["s3", "gs", "abfs", "abfss"], asynchronous=True)
@@ -0,0 +1,5 @@
1
+ import os
2
+
3
+ # This is the default chunk size flyte will use for writing to S3 and GCS. This is set to 25MB by default and is
4
+ # configurable by the user if needed. This is used when put() is called on filesystems.
5
+ _WRITE_SIZE_CHUNK_BYTES = int(os.environ.get("_F_P_WRITE_CHUNK_SIZE", "26214400")) # 25 * 2**20