flyte 2.0.0b32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flyte might be problematic. Click here for more details.

Files changed (204) hide show
  1. flyte/__init__.py +108 -0
  2. flyte/_bin/__init__.py +0 -0
  3. flyte/_bin/debug.py +38 -0
  4. flyte/_bin/runtime.py +195 -0
  5. flyte/_bin/serve.py +178 -0
  6. flyte/_build.py +26 -0
  7. flyte/_cache/__init__.py +12 -0
  8. flyte/_cache/cache.py +147 -0
  9. flyte/_cache/defaults.py +9 -0
  10. flyte/_cache/local_cache.py +216 -0
  11. flyte/_cache/policy_function_body.py +42 -0
  12. flyte/_code_bundle/__init__.py +8 -0
  13. flyte/_code_bundle/_ignore.py +121 -0
  14. flyte/_code_bundle/_packaging.py +218 -0
  15. flyte/_code_bundle/_utils.py +347 -0
  16. flyte/_code_bundle/bundle.py +266 -0
  17. flyte/_constants.py +1 -0
  18. flyte/_context.py +155 -0
  19. flyte/_custom_context.py +73 -0
  20. flyte/_debug/__init__.py +0 -0
  21. flyte/_debug/constants.py +38 -0
  22. flyte/_debug/utils.py +17 -0
  23. flyte/_debug/vscode.py +307 -0
  24. flyte/_deploy.py +408 -0
  25. flyte/_deployer.py +109 -0
  26. flyte/_doc.py +29 -0
  27. flyte/_docstring.py +32 -0
  28. flyte/_environment.py +122 -0
  29. flyte/_excepthook.py +37 -0
  30. flyte/_group.py +32 -0
  31. flyte/_hash.py +8 -0
  32. flyte/_image.py +1055 -0
  33. flyte/_initialize.py +628 -0
  34. flyte/_interface.py +119 -0
  35. flyte/_internal/__init__.py +3 -0
  36. flyte/_internal/controllers/__init__.py +129 -0
  37. flyte/_internal/controllers/_local_controller.py +239 -0
  38. flyte/_internal/controllers/_trace.py +48 -0
  39. flyte/_internal/controllers/remote/__init__.py +58 -0
  40. flyte/_internal/controllers/remote/_action.py +211 -0
  41. flyte/_internal/controllers/remote/_client.py +47 -0
  42. flyte/_internal/controllers/remote/_controller.py +583 -0
  43. flyte/_internal/controllers/remote/_core.py +465 -0
  44. flyte/_internal/controllers/remote/_informer.py +381 -0
  45. flyte/_internal/controllers/remote/_service_protocol.py +50 -0
  46. flyte/_internal/imagebuild/__init__.py +3 -0
  47. flyte/_internal/imagebuild/docker_builder.py +706 -0
  48. flyte/_internal/imagebuild/image_builder.py +277 -0
  49. flyte/_internal/imagebuild/remote_builder.py +386 -0
  50. flyte/_internal/imagebuild/utils.py +78 -0
  51. flyte/_internal/resolvers/__init__.py +0 -0
  52. flyte/_internal/resolvers/_task_module.py +21 -0
  53. flyte/_internal/resolvers/common.py +31 -0
  54. flyte/_internal/resolvers/default.py +28 -0
  55. flyte/_internal/runtime/__init__.py +0 -0
  56. flyte/_internal/runtime/convert.py +486 -0
  57. flyte/_internal/runtime/entrypoints.py +204 -0
  58. flyte/_internal/runtime/io.py +188 -0
  59. flyte/_internal/runtime/resources_serde.py +152 -0
  60. flyte/_internal/runtime/reuse.py +125 -0
  61. flyte/_internal/runtime/rusty.py +193 -0
  62. flyte/_internal/runtime/task_serde.py +362 -0
  63. flyte/_internal/runtime/taskrunner.py +209 -0
  64. flyte/_internal/runtime/trigger_serde.py +160 -0
  65. flyte/_internal/runtime/types_serde.py +54 -0
  66. flyte/_keyring/__init__.py +0 -0
  67. flyte/_keyring/file.py +115 -0
  68. flyte/_logging.py +300 -0
  69. flyte/_map.py +312 -0
  70. flyte/_module.py +72 -0
  71. flyte/_pod.py +30 -0
  72. flyte/_resources.py +473 -0
  73. flyte/_retry.py +32 -0
  74. flyte/_reusable_environment.py +102 -0
  75. flyte/_run.py +724 -0
  76. flyte/_secret.py +96 -0
  77. flyte/_task.py +550 -0
  78. flyte/_task_environment.py +316 -0
  79. flyte/_task_plugins.py +47 -0
  80. flyte/_timeout.py +47 -0
  81. flyte/_tools.py +27 -0
  82. flyte/_trace.py +119 -0
  83. flyte/_trigger.py +1000 -0
  84. flyte/_utils/__init__.py +30 -0
  85. flyte/_utils/asyn.py +121 -0
  86. flyte/_utils/async_cache.py +139 -0
  87. flyte/_utils/coro_management.py +27 -0
  88. flyte/_utils/docker_credentials.py +173 -0
  89. flyte/_utils/file_handling.py +72 -0
  90. flyte/_utils/helpers.py +134 -0
  91. flyte/_utils/lazy_module.py +54 -0
  92. flyte/_utils/module_loader.py +104 -0
  93. flyte/_utils/org_discovery.py +57 -0
  94. flyte/_utils/uv_script_parser.py +49 -0
  95. flyte/_version.py +34 -0
  96. flyte/app/__init__.py +22 -0
  97. flyte/app/_app_environment.py +157 -0
  98. flyte/app/_deploy.py +125 -0
  99. flyte/app/_input.py +160 -0
  100. flyte/app/_runtime/__init__.py +3 -0
  101. flyte/app/_runtime/app_serde.py +347 -0
  102. flyte/app/_types.py +101 -0
  103. flyte/app/extras/__init__.py +3 -0
  104. flyte/app/extras/_fastapi.py +151 -0
  105. flyte/cli/__init__.py +12 -0
  106. flyte/cli/_abort.py +28 -0
  107. flyte/cli/_build.py +114 -0
  108. flyte/cli/_common.py +468 -0
  109. flyte/cli/_create.py +371 -0
  110. flyte/cli/_delete.py +45 -0
  111. flyte/cli/_deploy.py +293 -0
  112. flyte/cli/_gen.py +176 -0
  113. flyte/cli/_get.py +370 -0
  114. flyte/cli/_option.py +33 -0
  115. flyte/cli/_params.py +554 -0
  116. flyte/cli/_plugins.py +209 -0
  117. flyte/cli/_run.py +597 -0
  118. flyte/cli/_serve.py +64 -0
  119. flyte/cli/_update.py +37 -0
  120. flyte/cli/_user.py +17 -0
  121. flyte/cli/main.py +221 -0
  122. flyte/config/__init__.py +3 -0
  123. flyte/config/_config.py +248 -0
  124. flyte/config/_internal.py +73 -0
  125. flyte/config/_reader.py +225 -0
  126. flyte/connectors/__init__.py +11 -0
  127. flyte/connectors/_connector.py +270 -0
  128. flyte/connectors/_server.py +197 -0
  129. flyte/connectors/utils.py +135 -0
  130. flyte/errors.py +243 -0
  131. flyte/extend.py +19 -0
  132. flyte/extras/__init__.py +5 -0
  133. flyte/extras/_container.py +286 -0
  134. flyte/git/__init__.py +3 -0
  135. flyte/git/_config.py +21 -0
  136. flyte/io/__init__.py +29 -0
  137. flyte/io/_dataframe/__init__.py +131 -0
  138. flyte/io/_dataframe/basic_dfs.py +223 -0
  139. flyte/io/_dataframe/dataframe.py +1026 -0
  140. flyte/io/_dir.py +910 -0
  141. flyte/io/_file.py +914 -0
  142. flyte/io/_hashing_io.py +342 -0
  143. flyte/models.py +479 -0
  144. flyte/py.typed +0 -0
  145. flyte/remote/__init__.py +35 -0
  146. flyte/remote/_action.py +738 -0
  147. flyte/remote/_app.py +57 -0
  148. flyte/remote/_client/__init__.py +0 -0
  149. flyte/remote/_client/_protocols.py +189 -0
  150. flyte/remote/_client/auth/__init__.py +12 -0
  151. flyte/remote/_client/auth/_auth_utils.py +14 -0
  152. flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
  153. flyte/remote/_client/auth/_authenticators/base.py +403 -0
  154. flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  155. flyte/remote/_client/auth/_authenticators/device_code.py +117 -0
  156. flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
  157. flyte/remote/_client/auth/_authenticators/factory.py +200 -0
  158. flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
  159. flyte/remote/_client/auth/_channel.py +213 -0
  160. flyte/remote/_client/auth/_client_config.py +85 -0
  161. flyte/remote/_client/auth/_default_html.py +32 -0
  162. flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  163. flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
  164. flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
  165. flyte/remote/_client/auth/_keyring.py +152 -0
  166. flyte/remote/_client/auth/_token_client.py +260 -0
  167. flyte/remote/_client/auth/errors.py +16 -0
  168. flyte/remote/_client/controlplane.py +128 -0
  169. flyte/remote/_common.py +30 -0
  170. flyte/remote/_console.py +19 -0
  171. flyte/remote/_data.py +161 -0
  172. flyte/remote/_logs.py +185 -0
  173. flyte/remote/_project.py +88 -0
  174. flyte/remote/_run.py +386 -0
  175. flyte/remote/_secret.py +142 -0
  176. flyte/remote/_task.py +527 -0
  177. flyte/remote/_trigger.py +306 -0
  178. flyte/remote/_user.py +33 -0
  179. flyte/report/__init__.py +3 -0
  180. flyte/report/_report.py +182 -0
  181. flyte/report/_template.html +124 -0
  182. flyte/storage/__init__.py +36 -0
  183. flyte/storage/_config.py +237 -0
  184. flyte/storage/_parallel_reader.py +274 -0
  185. flyte/storage/_remote_fs.py +34 -0
  186. flyte/storage/_storage.py +456 -0
  187. flyte/storage/_utils.py +5 -0
  188. flyte/syncify/__init__.py +56 -0
  189. flyte/syncify/_api.py +375 -0
  190. flyte/types/__init__.py +52 -0
  191. flyte/types/_interface.py +40 -0
  192. flyte/types/_pickle.py +145 -0
  193. flyte/types/_renderer.py +162 -0
  194. flyte/types/_string_literals.py +119 -0
  195. flyte/types/_type_engine.py +2254 -0
  196. flyte/types/_utils.py +80 -0
  197. flyte-2.0.0b32.data/scripts/debug.py +38 -0
  198. flyte-2.0.0b32.data/scripts/runtime.py +195 -0
  199. flyte-2.0.0b32.dist-info/METADATA +351 -0
  200. flyte-2.0.0b32.dist-info/RECORD +204 -0
  201. flyte-2.0.0b32.dist-info/WHEEL +5 -0
  202. flyte-2.0.0b32.dist-info/entry_points.txt +7 -0
  203. flyte-2.0.0b32.dist-info/licenses/LICENSE +201 -0
  204. flyte-2.0.0b32.dist-info/top_level.txt +1 -0
@@ -0,0 +1,456 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import pathlib
5
+ import random
6
+ import tempfile
7
+ import typing
8
+ from typing import AsyncGenerator, Optional
9
+ from uuid import UUID
10
+
11
+ import fsspec
12
+ import obstore
13
+ from fsspec.asyn import AsyncFileSystem
14
+ from fsspec.utils import get_protocol
15
+ from obstore.exceptions import GenericError
16
+ from obstore.fsspec import register
17
+
18
+ from flyte._initialize import get_storage
19
+ from flyte._logging import logger
20
+ from flyte.errors import InitializationError, OnlyAsyncIOSupportedError
21
+
22
+ if typing.TYPE_CHECKING:
23
+ from obstore import AsyncReadableFile, AsyncWritableFile
24
+
25
# Protocols handled through obstore; this list is passed to obstore's fsspec ``register`` at the bottom of this module.
_OBSTORE_SUPPORTED_PROTOCOLS = ["s3", "gs", "abfs", "abfss"]
26
+
27
+
28
def _is_obstore_supported_protocol(protocol: str) -> bool:
    """
    Tell whether ``protocol`` is one of the protocols routed through obstore.

    :param protocol: Protocol name to look up, e.g. ``"s3"``.
    :return: True when the protocol is listed in ``_OBSTORE_SUPPORTED_PROTOCOLS``, False otherwise.
    """
    supported_protocols = _OBSTORE_SUPPORTED_PROTOCOLS
    return protocol in supported_protocols
35
+
36
+
37
def is_remote(path: typing.Union[pathlib.Path | str]) -> bool:
    """
    Report whether ``path`` refers to something other than the local filesystem.

    The protocol is resolved with fsspec's ``get_protocol`` on the string form of ``path``.

    :param path: Path or URI to inspect.
    :return: False when no protocol is resolved or the protocol is ``file``; True otherwise.
    """
    scheme = get_protocol(str(path))
    return scheme is not None and scheme != "file"
45
+
46
+
47
def strip_file_header(path: str) -> str:
    """
    Remove a single leading ``file://`` scheme from ``path``.

    :param path: Path that may start with ``file://``.
    :return: ``path`` without the leading ``file://``; paths that do not start with it are returned unchanged.
    """
    # Only the prefix is dropped; later occurrences of "file://" inside the path are left alone.
    return path.removeprefix("file://")
54
+
55
+
56
+ def get_random_local_path(file_path_or_file_name: pathlib.Path | str | None = None) -> pathlib.Path:
57
+ """
58
+ Use file_path_or_file_name, when you want a random directory, but want to preserve the leaf file name
59
+ """
60
+ local_tmp = pathlib.Path(tempfile.mkdtemp(prefix="flyte-tmp-"))
61
+ key = UUID(int=random.getrandbits(128)).hex
62
+ tmp_folder = local_tmp / key
63
+ tail = ""
64
+ if file_path_or_file_name:
65
+ _, tail = os.path.split(file_path_or_file_name)
66
+ if tail:
67
+ tmp_folder.mkdir(parents=True, exist_ok=True)
68
+ return tmp_folder / tail
69
+ local_tmp.mkdir(parents=True, exist_ok=True)
70
+ return tmp_folder
71
+
72
+
73
def get_random_local_directory() -> pathlib.Path:
    """
    Create a new, randomly named local directory.

    :return: Path of the created directory.
    :rtype: pathlib.Path
    """
    random_dir = get_random_local_path(None)
    # Without a file name get_random_local_path only returns the path, so the directory is created here.
    pathlib.Path(random_dir).mkdir(parents=True, exist_ok=True)
    return random_dir
81
+
82
+
83
+ def get_configured_fsspec_kwargs(
84
+ protocol: typing.Optional[str] = None, anonymous: bool = False
85
+ ) -> typing.Dict[str, typing.Any]:
86
+ if protocol:
87
+ # Try to get storage config safely - may not be initialized for local operations
88
+ try:
89
+ storage_config = get_storage()
90
+ except InitializationError:
91
+ storage_config = None
92
+
93
+ match protocol:
94
+ case "s3":
95
+ # If the protocol is s3, we can use the s3 filesystem
96
+ from flyte.storage import S3
97
+
98
+ if storage_config and isinstance(storage_config, S3):
99
+ return storage_config.get_fsspec_kwargs(anonymous=anonymous)
100
+
101
+ return S3.auto().get_fsspec_kwargs(anonymous=anonymous)
102
+ case "gs":
103
+ # If the protocol is gs, we can use the gs filesystem
104
+ from flyte.storage import GCS
105
+
106
+ if storage_config and isinstance(storage_config, GCS):
107
+ return storage_config.get_fsspec_kwargs(anonymous=anonymous)
108
+
109
+ return GCS.auto().get_fsspec_kwargs(anonymous=anonymous)
110
+ case "abfs" | "abfss":
111
+ # If the protocol is abfs or abfss, we can use the abfs filesystem
112
+ from flyte.storage import ABFS
113
+
114
+ if storage_config and isinstance(storage_config, ABFS):
115
+ return storage_config.get_fsspec_kwargs(anonymous=anonymous)
116
+
117
+ return ABFS.auto().get_fsspec_kwargs(anonymous=anonymous)
118
+ case _:
119
+ return {}
120
+
121
+ # If no protocol, return args from storage config if set
122
+ storage_config = get_storage()
123
+ if storage_config:
124
+ return storage_config.get_fsspec_kwargs(anonymous)
125
+
126
+ return {}
127
+
128
+
129
def get_underlying_filesystem(
    protocol: typing.Optional[str] = None,
    anonymous: bool = False,
    path: typing.Optional[str] = None,
    **kwargs,
) -> fsspec.AbstractFileSystem:
    """
    Create the fsspec filesystem for a protocol or path.

    :param protocol: Protocol to use; when None it is derived from ``path`` via ``get_protocol``.
    :param anonymous: Passed on when looking up the configured filesystem kwargs.
    :param path: Path used to derive the protocol when ``protocol`` is None.
    :param kwargs: Extra filesystem arguments; these override the configured ones.
    :return: The filesystem returned by ``fsspec.filesystem``.
    """
    resolved_protocol = get_protocol(path) if protocol is None else protocol

    fs_kwargs = get_configured_fsspec_kwargs(resolved_protocol, anonymous=anonymous)
    # Caller-supplied kwargs take precedence over the configured defaults.
    fs_kwargs.update(kwargs)

    return fsspec.filesystem(resolved_protocol, **fs_kwargs)
143
+
144
+
145
def _get_anonymous_filesystem(from_path):
    """Return an anonymous, asynchronous filesystem for the protocol of ``from_path``."""
    source_protocol = get_protocol(from_path)
    return get_underlying_filesystem(source_protocol, anonymous=True, asynchronous=True)
148
+
149
+
150
async def _get_obstore_bypass(from_path: str, to_path: str | pathlib.Path, recursive: bool = False, **kwargs) -> str:
    """
    Download ``from_path`` with ``ObstoreParallelReader`` instead of going through fsspec's get.

    :param from_path: Remote path to download; split into bucket and prefix by the filesystem.
    :param to_path: Local destination (a directory when ``recursive``, otherwise a file path).
    :param recursive: When True the whole prefix is downloaded into ``to_path``.
    :param kwargs: Only ``chunk_size`` and ``max_concurrency`` are used; they are passed to the reader.
    :return: The local destination as a string.
    """
    from obstore.store import ObjectStore

    from flyte.storage._parallel_reader import ObstoreParallelReader

    fs = get_underlying_filesystem(path=from_path)
    bucket, prefix = fs._split_path(from_path)  # pylint: disable=W0212
    store: ObjectStore = fs._construct_store(bucket)

    # Forward only the tuning options the reader is given here.
    download_kwargs = {key: kwargs[key] for key in ("chunk_size", "max_concurrency") if key in kwargs}
    reader = ObstoreParallelReader(store, **download_kwargs)

    target_path = to_path
    if isinstance(to_path, str):
        target_path = pathlib.Path(to_path)

    if not recursive:
        # Single file: the reader gets the parent prefix plus the file name separately.
        remote = pathlib.Path(prefix)
        path_for_reader = remote.name
        final_prefix = remote.parent
        logger.debug(f"Downloading single file {final_prefix=}, {path_for_reader=} to {target_path=}")
        await reader.download_files(
            final_prefix,
            target_path.parent,
            path_for_reader,
            destination_file_name=target_path.name,
        )
        return str(target_path)

    # Recursive: download everything under the prefix into the target path.
    logger.debug(f"Downloading recursively {prefix=} to {target_path=}")
    await reader.download_files(
        prefix,
        target_path,
    )
    return str(to_path)
189
+
190
+
191
async def get(from_path: str, to_path: Optional[str | pathlib.Path] = None, recursive: bool = False, **kwargs) -> str:
    """
    Download ``from_path`` to a local destination.

    When ``to_path`` is not given, a random local path that keeps the source file name is used. For
    non-recursive downloads, a ``to_path`` that ends with ``os.sep`` or is an existing directory gets the
    source file name appended. Recursive downloads on obstore-capable filesystems use the obstore bypass.
    If the regular download fails with ``OSError``/``GenericError`` and the source is not known to be
    missing, the download is retried with an anonymous filesystem.

    :param from_path: Source path to download.
    :param to_path: Optional local destination.
    :param recursive: Whether to download a directory/prefix recursively.
    :param kwargs: Extra arguments forwarded to the download call.
    :return: The destination reported by the download helper.
    :raises AssertionError: If the first attempt fails and the source does not exist.
    """
    if not to_path:
        leaf_name = pathlib.Path(from_path).name  # may need to be adjusted for windows
        to_path = get_random_local_path(file_path_or_file_name=leaf_name)
        logger.debug(f"Storing file from {from_path} to {to_path}")
    elif not recursive:
        # Directory handling applies to single files only; for recursive=True to_path stays as given.
        # The trailing separator must be read from the raw string, since pathlib.Path drops it.
        has_trailing_sep = str(to_path).endswith(os.sep)
        destination = pathlib.Path(to_path)
        if has_trailing_sep or (destination.exists() and destination.is_dir()):
            to_path = destination / pathlib.Path(from_path).name  # may need to be adjusted for windows

    file_system = get_underlying_filesystem(path=from_path)

    use_obstore_bypass = (
        _is_obstore_supported_protocol(file_system.protocol)
        and hasattr(file_system, "_split_path")
        and hasattr(file_system, "_construct_store")
        and recursive
    )
    if use_obstore_bypass:
        return await _get_obstore_bypass(from_path, to_path, recursive, **kwargs)

    try:
        return await _get_from_filesystem(file_system, from_path, to_path, recursive=recursive, **kwargs)
    except (OSError, GenericError) as oe:
        logger.debug(f"Error in getting {from_path} to {to_path} rec {recursive} {oe}")

        if not isinstance(file_system, AsyncFileSystem):
            source_exists = file_system.exists(from_path)
        else:
            try:
                source_exists = await file_system._exists(from_path)  # pylint: disable=W0212
            except GenericError:
                # obstore raises GenericError rather than FileNotFoundError here;
                # assume the source exists so the anonymous retry below is attempted.
                source_exists = True

        if not source_exists:
            raise AssertionError(f"Unable to load data from {from_path}")

        file_system = _get_anonymous_filesystem(from_path)
        logger.debug(f"Attempting anonymous get with {file_system}")
        return await _get_from_filesystem(file_system, from_path, to_path, recursive=recursive, **kwargs)
239
+
240
+
241
async def _get_from_filesystem(
    file_system: fsspec.AbstractFileSystem,
    from_path: str | pathlib.Path,
    to_path: str | pathlib.Path,
    recursive: bool,
    **kwargs,
):
    """
    Download ``from_path`` to ``to_path`` using the given filesystem.

    Async filesystems are awaited through ``_get``; all others use the blocking ``get``.

    :param file_system: Filesystem that performs the download.
    :param from_path: Source path.
    :param to_path: Destination path.
    :param recursive: Forwarded to the filesystem's get call.
    :param kwargs: Extra arguments forwarded to the filesystem's get call.
    :return: The filesystem's result when it is a str or Path, otherwise ``str(to_path)``.
    """
    source, destination = str(from_path), str(to_path)

    if isinstance(file_system, AsyncFileSystem):
        result = await file_system._get(source, destination, recursive=recursive, **kwargs)  # pylint: disable=W0212
    else:
        result = file_system.get(source, destination, recursive=recursive, **kwargs)

    return result if isinstance(result, (str, pathlib.Path)) else destination
256
+
257
+
258
async def put(from_path: str, to_path: Optional[str] = None, recursive: bool = False, **kwargs) -> str:
    """
    Upload ``from_path`` to ``to_path``.

    When ``to_path`` is not given, a random remote path is taken from the current context's raw data
    location, keeping the source file name. A leading ``file://`` on ``from_path`` is removed before upload.

    :param from_path: Local source path.
    :param to_path: Optional destination path.
    :param recursive: Forwarded to the filesystem's put call.
    :param kwargs: Extra arguments forwarded to the filesystem's put call.
    :return: The filesystem's result as a string when it is a str or Path, otherwise ``to_path``.
    """
    if not to_path:
        from flyte._context import internal_ctx

        source_name = pathlib.Path(from_path).name
        to_path = internal_ctx().raw_data.get_random_remote_path(file_name=source_name)

    file_system = get_underlying_filesystem(path=to_path)
    from_path = strip_file_header(from_path)

    if isinstance(file_system, AsyncFileSystem):
        result = await file_system._put(from_path, to_path, recursive=recursive, **kwargs)  # pylint: disable=W0212
    else:
        result = file_system.put(from_path, to_path, recursive=recursive, **kwargs)

    if not isinstance(result, (str, pathlib.Path)):
        return to_path
    return str(result)
276
+
277
+
278
async def _open_obstore_bypass(path: str, mode: str = "rb", **kwargs) -> AsyncReadableFile | AsyncWritableFile:
    """
    Open ``path`` directly through obstore, with no fallback.

    :param path: Remote path; split into bucket and object path by the filesystem.
    :param mode: Any mode containing ``"w"`` opens a writer; everything else opens a reader.
    :param kwargs: ``attributes`` is used for writers, ``buffer_size`` (default 10 MiB) for readers.
    :return: An obstore async writable or readable file.
    """
    from obstore.store import ObjectStore

    fs = get_underlying_filesystem(path=path)
    bucket, object_path = fs._split_path(path)  # pylint: disable=W0212
    store: ObjectStore = fs._construct_store(bucket)

    if "w" not in mode:
        # read mode
        read_buffer_size = kwargs.pop("buffer_size", 10 * 2**20)
        reader: AsyncReadableFile = await obstore.open_reader_async(store, object_path, buffer_size=read_buffer_size)
        return reader

    write_attributes = kwargs.pop("attributes", {})
    # The writer is returned as-is, without awaiting, matching how the reader/writer are obtained above.
    writer: AsyncWritableFile = obstore.open_writer_async(store, object_path, attributes=write_attributes)
    return writer
298
+
299
+
300
async def open(path: str, mode: str = "rb", **kwargs) -> AsyncReadableFile | AsyncWritableFile:
    """
    Asynchronously open a file and return the resulting async file object.

    If the filesystem for ``path`` supports the obstore bypass (obstore protocol plus the ``_split_path``
    and ``_construct_store`` hooks), the file is opened through obstore. Otherwise an ``AsyncFileSystem``
    is opened with ``open_async``.

    :param path: Path of the file to open.
    :param mode: File mode, ``"rb"`` by default.
    :param kwargs: Extra arguments forwarded to the opener that is used.
    :return: An async readable or writable file.
    :raises OnlyAsyncIOSupportedError: If the filesystem supports neither obstore nor async operations.
    """
    fs = get_underlying_filesystem(path=path)

    supports_obstore = (
        _is_obstore_supported_protocol(fs.protocol) and hasattr(fs, "_split_path") and hasattr(fs, "_construct_store")
    )
    if supports_obstore:
        return await _open_obstore_bypass(path, mode, **kwargs)

    if not isinstance(fs, AsyncFileSystem):
        raise OnlyAsyncIOSupportedError(f"Filesystem {fs} does not support async operations")

    # Regular fsspec async open
    return await fs.open_async(path, mode, **kwargs)
320
+
321
+
322
async def _iter_stream_chunks(data_iterable) -> typing.AsyncIterator[bytes]:
    """Yield the ``put_stream`` payload as a uniform async stream of byte chunks."""
    if isinstance(data_iterable, (bytes, bytearray, memoryview)):
        # A single bytes-like payload is written as one chunk.
        yield data_iterable
    elif hasattr(data_iterable, "__aiter__"):
        async for chunk in data_iterable:
            yield chunk
    else:
        # Plain (synchronous) iterables and iterators of bytes.
        for chunk in data_iterable:
            yield chunk


async def put_stream(
    data_iterable: typing.AsyncIterable[bytes] | typing.Iterable[bytes] | bytes,
    *,
    name: str | None = None,
    to_path: str | None = None,
    **kwargs,
) -> str:
    """
    Put a stream of data to a remote location. This is useful for streaming data to a remote location.
    Example usage:
    ```python
    import flyte.storage as storage

    async def chunks():
        yield b"hello"

    await storage.put_stream(chunks(), name="my_file.txt")
    OR
    await storage.put_stream(iter([b"hello"]), to_path="s3://my_bucket/my_file.txt")
    ```

    :param data_iterable: The data to write: a single bytes object, an async iterable of bytes, or a plain
        iterable of bytes.
    :param name: Name of the file to be created. If not provided, a random name will be generated.
    :param to_path: Path to the remote location where the data will be stored. When omitted, a random remote
        path is taken from the current context's raw data location.
    :param kwargs: Additional arguments to be passed to the underlying filesystem.
    :rtype: str
    :return: The path to the remote location where the data was stored.
    """
    if not to_path:
        from flyte._context import internal_ctx

        ctx = internal_ctx()
        to_path = ctx.raw_data.get_random_remote_path(file_name=name)

    fs = get_underlying_filesystem(path=to_path)

    # Only the open call signals "no async support"; keep the try narrow so that errors raised while
    # writing are never mistaken for it.
    try:
        async_handle = typing.cast("AsyncWritableFile", await open(to_path, "wb", **kwargs))
    except OnlyAsyncIOSupportedError:
        async_handle = None

    if async_handle is not None:
        # NOTE(review): the async handle is closed only after a fully successful write, as before.
        # Closing on failure might finalize a truncated object -- confirm before adding a finally.
        async for chunk in _iter_stream_chunks(data_iterable):
            await async_handle.write(chunk)
        await async_handle.close()
        return str(to_path)

    # Fallback to the blocking open; the context manager closes the handle even if a write fails.
    with fs.open(to_path, mode="wb", **kwargs) as sync_handle:
        async for chunk in _iter_stream_chunks(data_iterable):
            sync_handle.write(chunk)

    return str(to_path)
372
+
373
+
374
async def get_stream(path: str, chunk_size=10 * 2**20, **kwargs) -> AsyncGenerator[bytes, None]:
    """
    Get a stream of data from a remote location.
    This is useful for downloading streaming data from a remote location.
    Example usage:
    ```python
    import flyte.storage as storage
    async for chunk in storage.get_stream(path="s3://my_bucket/my_file.txt"):
        process(chunk)
    ```

    :param path: Path to the remote location where the data will be downloaded.
    :param kwargs: Additional arguments to be passed to the underlying filesystem.
    :param chunk_size: Maximum size of each chunk read from the file. Also used as the default
        ``buffer_size`` (obstore) or ``block_size`` (fsspec) when those are not given in ``kwargs``.
    :return: An async iterator that yields chunks of bytes.
    """
    # Check if we should use obstore bypass
    fs = get_underlying_filesystem(path=path)
    if _is_obstore_supported_protocol(fs.protocol) and hasattr(fs, "_split_path") and hasattr(fs, "_construct_store"):
        kwargs.setdefault("buffer_size", chunk_size)
        file_handle = typing.cast("AsyncReadableFile", await _open_obstore_bypass(path, "rb", **kwargs))
        # Pass the size explicitly: a bare read() returns everything up to EOF as one chunk.
        while chunk := await file_handle.read(chunk_size):
            yield bytes(chunk)
        return

    # Fallback to normal open
    kwargs.setdefault("block_size", chunk_size)

    if isinstance(fs, AsyncFileSystem):
        file_handle = await fs.open_async(path, "rb", **kwargs)
        try:
            while chunk := await file_handle.read(chunk_size):
                yield chunk
        finally:
            # Runs even when the consumer stops iterating early or a read fails.
            await file_handle.close()
        return

    file_handle = fs.open(path, "rb", **kwargs)
    try:
        while chunk := file_handle.read(chunk_size):
            yield chunk
    finally:
        file_handle.close()
416
+
417
+
418
def join(*paths: str) -> str:
    """
    Join path components into one path using ``os.path.join``.
    # TODO replace with proper join with fsspec root etc

    :param paths: Path components to join, in order.
    :return: The joined path as a string.
    """
    joined = os.path.join(*paths)
    return str(joined)
426
+
427
+
428
async def exists(path: str, **kwargs) -> bool:
    """
    Check whether ``path`` exists by asking its filesystem for info.

    :param path: Path to be checked.
    :param kwargs: Additional arguments passed to the underlying filesystem constructor.
    :return: True if info for the path could be fetched, False if that raised ``FileNotFoundError``.
    """
    try:
        fs = get_underlying_filesystem(path=path, **kwargs)
        if isinstance(fs, AsyncFileSystem):
            await fs._info(path)
        else:
            fs.info(path)
    except FileNotFoundError:
        return False
    return True
445
+
446
+
447
def exists_sync(path: str, **kwargs) -> bool:
    """
    Blocking check for whether ``path`` exists, based on the filesystem's ``info`` call.

    :param path: Path to be checked.
    :param kwargs: Additional arguments passed to the underlying filesystem constructor.
    :return: True if info for the path could be fetched, False if that raised ``FileNotFoundError``.
    """
    try:
        get_underlying_filesystem(path=path, **kwargs).info(path)
    except FileNotFoundError:
        return False
    return True
454
+
455
+
456
+ register(_OBSTORE_SUPPORTED_PROTOCOLS, asynchronous=True)
@@ -0,0 +1,5 @@
1
+ import os
2
+
3
# This is the default chunk size, in bytes, that flyte will use for writing to S3 and GCS. It is used when
# put() is called on filesystems. The default is 25 MiB (25 * 2**20 = 26214400 bytes); users can override it
# by setting the _F_P_WRITE_CHUNK_SIZE environment variable to an integer number of bytes.
_WRITE_SIZE_CHUNK_BYTES = int(os.environ.get("_F_P_WRITE_CHUNK_SIZE", "26214400"))  # 25 * 2**20
@@ -0,0 +1,56 @@
1
+ """
2
+ # Syncify Module
3
+ This module provides the `syncify` decorator and the `Syncify` class.
4
+ The decorator can be used to convert asynchronous functions or methods into synchronous ones.
5
+ This is useful for integrating async code into synchronous contexts.
6
+
7
+ Every asynchronous function or method wrapped with `syncify` can be called synchronously using the
8
+ parenthesis `()` operator, or asynchronously using the `.aio()` method.
9
+
10
+ Example::
11
+
12
+ ```python
13
+ from flyte.syncify import syncify
14
+
15
+ @syncify
16
+ async def async_function(x: str) -> str:
17
+ return f"Hello, Async World {x}!"
18
+
19
+
20
+ # now you can call it synchronously
21
+ result = async_function("Async World") # Note: no .aio() needed for sync calls
22
+ print(result)
23
+ # Output: Hello, Async World Async World!
24
+
25
+ # or call it asynchronously
26
+ async def main():
27
+ result = await async_function.aio("World") # Note the use of .aio() for async calls
28
+ print(result)
29
+ ```
30
+
31
+ ## Creating a Syncify Instance
32
+ ```python
33
+ from flyte.syncify import Syncify
34
+
35
+ syncer = Syncify("my_syncer")
36
+
37
+ # Now you can use `syncer` to decorate your async functions or methods
38
+
39
+ ```
40
+
41
+ ## How does it work?
42
+ The Syncify class wraps asynchronous functions, classmethods, instance methods, and static methods to
43
+ provide a synchronous interface. The wrapped methods are always executed in the context of a background loop,
44
+ whether they are called synchronously or asynchronously. This allows for seamless integration of async code, as
45
+ certain async libraries capture the event loop. An example is grpc.aio, which captures the event loop.
46
+ In such a case, the Syncify class ensures that the async function is executed in the context of the background loop.
47
+
48
+ To use it correctly with grpc.aio, you should wrap every grpc.aio channel creation, and client invocation
49
+ with the same `Syncify` instance. This ensures that the async code runs in the correct event loop context.
50
+ """
51
+
52
+ from flyte.syncify._api import Syncify
53
+
54
# Module-level default Syncify instance, used as the ``@syncify`` decorator described in the module docstring.
syncify = Syncify()

# Public API of this package: the class and the ready-made default instance.
__all__ = ["Syncify", "syncify"]