flyte 0.2.0b1__py3-none-any.whl → 2.0.0b46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266) hide show
  1. flyte/__init__.py +83 -30
  2. flyte/_bin/connect.py +61 -0
  3. flyte/_bin/debug.py +38 -0
  4. flyte/_bin/runtime.py +87 -19
  5. flyte/_bin/serve.py +351 -0
  6. flyte/_build.py +3 -2
  7. flyte/_cache/cache.py +6 -5
  8. flyte/_cache/local_cache.py +216 -0
  9. flyte/_code_bundle/_ignore.py +31 -5
  10. flyte/_code_bundle/_packaging.py +42 -11
  11. flyte/_code_bundle/_utils.py +57 -34
  12. flyte/_code_bundle/bundle.py +130 -27
  13. flyte/_constants.py +1 -0
  14. flyte/_context.py +21 -5
  15. flyte/_custom_context.py +73 -0
  16. flyte/_debug/constants.py +37 -0
  17. flyte/_debug/utils.py +17 -0
  18. flyte/_debug/vscode.py +315 -0
  19. flyte/_deploy.py +396 -75
  20. flyte/_deployer.py +109 -0
  21. flyte/_environment.py +94 -11
  22. flyte/_excepthook.py +37 -0
  23. flyte/_group.py +2 -1
  24. flyte/_hash.py +1 -16
  25. flyte/_image.py +544 -231
  26. flyte/_initialize.py +456 -316
  27. flyte/_interface.py +40 -5
  28. flyte/_internal/controllers/__init__.py +22 -8
  29. flyte/_internal/controllers/_local_controller.py +159 -35
  30. flyte/_internal/controllers/_trace.py +18 -10
  31. flyte/_internal/controllers/remote/__init__.py +38 -9
  32. flyte/_internal/controllers/remote/_action.py +82 -12
  33. flyte/_internal/controllers/remote/_client.py +6 -2
  34. flyte/_internal/controllers/remote/_controller.py +290 -64
  35. flyte/_internal/controllers/remote/_core.py +155 -95
  36. flyte/_internal/controllers/remote/_informer.py +40 -20
  37. flyte/_internal/controllers/remote/_service_protocol.py +2 -2
  38. flyte/_internal/imagebuild/__init__.py +2 -10
  39. flyte/_internal/imagebuild/docker_builder.py +391 -84
  40. flyte/_internal/imagebuild/image_builder.py +111 -55
  41. flyte/_internal/imagebuild/remote_builder.py +409 -0
  42. flyte/_internal/imagebuild/utils.py +79 -0
  43. flyte/_internal/resolvers/_app_env_module.py +92 -0
  44. flyte/_internal/resolvers/_task_module.py +5 -38
  45. flyte/_internal/resolvers/app_env.py +26 -0
  46. flyte/_internal/resolvers/common.py +8 -1
  47. flyte/_internal/resolvers/default.py +2 -2
  48. flyte/_internal/runtime/convert.py +319 -36
  49. flyte/_internal/runtime/entrypoints.py +106 -18
  50. flyte/_internal/runtime/io.py +71 -23
  51. flyte/_internal/runtime/resources_serde.py +21 -7
  52. flyte/_internal/runtime/reuse.py +125 -0
  53. flyte/_internal/runtime/rusty.py +196 -0
  54. flyte/_internal/runtime/task_serde.py +239 -66
  55. flyte/_internal/runtime/taskrunner.py +48 -8
  56. flyte/_internal/runtime/trigger_serde.py +162 -0
  57. flyte/_internal/runtime/types_serde.py +7 -16
  58. flyte/_keyring/file.py +115 -0
  59. flyte/_link.py +30 -0
  60. flyte/_logging.py +241 -42
  61. flyte/_map.py +312 -0
  62. flyte/_metrics.py +59 -0
  63. flyte/_module.py +74 -0
  64. flyte/_pod.py +30 -0
  65. flyte/_resources.py +296 -33
  66. flyte/_retry.py +1 -7
  67. flyte/_reusable_environment.py +72 -7
  68. flyte/_run.py +462 -132
  69. flyte/_secret.py +47 -11
  70. flyte/_serve.py +333 -0
  71. flyte/_task.py +245 -56
  72. flyte/_task_environment.py +219 -97
  73. flyte/_task_plugins.py +47 -0
  74. flyte/_tools.py +8 -8
  75. flyte/_trace.py +15 -24
  76. flyte/_trigger.py +1027 -0
  77. flyte/_utils/__init__.py +12 -1
  78. flyte/_utils/asyn.py +3 -1
  79. flyte/_utils/async_cache.py +139 -0
  80. flyte/_utils/coro_management.py +5 -4
  81. flyte/_utils/description_parser.py +19 -0
  82. flyte/_utils/docker_credentials.py +173 -0
  83. flyte/_utils/helpers.py +45 -19
  84. flyte/_utils/module_loader.py +123 -0
  85. flyte/_utils/org_discovery.py +57 -0
  86. flyte/_utils/uv_script_parser.py +8 -1
  87. flyte/_version.py +16 -3
  88. flyte/app/__init__.py +27 -0
  89. flyte/app/_app_environment.py +362 -0
  90. flyte/app/_connector_environment.py +40 -0
  91. flyte/app/_deploy.py +130 -0
  92. flyte/app/_parameter.py +343 -0
  93. flyte/app/_runtime/__init__.py +3 -0
  94. flyte/app/_runtime/app_serde.py +383 -0
  95. flyte/app/_types.py +113 -0
  96. flyte/app/extras/__init__.py +9 -0
  97. flyte/app/extras/_auth_middleware.py +217 -0
  98. flyte/app/extras/_fastapi.py +93 -0
  99. flyte/app/extras/_model_loader/__init__.py +3 -0
  100. flyte/app/extras/_model_loader/config.py +7 -0
  101. flyte/app/extras/_model_loader/loader.py +288 -0
  102. flyte/cli/__init__.py +12 -0
  103. flyte/cli/_abort.py +28 -0
  104. flyte/cli/_build.py +114 -0
  105. flyte/cli/_common.py +493 -0
  106. flyte/cli/_create.py +371 -0
  107. flyte/cli/_delete.py +45 -0
  108. flyte/cli/_deploy.py +401 -0
  109. flyte/cli/_gen.py +316 -0
  110. flyte/cli/_get.py +446 -0
  111. flyte/cli/_option.py +33 -0
  112. flyte/{_cli → cli}/_params.py +57 -17
  113. flyte/cli/_plugins.py +209 -0
  114. flyte/cli/_prefetch.py +292 -0
  115. flyte/cli/_run.py +690 -0
  116. flyte/cli/_serve.py +338 -0
  117. flyte/cli/_update.py +86 -0
  118. flyte/cli/_user.py +20 -0
  119. flyte/cli/main.py +246 -0
  120. flyte/config/__init__.py +2 -167
  121. flyte/config/_config.py +215 -163
  122. flyte/config/_internal.py +10 -1
  123. flyte/config/_reader.py +225 -0
  124. flyte/connectors/__init__.py +11 -0
  125. flyte/connectors/_connector.py +330 -0
  126. flyte/connectors/_server.py +194 -0
  127. flyte/connectors/utils.py +159 -0
  128. flyte/errors.py +134 -2
  129. flyte/extend.py +24 -0
  130. flyte/extras/_container.py +69 -56
  131. flyte/git/__init__.py +3 -0
  132. flyte/git/_config.py +279 -0
  133. flyte/io/__init__.py +8 -1
  134. flyte/io/{structured_dataset → _dataframe}/__init__.py +32 -30
  135. flyte/io/{structured_dataset → _dataframe}/basic_dfs.py +75 -68
  136. flyte/io/{structured_dataset/structured_dataset.py → _dataframe/dataframe.py} +207 -242
  137. flyte/io/_dir.py +575 -113
  138. flyte/io/_file.py +587 -141
  139. flyte/io/_hashing_io.py +342 -0
  140. flyte/io/extend.py +7 -0
  141. flyte/models.py +635 -0
  142. flyte/prefetch/__init__.py +22 -0
  143. flyte/prefetch/_hf_model.py +563 -0
  144. flyte/remote/__init__.py +14 -3
  145. flyte/remote/_action.py +879 -0
  146. flyte/remote/_app.py +346 -0
  147. flyte/remote/_auth_metadata.py +42 -0
  148. flyte/remote/_client/_protocols.py +62 -4
  149. flyte/remote/_client/auth/_auth_utils.py +19 -0
  150. flyte/remote/_client/auth/_authenticators/base.py +8 -2
  151. flyte/remote/_client/auth/_authenticators/device_code.py +4 -5
  152. flyte/remote/_client/auth/_authenticators/factory.py +4 -0
  153. flyte/remote/_client/auth/_authenticators/passthrough.py +79 -0
  154. flyte/remote/_client/auth/_authenticators/pkce.py +17 -18
  155. flyte/remote/_client/auth/_channel.py +47 -18
  156. flyte/remote/_client/auth/_client_config.py +5 -3
  157. flyte/remote/_client/auth/_keyring.py +15 -2
  158. flyte/remote/_client/auth/_token_client.py +3 -3
  159. flyte/remote/_client/controlplane.py +206 -18
  160. flyte/remote/_common.py +66 -0
  161. flyte/remote/_data.py +107 -22
  162. flyte/remote/_logs.py +116 -33
  163. flyte/remote/_project.py +21 -19
  164. flyte/remote/_run.py +164 -631
  165. flyte/remote/_secret.py +72 -29
  166. flyte/remote/_task.py +387 -46
  167. flyte/remote/_trigger.py +368 -0
  168. flyte/remote/_user.py +43 -0
  169. flyte/report/_report.py +10 -6
  170. flyte/storage/__init__.py +13 -1
  171. flyte/storage/_config.py +237 -0
  172. flyte/storage/_parallel_reader.py +289 -0
  173. flyte/storage/_storage.py +268 -59
  174. flyte/syncify/__init__.py +56 -0
  175. flyte/syncify/_api.py +414 -0
  176. flyte/types/__init__.py +39 -0
  177. flyte/types/_interface.py +22 -7
  178. flyte/{io/pickle/transformer.py → types/_pickle.py} +37 -9
  179. flyte/types/_string_literals.py +8 -9
  180. flyte/types/_type_engine.py +226 -126
  181. flyte/types/_utils.py +1 -1
  182. flyte-2.0.0b46.data/scripts/debug.py +38 -0
  183. flyte-2.0.0b46.data/scripts/runtime.py +194 -0
  184. flyte-2.0.0b46.dist-info/METADATA +352 -0
  185. flyte-2.0.0b46.dist-info/RECORD +221 -0
  186. flyte-2.0.0b46.dist-info/entry_points.txt +8 -0
  187. flyte-2.0.0b46.dist-info/licenses/LICENSE +201 -0
  188. flyte/_api_commons.py +0 -3
  189. flyte/_cli/_common.py +0 -299
  190. flyte/_cli/_create.py +0 -42
  191. flyte/_cli/_delete.py +0 -23
  192. flyte/_cli/_deploy.py +0 -140
  193. flyte/_cli/_get.py +0 -235
  194. flyte/_cli/_run.py +0 -174
  195. flyte/_cli/main.py +0 -98
  196. flyte/_datastructures.py +0 -342
  197. flyte/_internal/controllers/pbhash.py +0 -39
  198. flyte/_protos/common/authorization_pb2.py +0 -66
  199. flyte/_protos/common/authorization_pb2.pyi +0 -108
  200. flyte/_protos/common/authorization_pb2_grpc.py +0 -4
  201. flyte/_protos/common/identifier_pb2.py +0 -71
  202. flyte/_protos/common/identifier_pb2.pyi +0 -82
  203. flyte/_protos/common/identifier_pb2_grpc.py +0 -4
  204. flyte/_protos/common/identity_pb2.py +0 -48
  205. flyte/_protos/common/identity_pb2.pyi +0 -72
  206. flyte/_protos/common/identity_pb2_grpc.py +0 -4
  207. flyte/_protos/common/list_pb2.py +0 -36
  208. flyte/_protos/common/list_pb2.pyi +0 -69
  209. flyte/_protos/common/list_pb2_grpc.py +0 -4
  210. flyte/_protos/common/policy_pb2.py +0 -37
  211. flyte/_protos/common/policy_pb2.pyi +0 -27
  212. flyte/_protos/common/policy_pb2_grpc.py +0 -4
  213. flyte/_protos/common/role_pb2.py +0 -37
  214. flyte/_protos/common/role_pb2.pyi +0 -53
  215. flyte/_protos/common/role_pb2_grpc.py +0 -4
  216. flyte/_protos/common/runtime_version_pb2.py +0 -28
  217. flyte/_protos/common/runtime_version_pb2.pyi +0 -24
  218. flyte/_protos/common/runtime_version_pb2_grpc.py +0 -4
  219. flyte/_protos/logs/dataplane/payload_pb2.py +0 -96
  220. flyte/_protos/logs/dataplane/payload_pb2.pyi +0 -168
  221. flyte/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
  222. flyte/_protos/secret/definition_pb2.py +0 -49
  223. flyte/_protos/secret/definition_pb2.pyi +0 -93
  224. flyte/_protos/secret/definition_pb2_grpc.py +0 -4
  225. flyte/_protos/secret/payload_pb2.py +0 -62
  226. flyte/_protos/secret/payload_pb2.pyi +0 -94
  227. flyte/_protos/secret/payload_pb2_grpc.py +0 -4
  228. flyte/_protos/secret/secret_pb2.py +0 -38
  229. flyte/_protos/secret/secret_pb2.pyi +0 -6
  230. flyte/_protos/secret/secret_pb2_grpc.py +0 -198
  231. flyte/_protos/secret/secret_pb2_grpc_grpc.py +0 -198
  232. flyte/_protos/validate/validate/validate_pb2.py +0 -76
  233. flyte/_protos/workflow/node_execution_service_pb2.py +0 -26
  234. flyte/_protos/workflow/node_execution_service_pb2.pyi +0 -4
  235. flyte/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
  236. flyte/_protos/workflow/queue_service_pb2.py +0 -106
  237. flyte/_protos/workflow/queue_service_pb2.pyi +0 -141
  238. flyte/_protos/workflow/queue_service_pb2_grpc.py +0 -172
  239. flyte/_protos/workflow/run_definition_pb2.py +0 -128
  240. flyte/_protos/workflow/run_definition_pb2.pyi +0 -310
  241. flyte/_protos/workflow/run_definition_pb2_grpc.py +0 -4
  242. flyte/_protos/workflow/run_logs_service_pb2.py +0 -41
  243. flyte/_protos/workflow/run_logs_service_pb2.pyi +0 -28
  244. flyte/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
  245. flyte/_protos/workflow/run_service_pb2.py +0 -133
  246. flyte/_protos/workflow/run_service_pb2.pyi +0 -175
  247. flyte/_protos/workflow/run_service_pb2_grpc.py +0 -412
  248. flyte/_protos/workflow/state_service_pb2.py +0 -58
  249. flyte/_protos/workflow/state_service_pb2.pyi +0 -71
  250. flyte/_protos/workflow/state_service_pb2_grpc.py +0 -138
  251. flyte/_protos/workflow/task_definition_pb2.py +0 -72
  252. flyte/_protos/workflow/task_definition_pb2.pyi +0 -65
  253. flyte/_protos/workflow/task_definition_pb2_grpc.py +0 -4
  254. flyte/_protos/workflow/task_service_pb2.py +0 -44
  255. flyte/_protos/workflow/task_service_pb2.pyi +0 -31
  256. flyte/_protos/workflow/task_service_pb2_grpc.py +0 -104
  257. flyte/io/_dataframe.py +0 -0
  258. flyte/io/pickle/__init__.py +0 -0
  259. flyte/remote/_console.py +0 -18
  260. flyte-0.2.0b1.dist-info/METADATA +0 -179
  261. flyte-0.2.0b1.dist-info/RECORD +0 -204
  262. flyte-0.2.0b1.dist-info/entry_points.txt +0 -3
  263. /flyte/{_cli → _debug}/__init__.py +0 -0
  264. /flyte/{_protos → _keyring}/__init__.py +0 -0
  265. {flyte-0.2.0b1.dist-info → flyte-2.0.0b46.dist-info}/WHEEL +0 -0
  266. {flyte-0.2.0b1.dist-info → flyte-2.0.0b46.dist-info}/top_level.txt +0 -0
flyte/storage/_storage.py CHANGED
@@ -1,19 +1,38 @@
1
+ from __future__ import annotations
2
+
1
3
  import os
2
4
  import pathlib
3
5
  import random
4
6
  import tempfile
5
7
  import typing
6
- from typing import AsyncIterator, Optional
8
+ from typing import AsyncGenerator, Optional
7
9
  from uuid import UUID
8
10
 
9
11
  import fsspec
12
+ import obstore
10
13
  from fsspec.asyn import AsyncFileSystem
11
14
  from fsspec.utils import get_protocol
12
15
  from obstore.exceptions import GenericError
13
16
  from obstore.fsspec import register
17
+ from obstore.store import ObjectStore
14
18
 
15
19
  from flyte._initialize import get_storage
16
20
  from flyte._logging import logger
21
+ from flyte.errors import InitializationError, OnlyAsyncIOSupportedError
22
+
23
+ if typing.TYPE_CHECKING:
24
+ from obstore import AsyncReadableFile, AsyncWritableFile
25
+
26
+ _OBSTORE_SUPPORTED_PROTOCOLS = ["s3", "gs", "abfs", "abfss"]
27
+
28
+
29
+ def _is_obstore_supported_protocol(protocol: str) -> bool:
30
+ """
31
+ Check if the given protocol is supported by obstore.
32
+ :param protocol: Protocol to check.
33
+ :return: True if the protocol is supported, False otherwise.
34
+ """
35
+ return protocol in _OBSTORE_SUPPORTED_PROTOCOLS
17
36
 
18
37
 
19
38
  def is_remote(path: typing.Union[pathlib.Path | str]) -> bool:
@@ -62,6 +81,52 @@ def get_random_local_directory() -> pathlib.Path:
62
81
  return _dir
63
82
 
64
83
 
84
+ def get_configured_fsspec_kwargs(
85
+ protocol: typing.Optional[str] = None, anonymous: bool = False
86
+ ) -> typing.Dict[str, typing.Any]:
87
+ if protocol:
88
+ # Try to get storage config safely - may not be initialized for local operations
89
+ try:
90
+ storage_config = get_storage()
91
+ except InitializationError:
92
+ storage_config = None
93
+
94
+ match protocol:
95
+ case "s3":
96
+ # If the protocol is s3, we can use the s3 filesystem
97
+ from flyte.storage import S3
98
+
99
+ if storage_config and isinstance(storage_config, S3):
100
+ return storage_config.get_fsspec_kwargs(anonymous=anonymous)
101
+
102
+ return S3.auto().get_fsspec_kwargs(anonymous=anonymous)
103
+ case "gs":
104
+ # If the protocol is gs, we can use the gs filesystem
105
+ from flyte.storage import GCS
106
+
107
+ if storage_config and isinstance(storage_config, GCS):
108
+ return storage_config.get_fsspec_kwargs(anonymous=anonymous)
109
+
110
+ return GCS.auto().get_fsspec_kwargs(anonymous=anonymous)
111
+ case "abfs" | "abfss":
112
+ # If the protocol is abfs or abfss, we can use the abfs filesystem
113
+ from flyte.storage import ABFS
114
+
115
+ if storage_config and isinstance(storage_config, ABFS):
116
+ return storage_config.get_fsspec_kwargs(anonymous=anonymous)
117
+
118
+ return ABFS.auto().get_fsspec_kwargs(anonymous=anonymous)
119
+ case _:
120
+ return {}
121
+
122
+ # If no protocol, return args from storage config if set
123
+ storage_config = get_storage()
124
+ if storage_config:
125
+ return storage_config.get_fsspec_kwargs(anonymous)
126
+
127
+ return {}
128
+
129
+
65
130
  def get_underlying_filesystem(
66
131
  protocol: typing.Optional[str] = None,
67
132
  anonymous: bool = False,
@@ -72,10 +137,10 @@ def get_underlying_filesystem(
72
137
  # If protocol is None, get it from the path
73
138
  protocol = get_protocol(path)
74
139
 
75
- storage_config = get_storage()
76
- if storage_config:
77
- kwargs.update(storage_config.get_fsspec_kwargs(anonymous, **kwargs))
78
- return fsspec.filesystem(protocol, **kwargs)
140
+ configured_kwargs = get_configured_fsspec_kwargs(protocol, anonymous=anonymous)
141
+ configured_kwargs.update(kwargs)
142
+
143
+ return fsspec.filesystem(protocol, **configured_kwargs)
79
144
 
80
145
 
81
146
  def _get_anonymous_filesystem(from_path):
@@ -83,16 +148,86 @@ def _get_anonymous_filesystem(from_path):
83
148
  return get_underlying_filesystem(get_protocol(from_path), anonymous=True, asynchronous=True)
84
149
 
85
150
 
151
+ async def _get_obstore_bypass(
152
+ from_path: str,
153
+ to_path: str | pathlib.Path,
154
+ recursive: bool = False,
155
+ exclude: list[str] | None = None,
156
+ **kwargs,
157
+ ) -> str:
158
+ from flyte.storage._parallel_reader import ObstoreParallelReader
159
+
160
+ fs = get_underlying_filesystem(path=from_path)
161
+ bucket, prefix = fs._split_path(from_path) # pylint: disable=W0212
162
+ store: ObjectStore = fs._construct_store(bucket)
163
+
164
+ download_kwargs = {}
165
+ if "chunk_size" in kwargs:
166
+ download_kwargs["chunk_size"] = kwargs["chunk_size"]
167
+ if "max_concurrency" in kwargs:
168
+ download_kwargs["max_concurrency"] = kwargs["max_concurrency"]
169
+
170
+ reader = ObstoreParallelReader(store, **download_kwargs)
171
+ target_path = pathlib.Path(to_path) if isinstance(to_path, str) else to_path
172
+
173
+ # if recursive, just download the prefix to the target path
174
+ if recursive:
175
+ logger.debug(f"Downloading recursively {prefix=} to {target_path=}")
176
+ await reader.download_files(
177
+ prefix,
178
+ target_path,
179
+ exclude=exclude,
180
+ )
181
+ return str(to_path)
182
+
183
+ # if not recursive, we need to split out the file name from the prefix
184
+ else:
185
+ path_for_reader = pathlib.Path(prefix).name
186
+ final_prefix = pathlib.Path(prefix).parent
187
+ logger.debug(f"Downloading single file {final_prefix=}, {path_for_reader=} to {target_path=}")
188
+ await reader.download_files(
189
+ final_prefix,
190
+ target_path.parent,
191
+ path_for_reader,
192
+ destination_file_name=target_path.name,
193
+ )
194
+ return str(target_path)
195
+
196
+
86
197
  async def get(from_path: str, to_path: Optional[str | pathlib.Path] = None, recursive: bool = False, **kwargs) -> str:
87
198
  if not to_path:
88
- name = pathlib.Path(from_path).name
199
+ name = pathlib.Path(from_path).name # may need to be adjusted for windows
89
200
  to_path = get_random_local_path(file_path_or_file_name=name)
90
201
  logger.debug(f"Storing file from {from_path} to {to_path}")
202
+ else:
203
+ # Only apply directory logic for single files (not recursive)
204
+ if not recursive:
205
+ to_path_str = str(to_path)
206
+ # Check for trailing separator BEFORE converting to Path (which normalizes and removes it)
207
+ ends_with_sep = to_path_str.endswith(os.sep)
208
+ to_path_obj = pathlib.Path(to_path)
209
+
210
+ # If path ends with os.sep or is an existing directory, append source filename
211
+ if ends_with_sep or (to_path_obj.exists() and to_path_obj.is_dir()):
212
+ source_filename = pathlib.Path(from_path).name # may need to be adjusted for windows
213
+ to_path = to_path_obj / source_filename
214
+ # For recursive=True, keep to_path as-is (it's the destination directory for contents)
215
+
91
216
  file_system = get_underlying_filesystem(path=from_path)
217
+
218
+ # Check if we should use obstore bypass
219
+ if (
220
+ _is_obstore_supported_protocol(file_system.protocol)
221
+ and hasattr(file_system, "_split_path")
222
+ and hasattr(file_system, "_construct_store")
223
+ and recursive
224
+ ):
225
+ return await _get_obstore_bypass(from_path, to_path, recursive, **kwargs)
226
+
92
227
  try:
93
228
  return await _get_from_filesystem(file_system, from_path, to_path, recursive=recursive, **kwargs)
94
229
  except (OSError, GenericError) as oe:
95
- logger.debug(f"Error in getting {from_path} to {to_path} rec {recursive} {oe}")
230
+ logger.debug(f"Error in getting {from_path} to {to_path}, recursive: {recursive}, error: {oe}")
96
231
  if isinstance(file_system, AsyncFileSystem):
97
232
  try:
98
233
  exists = await file_system._exists(from_path) # pylint: disable=W0212
@@ -103,7 +238,6 @@ async def get(from_path: str, to_path: Optional[str | pathlib.Path] = None, recu
103
238
  else:
104
239
  exists = file_system.exists(from_path)
105
240
  if not exists:
106
- # TODO: update exception to be more specific
107
241
  raise AssertionError(f"Unable to load data from {from_path}")
108
242
  file_system = _get_anonymous_filesystem(from_path)
109
243
  logger.debug(f"Attempting anonymous get with {file_system}")
@@ -118,21 +252,21 @@ async def _get_from_filesystem(
118
252
  **kwargs,
119
253
  ):
120
254
  if isinstance(file_system, AsyncFileSystem):
121
- dst = await file_system._get(from_path, to_path, recursive=recursive, **kwargs) # pylint: disable=W0212
255
+ dst = await file_system._get(str(from_path), str(to_path), recursive=recursive, **kwargs) # pylint: disable=W0212
122
256
  else:
123
- dst = file_system.get(from_path, to_path, recursive=recursive, **kwargs)
257
+ dst = file_system.get(str(from_path), str(to_path), recursive=recursive, **kwargs)
124
258
 
125
259
  if isinstance(dst, (str, pathlib.Path)):
126
260
  return dst
127
- return to_path
261
+ return str(to_path)
128
262
 
129
263
 
130
- async def put(from_path: str, to_path: Optional[str] = None, recursive: bool = False, **kwargs):
264
+ async def put(from_path: str, to_path: Optional[str] = None, recursive: bool = False, **kwargs) -> str:
131
265
  if not to_path:
132
266
  from flyte._context import internal_ctx
133
267
 
134
268
  ctx = internal_ctx()
135
- name = pathlib.Path(from_path).name if not recursive else None # don't pass a name for folders
269
+ name = pathlib.Path(from_path).name
136
270
  to_path = ctx.raw_data.get_random_remote_path(file_name=name)
137
271
 
138
272
  file_system = get_underlying_filesystem(path=to_path)
@@ -142,11 +276,53 @@ async def put(from_path: str, to_path: Optional[str] = None, recursive: bool = F
142
276
  else:
143
277
  dst = file_system.put(from_path, to_path, recursive=recursive, **kwargs)
144
278
  if isinstance(dst, (str, pathlib.Path)):
145
- return dst
279
+ return str(dst)
146
280
  else:
147
281
  return to_path
148
282
 
149
283
 
284
+ async def _open_obstore_bypass(path: str, mode: str = "rb", **kwargs) -> AsyncReadableFile | AsyncWritableFile:
285
+ """
286
+ Simple obstore bypass for opening files. No fallbacks, obstore only.
287
+ """
288
+
289
+ fs = get_underlying_filesystem(path=path)
290
+ bucket, file_path = fs._split_path(path) # pylint: disable=W0212
291
+ store: ObjectStore = fs._construct_store(bucket)
292
+
293
+ file_handle: AsyncReadableFile | AsyncWritableFile
294
+
295
+ if "w" in mode:
296
+ attributes = kwargs.pop("attributes", {})
297
+ file_handle = obstore.open_writer_async(store, file_path, attributes=attributes)
298
+ else: # read mode
299
+ buffer_size = kwargs.pop("buffer_size", 10 * 2**20)
300
+ file_handle = await obstore.open_reader_async(store, file_path, buffer_size=buffer_size)
301
+ return file_handle
302
+
303
+
304
+ async def open(path: str, mode: str = "rb", **kwargs) -> AsyncReadableFile | AsyncWritableFile:
305
+ """
306
+ Asynchronously open a file and return an async context manager.
307
+ This function checks if the underlying filesystem supports obstore bypass.
308
+ If it does, it uses obstore to open the file. Otherwise, it falls back to
309
+ the filesystem's own `open_async` method when it is an AsyncFileSystem.
310
+
311
+ It will raise OnlyAsyncIOSupportedError if neither obstore nor AsyncFileSystem is supported.
312
+ """
313
+ fs = get_underlying_filesystem(path=path)
314
+
315
+ # Check if we should use obstore bypass
316
+ if _is_obstore_supported_protocol(fs.protocol) and hasattr(fs, "_split_path") and hasattr(fs, "_construct_store"):
317
+ return await _open_obstore_bypass(path, mode, **kwargs)
318
+
319
+ # Fallback to normal open
320
+ if isinstance(fs, AsyncFileSystem):
321
+ return await fs.open_async(path, mode, **kwargs)
322
+
323
+ raise OnlyAsyncIOSupportedError(f"Filesystem {fs} does not support async operations")
324
+
325
+
150
326
  async def put_stream(
151
327
  data_iterable: typing.AsyncIterable[bytes] | bytes, *, name: str | None = None, to_path: str | None = None, **kwargs
152
328
  ) -> str:
@@ -172,70 +348,75 @@ async def put_stream(
172
348
 
173
349
  ctx = internal_ctx()
174
350
  to_path = ctx.raw_data.get_random_remote_path(file_name=name)
351
+
352
+ # Check if we should use obstore bypass
175
353
  fs = get_underlying_filesystem(path=to_path)
176
- file_handle = None
177
- if isinstance(fs, AsyncFileSystem):
178
- try:
179
- file_handle = await fs.open_async(to_path, "wb", **kwargs)
180
- if isinstance(data_iterable, bytes):
181
- await file_handle.write(data_iterable)
182
- else:
183
- async for data in data_iterable:
184
- await file_handle.write(data)
185
- return str(to_path)
186
- except NotImplementedError:
187
- logger.debug(f"{fs} doesn't implement 'open_async', falling back to sync")
188
- finally:
189
- if file_handle is not None:
190
- await file_handle.close()
191
-
192
- with fs.open(to_path, "wb", **kwargs) as f:
354
+ try:
355
+ file_handle = typing.cast("AsyncWritableFile", await open(to_path, "wb", **kwargs))
193
356
  if isinstance(data_iterable, bytes):
194
- f.write(data_iterable)
357
+ await file_handle.write(data_iterable)
195
358
  else:
196
- # If data_iterable is async iterable, iterate over it and write each chunk to the file
197
359
  async for data in data_iterable:
198
- f.write(data)
360
+ await file_handle.write(data)
361
+ await file_handle.close()
362
+ return str(to_path)
363
+ except OnlyAsyncIOSupportedError:
364
+ pass
365
+
366
+ # Fallback to normal open
367
+ file_handle_io: typing.IO = fs.open(to_path, mode="wb", **kwargs)
368
+ if isinstance(data_iterable, bytes):
369
+ file_handle_io.write(data_iterable)
370
+ else:
371
+ async for data in data_iterable:
372
+ file_handle_io.write(data)
373
+ file_handle_io.close()
374
+
199
375
  return str(to_path)
200
376
 
201
377
 
202
- async def get_stream(path: str, chunk_size=10 * 2**20, **kwargs) -> AsyncIterator[bytes]:
378
+ async def get_stream(path: str, chunk_size=10 * 2**20, **kwargs) -> AsyncGenerator[bytes, None]:
203
379
  """
204
380
  Get a stream of data from a remote location.
205
381
  This is useful for downloading streaming data from a remote location.
206
382
  Example usage:
207
383
  ```python
208
384
  import flyte.storage as storage
209
- obj = storage.get_stream(path="s3://my_bucket/my_file.txt")
385
+ async for chunk in storage.get_stream(path="s3://my_bucket/my_file.txt"):
386
+ process(chunk)
210
387
  ```
211
388
 
212
389
  :param path: Path to the remote location from which the data will be downloaded.
213
390
  :param kwargs: Additional arguments to be passed to the underlying filesystem.
214
391
  :param chunk_size: Size of each chunk to be read from the file.
215
- :return: An async iterator that yields chunks of data.
392
+ :return: An async iterator that yields chunks of bytes.
216
393
  """
217
- fs = get_underlying_filesystem(path=path, **kwargs)
218
- file_size = fs.info(path)["size"]
219
- total_read = 0
220
- file_handle = None
221
- try:
222
- if isinstance(fs, AsyncFileSystem):
223
- file_handle = await fs.open_async(path, "rb")
224
- while chunk := await file_handle.read(min(chunk_size, file_size - total_read)):
225
- total_read += len(chunk)
226
- yield chunk
227
- return
228
- except NotImplementedError:
229
- logger.debug(f"{fs} doesn't implement 'open_async', falling back to sync")
230
- finally:
231
- if file_handle is not None:
232
- file_handle.close()
233
-
234
- # Sync fallback
235
- with fs.open(path, "rb") as file_handle:
236
- while chunk := file_handle.read(min(chunk_size, file_size - total_read)):
237
- total_read += len(chunk)
394
+ # Check if we should use obstore bypass
395
+ fs = get_underlying_filesystem(path=path)
396
+ if _is_obstore_supported_protocol(fs.protocol) and hasattr(fs, "_split_path") and hasattr(fs, "_construct_store"):
397
+ # Use chunk_size as obstore's buffer_size unless the caller passed buffer_size explicitly
398
+ if "buffer_size" not in kwargs:
399
+ kwargs["buffer_size"] = chunk_size
400
+ file_handle = typing.cast("AsyncReadableFile", await _open_obstore_bypass(path, "rb", **kwargs))
401
+ while chunk := await file_handle.read():
402
+ yield bytes(chunk)
403
+ return
404
+
405
+ # Fallback to normal open
406
+ if "block_size" not in kwargs:
407
+ kwargs["block_size"] = chunk_size
408
+
409
+ if isinstance(fs, AsyncFileSystem):
410
+ file_handle = await fs.open_async(path, "rb", **kwargs)
411
+ while chunk := await file_handle.read():
238
412
  yield chunk
413
+ await file_handle.close()
414
+ return
415
+
416
+ file_handle = fs.open(path, "rb", **kwargs)
417
+ while chunk := file_handle.read():
418
+ yield chunk
419
+ file_handle.close()
239
420
 
240
421
 
241
422
  def join(*paths: str) -> str:
@@ -248,4 +429,32 @@ def join(*paths: str) -> str:
248
429
  return str(os.path.join(*paths))
249
430
 
250
431
 
251
- register(["s3", "gs", "abfs", "abfss"], asynchronous=True)
432
+ async def exists(path: str, **kwargs) -> bool:
433
+ """
434
+ Check if a path exists.
435
+
436
+ :param path: Path to be checked.
437
+ :param kwargs: Additional arguments to be passed to the underlying filesystem.
438
+ :return: True if the path exists, False otherwise.
439
+ """
440
+ try:
441
+ fs = get_underlying_filesystem(path=path, **kwargs)
442
+ if isinstance(fs, AsyncFileSystem):
443
+ _ = await fs._info(path)
444
+ return True
445
+ _ = fs.info(path)
446
+ return True
447
+ except FileNotFoundError:
448
+ return False
449
+
450
+
451
+ def exists_sync(path: str, **kwargs) -> bool:
452
+ try:
453
+ fs = get_underlying_filesystem(path=path, **kwargs)
454
+ _ = fs.info(path)
455
+ return True
456
+ except FileNotFoundError:
457
+ return False
458
+
459
+
460
+ register(_OBSTORE_SUPPORTED_PROTOCOLS, asynchronous=True)
@@ -0,0 +1,56 @@
1
+ """
2
+ # Syncify Module
3
+ This module provides the `syncify` decorator and the `Syncify` class.
4
+ The decorator can be used to convert asynchronous functions or methods into synchronous ones.
5
+ This is useful for integrating async code into synchronous contexts.
6
+
7
+ Every asynchronous function or method wrapped with `syncify` can be called synchronously using the
8
+ parenthesis `()` operator, or asynchronously using the `.aio()` method.
9
+
10
+ Example:
11
+
12
+ ```python
13
+ from flyte.syncify import syncify
14
+
15
+ @syncify
16
+ async def async_function(x: str) -> str:
17
+ return f"Hello, Async World {x}!"
18
+
19
+
20
+ # now you can call it synchronously
21
+ result = async_function("Async World") # Note: no .aio() needed for sync calls
22
+ print(result)
23
+ # Output: Hello, Async World Async World!
24
+
25
+ # or call it asynchronously
26
+ async def main():
27
+ result = await async_function.aio("World") # Note the use of .aio() for async calls
28
+ print(result)
29
+ ```
30
+
31
+ ## Creating a Syncify Instance
32
+ ```python
33
+ from flyte.syncify import Syncify
34
+
35
+ syncer = Syncify("my_syncer")
36
+
37
+ # Now you can use `syncer` to decorate your async functions or methods
38
+
39
+ ```
40
+
41
+ ## How does it work?
42
+ The Syncify class wraps asynchronous functions, classmethods, instance methods, and static methods to
43
+ provide a synchronous interface. The wrapped methods are always executed in the context of a background loop,
44
+ whether they are called synchronously or asynchronously. This allows for seamless integration of async code, as
45
+ certain async libraries capture the event loop. An example is grpc.aio, which captures the event loop.
46
+ In such a case, the Syncify class ensures that the async function is executed in the context of the background loop.
47
+
48
+ To use it correctly with grpc.aio, you should wrap every grpc.aio channel creation, and client invocation
49
+ with the same `Syncify` instance. This ensures that the async code runs in the correct event loop context.
50
+ """
51
+
52
+ from flyte.syncify._api import Syncify
53
+
54
+ syncify = Syncify()
55
+
56
+ __all__ = ["Syncify", "syncify"]