flyte 2.0.0b32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flyte might be problematic. Click here for more details.

Files changed (204) hide show
  1. flyte/__init__.py +108 -0
  2. flyte/_bin/__init__.py +0 -0
  3. flyte/_bin/debug.py +38 -0
  4. flyte/_bin/runtime.py +195 -0
  5. flyte/_bin/serve.py +178 -0
  6. flyte/_build.py +26 -0
  7. flyte/_cache/__init__.py +12 -0
  8. flyte/_cache/cache.py +147 -0
  9. flyte/_cache/defaults.py +9 -0
  10. flyte/_cache/local_cache.py +216 -0
  11. flyte/_cache/policy_function_body.py +42 -0
  12. flyte/_code_bundle/__init__.py +8 -0
  13. flyte/_code_bundle/_ignore.py +121 -0
  14. flyte/_code_bundle/_packaging.py +218 -0
  15. flyte/_code_bundle/_utils.py +347 -0
  16. flyte/_code_bundle/bundle.py +266 -0
  17. flyte/_constants.py +1 -0
  18. flyte/_context.py +155 -0
  19. flyte/_custom_context.py +73 -0
  20. flyte/_debug/__init__.py +0 -0
  21. flyte/_debug/constants.py +38 -0
  22. flyte/_debug/utils.py +17 -0
  23. flyte/_debug/vscode.py +307 -0
  24. flyte/_deploy.py +408 -0
  25. flyte/_deployer.py +109 -0
  26. flyte/_doc.py +29 -0
  27. flyte/_docstring.py +32 -0
  28. flyte/_environment.py +122 -0
  29. flyte/_excepthook.py +37 -0
  30. flyte/_group.py +32 -0
  31. flyte/_hash.py +8 -0
  32. flyte/_image.py +1055 -0
  33. flyte/_initialize.py +628 -0
  34. flyte/_interface.py +119 -0
  35. flyte/_internal/__init__.py +3 -0
  36. flyte/_internal/controllers/__init__.py +129 -0
  37. flyte/_internal/controllers/_local_controller.py +239 -0
  38. flyte/_internal/controllers/_trace.py +48 -0
  39. flyte/_internal/controllers/remote/__init__.py +58 -0
  40. flyte/_internal/controllers/remote/_action.py +211 -0
  41. flyte/_internal/controllers/remote/_client.py +47 -0
  42. flyte/_internal/controllers/remote/_controller.py +583 -0
  43. flyte/_internal/controllers/remote/_core.py +465 -0
  44. flyte/_internal/controllers/remote/_informer.py +381 -0
  45. flyte/_internal/controllers/remote/_service_protocol.py +50 -0
  46. flyte/_internal/imagebuild/__init__.py +3 -0
  47. flyte/_internal/imagebuild/docker_builder.py +706 -0
  48. flyte/_internal/imagebuild/image_builder.py +277 -0
  49. flyte/_internal/imagebuild/remote_builder.py +386 -0
  50. flyte/_internal/imagebuild/utils.py +78 -0
  51. flyte/_internal/resolvers/__init__.py +0 -0
  52. flyte/_internal/resolvers/_task_module.py +21 -0
  53. flyte/_internal/resolvers/common.py +31 -0
  54. flyte/_internal/resolvers/default.py +28 -0
  55. flyte/_internal/runtime/__init__.py +0 -0
  56. flyte/_internal/runtime/convert.py +486 -0
  57. flyte/_internal/runtime/entrypoints.py +204 -0
  58. flyte/_internal/runtime/io.py +188 -0
  59. flyte/_internal/runtime/resources_serde.py +152 -0
  60. flyte/_internal/runtime/reuse.py +125 -0
  61. flyte/_internal/runtime/rusty.py +193 -0
  62. flyte/_internal/runtime/task_serde.py +362 -0
  63. flyte/_internal/runtime/taskrunner.py +209 -0
  64. flyte/_internal/runtime/trigger_serde.py +160 -0
  65. flyte/_internal/runtime/types_serde.py +54 -0
  66. flyte/_keyring/__init__.py +0 -0
  67. flyte/_keyring/file.py +115 -0
  68. flyte/_logging.py +300 -0
  69. flyte/_map.py +312 -0
  70. flyte/_module.py +72 -0
  71. flyte/_pod.py +30 -0
  72. flyte/_resources.py +473 -0
  73. flyte/_retry.py +32 -0
  74. flyte/_reusable_environment.py +102 -0
  75. flyte/_run.py +724 -0
  76. flyte/_secret.py +96 -0
  77. flyte/_task.py +550 -0
  78. flyte/_task_environment.py +316 -0
  79. flyte/_task_plugins.py +47 -0
  80. flyte/_timeout.py +47 -0
  81. flyte/_tools.py +27 -0
  82. flyte/_trace.py +119 -0
  83. flyte/_trigger.py +1000 -0
  84. flyte/_utils/__init__.py +30 -0
  85. flyte/_utils/asyn.py +121 -0
  86. flyte/_utils/async_cache.py +139 -0
  87. flyte/_utils/coro_management.py +27 -0
  88. flyte/_utils/docker_credentials.py +173 -0
  89. flyte/_utils/file_handling.py +72 -0
  90. flyte/_utils/helpers.py +134 -0
  91. flyte/_utils/lazy_module.py +54 -0
  92. flyte/_utils/module_loader.py +104 -0
  93. flyte/_utils/org_discovery.py +57 -0
  94. flyte/_utils/uv_script_parser.py +49 -0
  95. flyte/_version.py +34 -0
  96. flyte/app/__init__.py +22 -0
  97. flyte/app/_app_environment.py +157 -0
  98. flyte/app/_deploy.py +125 -0
  99. flyte/app/_input.py +160 -0
  100. flyte/app/_runtime/__init__.py +3 -0
  101. flyte/app/_runtime/app_serde.py +347 -0
  102. flyte/app/_types.py +101 -0
  103. flyte/app/extras/__init__.py +3 -0
  104. flyte/app/extras/_fastapi.py +151 -0
  105. flyte/cli/__init__.py +12 -0
  106. flyte/cli/_abort.py +28 -0
  107. flyte/cli/_build.py +114 -0
  108. flyte/cli/_common.py +468 -0
  109. flyte/cli/_create.py +371 -0
  110. flyte/cli/_delete.py +45 -0
  111. flyte/cli/_deploy.py +293 -0
  112. flyte/cli/_gen.py +176 -0
  113. flyte/cli/_get.py +370 -0
  114. flyte/cli/_option.py +33 -0
  115. flyte/cli/_params.py +554 -0
  116. flyte/cli/_plugins.py +209 -0
  117. flyte/cli/_run.py +597 -0
  118. flyte/cli/_serve.py +64 -0
  119. flyte/cli/_update.py +37 -0
  120. flyte/cli/_user.py +17 -0
  121. flyte/cli/main.py +221 -0
  122. flyte/config/__init__.py +3 -0
  123. flyte/config/_config.py +248 -0
  124. flyte/config/_internal.py +73 -0
  125. flyte/config/_reader.py +225 -0
  126. flyte/connectors/__init__.py +11 -0
  127. flyte/connectors/_connector.py +270 -0
  128. flyte/connectors/_server.py +197 -0
  129. flyte/connectors/utils.py +135 -0
  130. flyte/errors.py +243 -0
  131. flyte/extend.py +19 -0
  132. flyte/extras/__init__.py +5 -0
  133. flyte/extras/_container.py +286 -0
  134. flyte/git/__init__.py +3 -0
  135. flyte/git/_config.py +21 -0
  136. flyte/io/__init__.py +29 -0
  137. flyte/io/_dataframe/__init__.py +131 -0
  138. flyte/io/_dataframe/basic_dfs.py +223 -0
  139. flyte/io/_dataframe/dataframe.py +1026 -0
  140. flyte/io/_dir.py +910 -0
  141. flyte/io/_file.py +914 -0
  142. flyte/io/_hashing_io.py +342 -0
  143. flyte/models.py +479 -0
  144. flyte/py.typed +0 -0
  145. flyte/remote/__init__.py +35 -0
  146. flyte/remote/_action.py +738 -0
  147. flyte/remote/_app.py +57 -0
  148. flyte/remote/_client/__init__.py +0 -0
  149. flyte/remote/_client/_protocols.py +189 -0
  150. flyte/remote/_client/auth/__init__.py +12 -0
  151. flyte/remote/_client/auth/_auth_utils.py +14 -0
  152. flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
  153. flyte/remote/_client/auth/_authenticators/base.py +403 -0
  154. flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  155. flyte/remote/_client/auth/_authenticators/device_code.py +117 -0
  156. flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
  157. flyte/remote/_client/auth/_authenticators/factory.py +200 -0
  158. flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
  159. flyte/remote/_client/auth/_channel.py +213 -0
  160. flyte/remote/_client/auth/_client_config.py +85 -0
  161. flyte/remote/_client/auth/_default_html.py +32 -0
  162. flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  163. flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
  164. flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
  165. flyte/remote/_client/auth/_keyring.py +152 -0
  166. flyte/remote/_client/auth/_token_client.py +260 -0
  167. flyte/remote/_client/auth/errors.py +16 -0
  168. flyte/remote/_client/controlplane.py +128 -0
  169. flyte/remote/_common.py +30 -0
  170. flyte/remote/_console.py +19 -0
  171. flyte/remote/_data.py +161 -0
  172. flyte/remote/_logs.py +185 -0
  173. flyte/remote/_project.py +88 -0
  174. flyte/remote/_run.py +386 -0
  175. flyte/remote/_secret.py +142 -0
  176. flyte/remote/_task.py +527 -0
  177. flyte/remote/_trigger.py +306 -0
  178. flyte/remote/_user.py +33 -0
  179. flyte/report/__init__.py +3 -0
  180. flyte/report/_report.py +182 -0
  181. flyte/report/_template.html +124 -0
  182. flyte/storage/__init__.py +36 -0
  183. flyte/storage/_config.py +237 -0
  184. flyte/storage/_parallel_reader.py +274 -0
  185. flyte/storage/_remote_fs.py +34 -0
  186. flyte/storage/_storage.py +456 -0
  187. flyte/storage/_utils.py +5 -0
  188. flyte/syncify/__init__.py +56 -0
  189. flyte/syncify/_api.py +375 -0
  190. flyte/types/__init__.py +52 -0
  191. flyte/types/_interface.py +40 -0
  192. flyte/types/_pickle.py +145 -0
  193. flyte/types/_renderer.py +162 -0
  194. flyte/types/_string_literals.py +119 -0
  195. flyte/types/_type_engine.py +2254 -0
  196. flyte/types/_utils.py +80 -0
  197. flyte-2.0.0b32.data/scripts/debug.py +38 -0
  198. flyte-2.0.0b32.data/scripts/runtime.py +195 -0
  199. flyte-2.0.0b32.dist-info/METADATA +351 -0
  200. flyte-2.0.0b32.dist-info/RECORD +204 -0
  201. flyte-2.0.0b32.dist-info/WHEEL +5 -0
  202. flyte-2.0.0b32.dist-info/entry_points.txt +7 -0
  203. flyte-2.0.0b32.dist-info/licenses/LICENSE +201 -0
  204. flyte-2.0.0b32.dist-info/top_level.txt +1 -0
flyte/io/_dir.py ADDED
@@ -0,0 +1,910 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from typing import AsyncIterator, Dict, Generic, Iterator, List, Optional, Type, TypeVar, Union
6
+
7
+ from flyteidl2.core import literals_pb2, types_pb2
8
+ from fsspec.asyn import AsyncFileSystem
9
+ from fsspec.utils import get_protocol
10
+ from mashumaro.types import SerializableType
11
+ from pydantic import BaseModel, model_validator
12
+
13
+ import flyte.storage as storage
14
+ from flyte._context import internal_ctx
15
+ from flyte.io._file import File
16
+ from flyte.types import TypeEngine, TypeTransformer, TypeTransformerFailedError
17
+
18
+ # Type variable for the directory format
19
+ T = TypeVar("T")
20
+
21
+
22
+ class Dir(BaseModel, Generic[T], SerializableType):
23
+ """
24
+ A generic directory class representing a directory with files of a specified format.
25
+ Provides both async and sync interfaces for directory operations. All methods without _sync suffix are async.
26
+
27
+ The class should be instantiated using one of the class methods. The constructor should only be used to
28
+ instantiate references to existing remote directories.
29
+
30
+ The generic type T represents the format of the files in the directory.
31
+
32
+ Important methods:
33
+ - `from_existing_remote`: Create a Dir object referencing an existing remote directory.
34
+ - `from_local` / `from_local_sync`: Upload a local directory to remote storage.
35
+
36
+ **Asynchronous methods**:
37
+ - `walk`: Asynchronously iterate through files in the directory.
38
+ - `list_files`: Asynchronously get a list of all files (non-recursive).
39
+ - `download`: Asynchronously download the entire directory to a local path.
40
+ - `exists`: Asynchronously check if the directory exists.
41
+ - `get_file`: Asynchronously get a specific file from the directory by name.
42
+
43
+ **Synchronous methods** (suffixed with `_sync`):
44
+ - `walk_sync`: Synchronously iterate through files in the directory.
45
+ - `list_files_sync`: Synchronously get a list of all files (non-recursive).
46
+ - `download_sync`: Synchronously download the entire directory to a local path.
47
+ - `exists_sync`: Synchronously check if the directory exists.
48
+ - `get_file_sync`: Synchronously get a specific file from the directory by name.
49
+
50
+ Example: Walk through directory files recursively (Async).
51
+
52
+ ```python
53
+ @env.task
54
+ async def process_all_files(d: Dir) -> int:
55
+ file_count = 0
56
+ async for file in d.walk(recursive=True):
57
+ async with file.open("rb") as f:
58
+ content = await f.read()
59
+ # Process content
60
+ file_count += 1
61
+ return file_count
62
+ ```
63
+
64
+ Example: Walk through directory files recursively (Sync).
65
+
66
+ ```python
67
+ @env.task
68
+ def process_all_files_sync(d: Dir) -> int:
69
+ file_count = 0
70
+ for file in d.walk_sync(recursive=True):
71
+ with file.open_sync("rb") as f:
72
+ content = f.read()
73
+ # Process content
74
+ file_count += 1
75
+ return file_count
76
+ ```
77
+
78
+ Example: List files in directory (Async).
79
+
80
+ ```python
81
+ @env.task
82
+ async def count_files(d: Dir) -> int:
83
+ files = await d.list_files()
84
+ return len(files)
85
+ ```
86
+
87
+ Example: List files in directory (Sync).
88
+
89
+ ```python
90
+ @env.task
91
+ def count_files_sync(d: Dir) -> int:
92
+ files = d.list_files_sync()
93
+ return len(files)
94
+ ```
95
+
96
+ Example: Get a specific file from directory (Async).
97
+
98
+ ```python
99
+ @env.task
100
+ async def read_config_file(d: Dir) -> str:
101
+ config_file = await d.get_file("config.json")
102
+ if config_file:
103
+ async with config_file.open("rb") as f:
104
+ return (await f.read()).decode("utf-8")
105
+ return "Config not found"
106
+ ```
107
+
108
+ Example: Get a specific file from directory (Sync).
109
+
110
+ ```python
111
+ @env.task
112
+ def read_config_file_sync(d: Dir) -> str:
113
+ config_file = d.get_file_sync("config.json")
114
+ if config_file:
115
+ with config_file.open_sync("rb") as f:
116
+ return f.read().decode("utf-8")
117
+ return "Config not found"
118
+ ```
119
+
120
+ Example: Upload a local directory to remote storage (Async).
121
+
122
+ ```python
123
+ @env.task
124
+ async def upload_directory() -> Dir:
125
+ # Create local directory with files
126
+ os.makedirs("/tmp/my_data", exist_ok=True)
127
+ with open("/tmp/my_data/file1.txt", "w") as f:
128
+ f.write("data1")
129
+ # Upload to remote storage
130
+ return await Dir.from_local("/tmp/my_data/")
131
+ ```
132
+
133
+ Example: Upload a local directory to remote storage (Sync).
134
+
135
+ ```python
136
+ @env.task
137
+ def upload_directory_sync() -> Dir:
138
+ # Create local directory with files
139
+ os.makedirs("/tmp/my_data", exist_ok=True)
140
+ with open("/tmp/my_data/file1.txt", "w") as f:
141
+ f.write("data1")
142
+ # Upload to remote storage
143
+ return Dir.from_local_sync("/tmp/my_data/")
144
+ ```
145
+
146
+ Example: Download a directory to local storage (Async).
147
+
148
+ ```python
149
+ @env.task
150
+ async def download_directory(d: Dir) -> str:
151
+ local_path = await d.download()
152
+ # Process files in local directory
153
+ return local_path
154
+ ```
155
+
156
+ Example: Download a directory to local storage (Sync).
157
+
158
+ ```python
159
+ @env.task
160
+ def download_directory_sync(d: Dir) -> str:
161
+ local_path = d.download_sync()
162
+ # Process files in local directory
163
+ return local_path
164
+ ```
165
+
166
+ Example: Reference an existing remote directory.
167
+
168
+ ```python
169
+ @env.task
170
+ async def process_existing_dir() -> int:
171
+ d = Dir.from_existing_remote("s3://my-bucket/data/")
172
+ files = await d.list_files()
173
+ return len(files)
174
+ ```
175
+
176
+ Example: Check if directory exists (Async).
177
+
178
+ ```python
179
+ @env.task
180
+ async def check_directory(d: Dir) -> bool:
181
+ return await d.exists()
182
+ ```
183
+
184
+ Example: Check if directory exists (Sync).
185
+
186
+ ```python
187
+ @env.task
188
+ def check_directory_sync(d: Dir) -> bool:
189
+ return d.exists_sync()
190
+ ```
191
+
192
+ Args:
193
+ path: The path to the directory (can be local or remote)
194
+ name: Optional name for the directory (defaults to basename of path)
195
+ """
196
+
197
+ # Represents either a local or remote path.
198
+ path: str
199
+ name: Optional[str] = None
200
+ format: str = ""
201
+ hash: Optional[str] = None
202
+
203
+ class Config:
204
+ arbitrary_types_allowed = True
205
+
206
@model_validator(mode="before")
@classmethod
def pre_init(cls, data):
    """
    Internal: Pydantic "before" validator that fills in a default ``name``. Not intended for direct use.

    If the incoming mapping has no ``name`` (key missing or value ``None``), the final component of
    ``data["path"]`` is stored under ``"name"``. The mapping is updated in place and returned.
    """
    # A caller-supplied name always wins; only derive one when it is absent.
    if data.get("name") is not None:
        return data
    data["name"] = Path(data["path"]).name
    return data
213
+
214
def _serialize(self) -> Dict[str, Optional[str]]:
    """Internal: Dump this Dir's fields to a dictionary via ``model_dump``. Not intended for direct use."""
    return self.model_dump()
218
+
219
@classmethod
def _deserialize(cls, file_dump: Dict[str, Optional[str]]) -> Dir:
    """Internal: Rebuild a Dir from a dictionary dump via ``model_validate``. Not intended for direct use."""
    restored = cls.model_validate(file_dump)
    return restored
223
+
224
@classmethod
def schema_match(cls, incoming: dict) -> bool:
    """
    Internal: Check if incoming schema matches Dir schema. Not intended for direct use.

    The schemas match when both declare a non-empty ``required`` list and their ``type``, ``title``
    and set of required field names are all equal.

    Args:
        incoming: A JSON-schema dictionary to compare against ``cls.model_json_schema()``.

    Returns:
        True if the schemas match, False otherwise (never ``None``).
    """
    this_schema = cls.model_json_schema()
    current_required = this_schema.get("required")
    incoming_required = incoming.get("required")
    # A missing or empty ``required`` list on either side can never match.
    if not current_required or not incoming_required:
        return False
    return (
        incoming.get("type") == this_schema.get("type")
        and incoming.get("title") == this_schema.get("title")
        and set(current_required) == set(incoming_required)
    )
238
+
239
async def walk(self, recursive: bool = True, max_depth: Optional[int] = None) -> AsyncIterator[File[T]]:
    """
    Asynchronously walk through the directory and yield File objects.

    Use this to iterate through all files in a directory. Each yielded File can be read directly without
    downloading.

    Example (Async - Recursive):

    ```python
    @env.task
    async def list_all_files(d: Dir) -> list[str]:
        file_names = []
        async for file in d.walk(recursive=True):
            file_names.append(file.name)
        return file_names
    ```

    Example (Async - Non-recursive):

    ```python
    @env.task
    async def list_top_level_files(d: Dir) -> list[str]:
        file_names = []
        async for file in d.walk(recursive=False):
            file_names.append(file.name)
        return file_names
    ```

    Example (Async - With max depth):

    ```python
    @env.task
    async def list_files_max_depth(d: Dir) -> list[str]:
        file_names = []
        async for file in d.walk(recursive=True, max_depth=2):
            file_names.append(file.name)
        return file_names
    ```

    Args:
        recursive: If True, recursively walk subdirectories. If False, only list files in the top-level
            directory (``max_depth`` is then ignored).
        max_depth: Maximum depth for recursive walking. If None, walk through all subdirectories.

    Yields:
        File objects for each file found in the directory
    """
    fs = storage.get_underlying_filesystem(path=self.path)
    if not recursive:
        # fsspec counts the starting directory as depth 1, so maxdepth=1 yields only its own files.
        # (A value of 2 would also descend into first-level subdirectories.)
        max_depth = 1

    # Note if the path is actually just a file, no walking is done.
    if isinstance(fs, AsyncFileSystem):
        async for parent, _, files in fs._walk(self.path, maxdepth=max_depth):
            for file in files:
                full_file = fs.unstrip_protocol(parent + fs.sep + file)
                yield File[T](path=full_file)
    else:
        # Local paths are joined with os.path so they stay plain filesystem paths (no protocol prefix).
        is_local = "file" in fs.protocol
        for parent, _, files in fs.walk(self.path, maxdepth=max_depth):
            for file in files:
                if is_local:
                    full_file = os.path.join(parent, file)
                else:
                    full_file = fs.unstrip_protocol(parent + fs.sep + file)
                yield File[T](path=full_file)
304
+
305
def walk_sync(
    self, recursive: bool = True, file_pattern: str = "*", max_depth: Optional[int] = None
) -> Iterator[File[T]]:
    """
    Synchronously walk through the directory and yield File objects.

    Use this in non-async tasks to iterate through all files in a directory.

    Example (Sync - Recursive):

    ```python
    @env.task
    def list_all_files_sync(d: Dir) -> list[str]:
        file_names = []
        for file in d.walk_sync(recursive=True):
            file_names.append(file.name)
        return file_names
    ```

    Example (Sync - With file pattern):

    ```python
    @env.task
    def list_text_files(d: Dir) -> list[str]:
        file_names = []
        for file in d.walk_sync(recursive=True, file_pattern="*.txt"):
            file_names.append(file.name)
        return file_names
    ```

    Example (Sync - Recursive with max depth):

    ```python
    @env.task
    def list_files_limited(d: Dir) -> list[str]:
        file_names = []
        for file in d.walk_sync(recursive=True, max_depth=2):
            file_names.append(file.name)
        return file_names
    ```

    Args:
        recursive: If True, recursively walk subdirectories. If False, only list files in the top-level
            directory (``max_depth`` is then ignored).
        file_pattern: Glob pattern matched against each file's name (e.g., "*.txt", "*.csv").
            Default is "*" (all files).
        max_depth: Maximum depth for recursive walking. If None, walk through all subdirectories.

    Yields:
        File objects for each file found in the directory
    """
    import fnmatch

    fs = storage.get_underlying_filesystem(path=self.path)
    if not recursive:
        # fsspec counts the starting directory as depth 1, so maxdepth=1 yields only its own files.
        max_depth = 1

    # Local paths are joined with os.path so they stay plain filesystem paths (no protocol prefix).
    is_local = "file" in fs.protocol
    for parent, _, files in fs.walk(self.path, maxdepth=max_depth):
        for file in files:
            # The pattern is applied to the bare file name, not the full path.
            if not fnmatch.fnmatch(file, file_pattern):
                continue
            if is_local:
                full_file = os.path.join(parent, file)
            else:
                full_file = fs.unstrip_protocol(parent + fs.sep + file)
            yield File[T](path=full_file)
362
+
363
async def list_files(self) -> List[File[T]]:
    """
    Asynchronously collect the files of this directory into a list.

    This gathers every File yielded by ``self.walk(recursive=False)``; use it when you want the
    whole listing at once rather than iterating lazily.

    Returns:
        A list of File objects produced by the non-recursive walk

    Example (Async):

    ```python
    @env.task
    async def count_files(d: Dir) -> int:
        files = await d.list_files()
        return len(files)
    ```

    Example (Async - Process files):

    ```python
    @env.task
    async def process_all_files(d: Dir) -> list[str]:
        contents = []
        for file in await d.list_files():
            async with file.open("rb") as f:
                contents.append((await f.read()).decode("utf-8"))
        return contents
    ```
    """
    # todo: this should probably also just defer to fsspec.find()
    return [entry async for entry in self.walk(recursive=False)]
400
+
401
def list_files_sync(self) -> List[File[T]]:
    """
    Synchronously collect the files of this directory into a list.

    This gathers every File yielded by ``self.walk_sync(recursive=False)``; use it in non-async
    tasks when you want the whole listing at once.

    Returns:
        A list of File objects produced by ``walk_sync(recursive=False)``

    Example (Sync):

    ```python
    @env.task
    def count_files_sync(d: Dir) -> int:
        files = d.list_files_sync()
        return len(files)
    ```

    Example (Sync - Process files):

    ```python
    @env.task
    def process_all_files_sync(d: Dir) -> list[str]:
        contents = []
        for file in d.list_files_sync():
            with file.open_sync("rb") as f:
                contents.append(f.read().decode("utf-8"))
        return contents
    ```
    """
    collected: List[File[T]] = []
    collected.extend(self.walk_sync(recursive=False))
    return collected
434
+
435
async def download(self, local_path: Optional[Union[str, Path]] = None) -> str:
    """
    Asynchronously download the entire directory to a local path.

    Use this when you need to download all files in a directory to your local filesystem for processing.

    Example (Async):

    ```python
    @env.task
    async def download_directory(d: Dir) -> str:
        local_dir = await d.download()
        # Process files in the local directory
        return local_dir
    ```

    Example (Async - Download to specific path):

    ```python
    @env.task
    async def download_to_path(d: Dir) -> str:
        local_dir = await d.download("/tmp/my_data/")
        return local_dir
    ```

    Args:
        local_path: The local path to download the directory to. If None, a temporary
            directory will be used and a path will be generated.

    Returns:
        The path to the downloaded directory. If this Dir is already local and no different
        ``local_path`` was requested, ``self.path`` is returned unchanged and nothing is copied.
    """
    # If no local_path specified, create a unique path + append source directory name
    if local_path is None:
        unique_path = storage.get_random_local_path()
        source_dirname = Path(self.path).name  # will need to be updated for windows
        local_dest = str(Path(unique_path) / source_dirname)
    else:
        # If local_path is specified, use it directly (contents go into it)
        local_dest = str(local_path)

    if not storage.is_remote(self.path):
        # Compare as strings: a ``Path`` never equals a ``str``, so comparing ``local_path`` directly
        # would miss the "same directory" case and copy the tree onto itself.
        if not local_path or local_dest == self.path:
            # Skip copying
            return self.path

        # Run the blocking copy in the default executor so the event loop is not stalled.
        import asyncio
        import shutil

        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, lambda: shutil.copytree(self.path, local_dest, dirs_exist_ok=True))
        return local_dest
    return await storage.get(self.path, local_dest, recursive=True)
492
+
493
def download_sync(self, local_path: Optional[Union[str, Path]] = None) -> str:
    """
    Synchronously download the entire directory to a local path.

    Use this in non-async tasks when you need to download all files in a directory to your local filesystem.

    Example (Sync):

    ```python
    @env.task
    def download_directory_sync(d: Dir) -> str:
        local_dir = d.download_sync()
        # Process files in the local directory
        return local_dir
    ```

    Example (Sync - Download to specific path):

    ```python
    @env.task
    def download_to_path_sync(d: Dir) -> str:
        local_dir = d.download_sync("/tmp/my_data/")
        return local_dir
    ```

    Args:
        local_path: The local path to download the directory to. If None, a temporary
            directory will be used and a path will be generated.

    Returns:
        The path to the downloaded directory. If this Dir is already local and no different
        ``local_path`` was requested, ``self.path`` is returned unchanged and nothing is copied.
    """
    # If no local_path specified, create a unique path + append source directory name
    if local_path is None:
        unique_path = storage.get_random_local_path()
        source_dirname = Path(self.path).name
        local_dest = str(Path(unique_path) / source_dirname)
    else:
        # If local_path is specified, use it directly (contents go into it)
        local_dest = str(local_path)

    if not storage.is_remote(self.path):
        # Compare as strings: a ``Path`` never equals a ``str``, so comparing ``local_path`` directly
        # would miss the "same directory" case and copy the tree onto itself.
        if not local_path or local_dest == self.path:
            # Skip copying
            return self.path

        # Local-to-local: a plain blocking copy is fine in the synchronous API.
        import shutil

        shutil.copytree(self.path, local_dest, dirs_exist_ok=True)
        return local_dest

    fs = storage.get_underlying_filesystem(path=self.path)
    fs.get(self.path, local_dest, recursive=True)
    return local_dest
547
+
548
@classmethod
async def from_local(
    cls,
    local_path: Union[str, Path],
    remote_destination: Optional[str] = None,
    dir_cache_key: Optional[str] = None,
) -> Dir[T]:
    """
    Asynchronously build a Dir from a local directory, uploading it when the target is not local.

    Use this in async tasks when a directory on local disk must become a Dir that other tasks can consume.

    Example (Async):

    ```python
    @env.task
    async def upload_local_directory() -> Dir:
        os.makedirs("/tmp/data_dir", exist_ok=True)
        with open("/tmp/data_dir/file1.txt", "w") as f:
            f.write("data1")
        return await Dir.from_local("/tmp/data_dir/")
    ```

    Example (Async - With specific destination):

    ```python
    @env.task
    async def upload_to_specific_path() -> Dir:
        return await Dir.from_local("/tmp/data_dir/", "s3://my-bucket/data/")
    ```

    Example (Async - With cache key):

    ```python
    @env.task
    async def upload_with_cache_key() -> Dir:
        return await Dir.from_local("/tmp/data_dir/", dir_cache_key="my_cache_key_123")
    ```

    Args:
        local_path: Path to the local directory
        remote_destination: Optional remote path to store the directory. If None, a path will be
            automatically generated.
        dir_cache_key: Optional precomputed hash value stored on the Dir as ``hash``, for cache key
            computation when this Dir is used as an input to discoverable tasks.

    Returns:
        A new Dir instance. When no destination is given and the generated location uses a file
        protocol, nothing is copied and the Dir points at the absolute local path.
    """
    source = str(local_path)
    dirname = os.path.basename(os.path.normpath(source))

    if remote_destination:
        target = remote_destination
    else:
        target = internal_ctx().raw_data.get_random_remote_path(dirname)
    protocol = get_protocol(target)

    # Shortcut for local, don't copy and just return
    stays_local = "file" in protocol and remote_destination is None
    if stays_local:
        output_path = str(Path(local_path).absolute())
    else:
        # todo: in the future, mirror File and set the file to_path here
        # NOTE(review): ``remote_destination`` (possibly None) is passed here, not the resolved target;
        # presumably storage.put chooses a location when to_path is None -- confirm.
        output_path = await storage.put(from_path=source, to_path=remote_destination, recursive=True)
    return cls(path=output_path, name=dirname, hash=dir_cache_key)
616
+
617
@classmethod
def from_local_sync(
    cls,
    local_path: Union[str, Path],
    remote_destination: Optional[str] = None,
    dir_cache_key: Optional[str] = None,
) -> Dir[T]:
    """
    Synchronously build a Dir from a local directory, uploading it when the target is not local.

    Use this in non-async tasks when a directory on local disk must become a Dir that other tasks can consume.

    Example (Sync):

    ```python
    @env.task
    def upload_local_directory_sync() -> Dir:
        os.makedirs("/tmp/data_dir", exist_ok=True)
        with open("/tmp/data_dir/file1.txt", "w") as f:
            f.write("data1")
        return Dir.from_local_sync("/tmp/data_dir/")
    ```

    Example (Sync - With specific destination):

    ```python
    @env.task
    def upload_to_specific_path_sync() -> Dir:
        return Dir.from_local_sync("/tmp/data_dir/", "s3://my-bucket/data/")
    ```

    Example (Sync - With cache key):

    ```python
    @env.task
    def upload_with_cache_key_sync() -> Dir:
        return Dir.from_local_sync("/tmp/data_dir/", dir_cache_key="my_cache_key_123")
    ```

    Args:
        local_path: Path to the local directory
        remote_destination: Optional remote path to store the directory. If None, a path will be
            automatically generated.
        dir_cache_key: Optional precomputed hash value stored on the Dir as ``hash``, for cache key
            computation when this Dir is used as an input to discoverable tasks.

    Returns:
        A new Dir instance. When no destination is given and the generated location uses a file
        protocol, nothing is copied and the Dir points at the absolute local path.
    """
    source = str(local_path)
    dirname = os.path.basename(os.path.normpath(source))

    if remote_destination:
        target = remote_destination
    else:
        target = internal_ctx().raw_data.get_random_remote_path(dirname)
    protocol = get_protocol(target)

    # Shortcut for local, don't copy and just return
    stays_local = "file" in protocol and remote_destination is None
    if stays_local:
        return cls(path=str(Path(local_path).absolute()), name=dirname, hash=dir_cache_key)

    # Upload through the filesystem that owns the resolved target path.
    remote_fs = storage.get_underlying_filesystem(path=target)
    remote_fs.put(source, target, recursive=True)
    return cls(path=target, name=dirname, hash=dir_cache_key)
687
+
688
@classmethod
def from_existing_remote(cls, remote_path: str, dir_cache_key: Optional[str] = None) -> Dir[T]:
    """
    Build a Dir that refers to a directory already present in remote storage.

    Nothing is uploaded or checked here: the given path is simply wrapped in a new instance.

    Example:

    ```python
    @env.task
    async def process_existing_directory() -> int:
        d = Dir.from_existing_remote("s3://my-bucket/data/")
        files = await d.list_files()
        return len(files)
    ```

    Example (With cache key):

    ```python
    @env.task
    async def process_with_cache_key() -> int:
        d = Dir.from_existing_remote("s3://my-bucket/data/", dir_cache_key="abc123")
        files = await d.list_files()
        return len(files)
    ```

    Args:
        remote_path: Location of the existing remote directory.
        dir_cache_key: Optional value stored as the instance's ``hash``, used for cache key
            computation. Defaults to None.

    Returns:
        A new instance of ``cls`` whose ``path`` is ``remote_path``.
    """
    existing_dir = cls(path=remote_path, hash=dir_cache_key)
    return existing_dir
724
+
725
async def exists(self) -> bool:
    """
    Check, from async code, whether this directory's path exists.

    The filesystem backing ``self.path`` is looked up first. If it is an ``AsyncFileSystem`` its
    ``_exists`` coroutine is awaited; any other filesystem is queried through its regular
    ``exists`` method.

    Returns:
        True if the directory exists, False otherwise

    Example (Async):

    ```python
    @env.task
    async def check_directory(d: Dir) -> bool:
        if await d.exists():
            print("Directory exists!")
            return True
        return False
    ```
    """
    filesystem = storage.get_underlying_filesystem(path=self.path)
    if not isinstance(filesystem, AsyncFileSystem):
        return filesystem.exists(self.path)
    return await filesystem._exists(self.path)
748
+
749
def exists_sync(self) -> bool:
    """
    Check, from synchronous code, whether this directory's path exists.

    Intended for non-async tasks; the answer comes from the ``exists`` method of the filesystem
    that backs ``self.path``.

    Returns:
        True if the directory exists, False otherwise

    Example (Sync):

    ```python
    @env.task
    def check_directory_sync(d: Dir) -> bool:
        if d.exists_sync():
            print("Directory exists!")
            return True
        return False
    ```
    """
    filesystem = storage.get_underlying_filesystem(path=self.path)
    found = filesystem.exists(self.path)
    return found
771
+
772
async def get_file(self, file_name: str) -> Optional[File[T]]:
    """
    Asynchronously get a specific file from the directory by name.

    Use this when you know the name of a specific file in the directory you want to access.

    Example (Async):

    ```python
    @env.task
    async def read_specific_file(d: Dir) -> str:
        file = await d.get_file("data.csv")
        if file:
            async with file.open("rb") as f:
                content = await f.read()
                return content.decode("utf-8")
        return "File not found"
    ```

    Args:
        file_name: The name of the file to get

    Returns:
        A File instance if the file exists, None otherwise
    """
    fs = storage.get_underlying_filesystem(path=self.path)

    # Directory paths are commonly written with a trailing separator (e.g. "s3://my-bucket/data/").
    # Only insert a separator when one is missing, so the result is ".../data/name" rather than
    # ".../data//name", which object stores treat as a different key.
    if self.path.endswith(fs.sep):
        file_path = self.path + file_name
    else:
        file_path = fs.sep.join([self.path, file_name])

    # Mirror `exists`: on an async filesystem await the native coroutine instead of calling the
    # synchronous wrapper from inside a running coroutine.
    if isinstance(fs, AsyncFileSystem):
        found = await fs._exists(file_path)
    else:
        found = fs.exists(file_path)

    return File[T](path=file_path) if found else None
804
+
805
def get_file_sync(self, file_name: str) -> Optional[File[T]]:
    """
    Synchronously get a specific file from the directory by name.

    Use this in non-async tasks when you know the name of a specific file in the directory you want to access.

    Example (Sync):

    ```python
    @env.task
    def read_specific_file_sync(d: Dir) -> str:
        file = d.get_file_sync("data.csv")
        if file:
            with file.open_sync("rb") as f:
                content = f.read()
                return content.decode("utf-8")
        return "File not found"
    ```

    Args:
        file_name: The name of the file to get

    Returns:
        A File instance if the file exists, None otherwise
    """
    # Join with the filesystem's own separator rather than os.path.join: the latter uses the host
    # OS separator (a backslash on Windows, which corrupts remote URIs) and drops the directory
    # entirely when file_name is absolute. This also keeps the path identical to what the async
    # `get_file` builds for the same directory and name.
    fs = storage.get_underlying_filesystem(path=self.path)
    sep = fs.sep
    if self.path.endswith(sep):
        # A trailing separator is already present; adding another would yield "dir//name".
        file_path = self.path + file_name
    else:
        file_path = sep.join([self.path, file_name])

    file = File[T](path=file_path)
    return file if file.exists_sync() else None
836
+
837
+
838
class DirTransformer(TypeTransformer[Dir]):
    """
    Type transformer that maps Dir objects to and from multipart blob literals.

    Only the reference (uri, format, hash) is converted; no data is uploaded or downloaded by
    this transformer. Performing i/o is the responsibility of the user.
    """

    def __init__(self):
        super().__init__(name="Dir", t=Dir)

    def get_literal_type(self, t: Type[Dir]) -> types_pb2.LiteralType:
        """Return the Flyte literal type for a Dir: a multipart blob with an empty format."""
        multipart = types_pb2.BlobType.BlobDimensionality.MULTIPART
        # todo: set format from generic
        blob_type = types_pb2.BlobType(
            format="",  # Format is determined by the generic type T
            dimensionality=multipart,
        )
        return types_pb2.LiteralType(blob=blob_type)

    async def to_literal(
        self,
        python_val: Dir,
        python_type: Type[Dir],
        expected: types_pb2.LiteralType,
    ) -> literals_pb2.Literal:
        """
        Convert a Dir object to a Flyte blob literal.

        Args:
            python_val: The Dir to convert.
            python_type: The declared Python type (not consulted here).
            expected: The expected literal type (not consulted here).

        Returns:
            A literal holding a multipart blob whose uri is the Dir's path and whose hash is the
            Dir's hash, or None when that hash is falsy.

        Raises:
            TypeTransformerFailedError: If ``python_val`` is not a Dir instance.
        """
        if not isinstance(python_val, Dir):
            raise TypeTransformerFailedError(f"Expected Dir object, received {type(python_val)}")

        blob_type = types_pb2.BlobType(
            format=python_val.format,
            dimensionality=types_pb2.BlobType.BlobDimensionality.MULTIPART,
        )
        blob = literals_pb2.Blob(
            metadata=literals_pb2.BlobMetadata(type=blob_type),
            uri=python_val.path,
        )
        return literals_pb2.Literal(
            scalar=literals_pb2.Scalar(blob=blob),
            hash=python_val.hash or None,
        )

    async def to_python_value(
        self,
        lv: literals_pb2.Literal,
        expected_python_type: Type[Dir],
    ) -> Dir:
        """
        Convert a Flyte blob literal to a Dir object.

        Args:
            lv: The literal to convert; it must carry a multipart blob scalar.
            expected_python_type: The requested Python type (not consulted here).

        Returns:
            A Dir built from the blob's uri and format, named after the last component of the
            uri, carrying the literal's hash (None when the hash is falsy).

        Raises:
            TypeTransformerFailedError: If the literal is not a blob or the blob is not multipart.
        """
        if not lv.scalar.HasField("blob"):
            raise TypeTransformerFailedError(f"Expected blob literal, received {lv}")

        blob = lv.scalar.blob
        if blob.metadata.type.dimensionality != types_pb2.BlobType.BlobDimensionality.MULTIPART:
            raise TypeTransformerFailedError(
                f"Expected multipart, received {lv.scalar.blob.metadata.type.dimensionality}"
            )

        location = blob.uri
        return Dir(
            path=location,
            name=Path(location).name,
            format=blob.metadata.type.format,
            hash=lv.hash or None,
        )

    def guess_python_type(self, literal_type: types_pb2.LiteralType) -> Type[Dir]:
        """
        Guess the Python type from a Flyte literal type.

        Returns:
            ``Dir`` when the literal type is a multipart blob.

        Raises:
            ValueError: For any other literal type.
        """
        is_multipart_blob = (
            literal_type.HasField("blob")
            and literal_type.blob.dimensionality == types_pb2.BlobType.BlobDimensionality.MULTIPART
        )
        if not is_multipart_blob:
            raise ValueError(f"Cannot guess python type from {literal_type}")
        return Dir
908
+
909
+
910
+ TypeEngine.register(DirTransformer())