flyte 2.0.0b32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of flyte might be problematic.

Files changed (204)
  1. flyte/__init__.py +108 -0
  2. flyte/_bin/__init__.py +0 -0
  3. flyte/_bin/debug.py +38 -0
  4. flyte/_bin/runtime.py +195 -0
  5. flyte/_bin/serve.py +178 -0
  6. flyte/_build.py +26 -0
  7. flyte/_cache/__init__.py +12 -0
  8. flyte/_cache/cache.py +147 -0
  9. flyte/_cache/defaults.py +9 -0
  10. flyte/_cache/local_cache.py +216 -0
  11. flyte/_cache/policy_function_body.py +42 -0
  12. flyte/_code_bundle/__init__.py +8 -0
  13. flyte/_code_bundle/_ignore.py +121 -0
  14. flyte/_code_bundle/_packaging.py +218 -0
  15. flyte/_code_bundle/_utils.py +347 -0
  16. flyte/_code_bundle/bundle.py +266 -0
  17. flyte/_constants.py +1 -0
  18. flyte/_context.py +155 -0
  19. flyte/_custom_context.py +73 -0
  20. flyte/_debug/__init__.py +0 -0
  21. flyte/_debug/constants.py +38 -0
  22. flyte/_debug/utils.py +17 -0
  23. flyte/_debug/vscode.py +307 -0
  24. flyte/_deploy.py +408 -0
  25. flyte/_deployer.py +109 -0
  26. flyte/_doc.py +29 -0
  27. flyte/_docstring.py +32 -0
  28. flyte/_environment.py +122 -0
  29. flyte/_excepthook.py +37 -0
  30. flyte/_group.py +32 -0
  31. flyte/_hash.py +8 -0
  32. flyte/_image.py +1055 -0
  33. flyte/_initialize.py +628 -0
  34. flyte/_interface.py +119 -0
  35. flyte/_internal/__init__.py +3 -0
  36. flyte/_internal/controllers/__init__.py +129 -0
  37. flyte/_internal/controllers/_local_controller.py +239 -0
  38. flyte/_internal/controllers/_trace.py +48 -0
  39. flyte/_internal/controllers/remote/__init__.py +58 -0
  40. flyte/_internal/controllers/remote/_action.py +211 -0
  41. flyte/_internal/controllers/remote/_client.py +47 -0
  42. flyte/_internal/controllers/remote/_controller.py +583 -0
  43. flyte/_internal/controllers/remote/_core.py +465 -0
  44. flyte/_internal/controllers/remote/_informer.py +381 -0
  45. flyte/_internal/controllers/remote/_service_protocol.py +50 -0
  46. flyte/_internal/imagebuild/__init__.py +3 -0
  47. flyte/_internal/imagebuild/docker_builder.py +706 -0
  48. flyte/_internal/imagebuild/image_builder.py +277 -0
  49. flyte/_internal/imagebuild/remote_builder.py +386 -0
  50. flyte/_internal/imagebuild/utils.py +78 -0
  51. flyte/_internal/resolvers/__init__.py +0 -0
  52. flyte/_internal/resolvers/_task_module.py +21 -0
  53. flyte/_internal/resolvers/common.py +31 -0
  54. flyte/_internal/resolvers/default.py +28 -0
  55. flyte/_internal/runtime/__init__.py +0 -0
  56. flyte/_internal/runtime/convert.py +486 -0
  57. flyte/_internal/runtime/entrypoints.py +204 -0
  58. flyte/_internal/runtime/io.py +188 -0
  59. flyte/_internal/runtime/resources_serde.py +152 -0
  60. flyte/_internal/runtime/reuse.py +125 -0
  61. flyte/_internal/runtime/rusty.py +193 -0
  62. flyte/_internal/runtime/task_serde.py +362 -0
  63. flyte/_internal/runtime/taskrunner.py +209 -0
  64. flyte/_internal/runtime/trigger_serde.py +160 -0
  65. flyte/_internal/runtime/types_serde.py +54 -0
  66. flyte/_keyring/__init__.py +0 -0
  67. flyte/_keyring/file.py +115 -0
  68. flyte/_logging.py +300 -0
  69. flyte/_map.py +312 -0
  70. flyte/_module.py +72 -0
  71. flyte/_pod.py +30 -0
  72. flyte/_resources.py +473 -0
  73. flyte/_retry.py +32 -0
  74. flyte/_reusable_environment.py +102 -0
  75. flyte/_run.py +724 -0
  76. flyte/_secret.py +96 -0
  77. flyte/_task.py +550 -0
  78. flyte/_task_environment.py +316 -0
  79. flyte/_task_plugins.py +47 -0
  80. flyte/_timeout.py +47 -0
  81. flyte/_tools.py +27 -0
  82. flyte/_trace.py +119 -0
  83. flyte/_trigger.py +1000 -0
  84. flyte/_utils/__init__.py +30 -0
  85. flyte/_utils/asyn.py +121 -0
  86. flyte/_utils/async_cache.py +139 -0
  87. flyte/_utils/coro_management.py +27 -0
  88. flyte/_utils/docker_credentials.py +173 -0
  89. flyte/_utils/file_handling.py +72 -0
  90. flyte/_utils/helpers.py +134 -0
  91. flyte/_utils/lazy_module.py +54 -0
  92. flyte/_utils/module_loader.py +104 -0
  93. flyte/_utils/org_discovery.py +57 -0
  94. flyte/_utils/uv_script_parser.py +49 -0
  95. flyte/_version.py +34 -0
  96. flyte/app/__init__.py +22 -0
  97. flyte/app/_app_environment.py +157 -0
  98. flyte/app/_deploy.py +125 -0
  99. flyte/app/_input.py +160 -0
  100. flyte/app/_runtime/__init__.py +3 -0
  101. flyte/app/_runtime/app_serde.py +347 -0
  102. flyte/app/_types.py +101 -0
  103. flyte/app/extras/__init__.py +3 -0
  104. flyte/app/extras/_fastapi.py +151 -0
  105. flyte/cli/__init__.py +12 -0
  106. flyte/cli/_abort.py +28 -0
  107. flyte/cli/_build.py +114 -0
  108. flyte/cli/_common.py +468 -0
  109. flyte/cli/_create.py +371 -0
  110. flyte/cli/_delete.py +45 -0
  111. flyte/cli/_deploy.py +293 -0
  112. flyte/cli/_gen.py +176 -0
  113. flyte/cli/_get.py +370 -0
  114. flyte/cli/_option.py +33 -0
  115. flyte/cli/_params.py +554 -0
  116. flyte/cli/_plugins.py +209 -0
  117. flyte/cli/_run.py +597 -0
  118. flyte/cli/_serve.py +64 -0
  119. flyte/cli/_update.py +37 -0
  120. flyte/cli/_user.py +17 -0
  121. flyte/cli/main.py +221 -0
  122. flyte/config/__init__.py +3 -0
  123. flyte/config/_config.py +248 -0
  124. flyte/config/_internal.py +73 -0
  125. flyte/config/_reader.py +225 -0
  126. flyte/connectors/__init__.py +11 -0
  127. flyte/connectors/_connector.py +270 -0
  128. flyte/connectors/_server.py +197 -0
  129. flyte/connectors/utils.py +135 -0
  130. flyte/errors.py +243 -0
  131. flyte/extend.py +19 -0
  132. flyte/extras/__init__.py +5 -0
  133. flyte/extras/_container.py +286 -0
  134. flyte/git/__init__.py +3 -0
  135. flyte/git/_config.py +21 -0
  136. flyte/io/__init__.py +29 -0
  137. flyte/io/_dataframe/__init__.py +131 -0
  138. flyte/io/_dataframe/basic_dfs.py +223 -0
  139. flyte/io/_dataframe/dataframe.py +1026 -0
  140. flyte/io/_dir.py +910 -0
  141. flyte/io/_file.py +914 -0
  142. flyte/io/_hashing_io.py +342 -0
  143. flyte/models.py +479 -0
  144. flyte/py.typed +0 -0
  145. flyte/remote/__init__.py +35 -0
  146. flyte/remote/_action.py +738 -0
  147. flyte/remote/_app.py +57 -0
  148. flyte/remote/_client/__init__.py +0 -0
  149. flyte/remote/_client/_protocols.py +189 -0
  150. flyte/remote/_client/auth/__init__.py +12 -0
  151. flyte/remote/_client/auth/_auth_utils.py +14 -0
  152. flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
  153. flyte/remote/_client/auth/_authenticators/base.py +403 -0
  154. flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  155. flyte/remote/_client/auth/_authenticators/device_code.py +117 -0
  156. flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
  157. flyte/remote/_client/auth/_authenticators/factory.py +200 -0
  158. flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
  159. flyte/remote/_client/auth/_channel.py +213 -0
  160. flyte/remote/_client/auth/_client_config.py +85 -0
  161. flyte/remote/_client/auth/_default_html.py +32 -0
  162. flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  163. flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
  164. flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
  165. flyte/remote/_client/auth/_keyring.py +152 -0
  166. flyte/remote/_client/auth/_token_client.py +260 -0
  167. flyte/remote/_client/auth/errors.py +16 -0
  168. flyte/remote/_client/controlplane.py +128 -0
  169. flyte/remote/_common.py +30 -0
  170. flyte/remote/_console.py +19 -0
  171. flyte/remote/_data.py +161 -0
  172. flyte/remote/_logs.py +185 -0
  173. flyte/remote/_project.py +88 -0
  174. flyte/remote/_run.py +386 -0
  175. flyte/remote/_secret.py +142 -0
  176. flyte/remote/_task.py +527 -0
  177. flyte/remote/_trigger.py +306 -0
  178. flyte/remote/_user.py +33 -0
  179. flyte/report/__init__.py +3 -0
  180. flyte/report/_report.py +182 -0
  181. flyte/report/_template.html +124 -0
  182. flyte/storage/__init__.py +36 -0
  183. flyte/storage/_config.py +237 -0
  184. flyte/storage/_parallel_reader.py +274 -0
  185. flyte/storage/_remote_fs.py +34 -0
  186. flyte/storage/_storage.py +456 -0
  187. flyte/storage/_utils.py +5 -0
  188. flyte/syncify/__init__.py +56 -0
  189. flyte/syncify/_api.py +375 -0
  190. flyte/types/__init__.py +52 -0
  191. flyte/types/_interface.py +40 -0
  192. flyte/types/_pickle.py +145 -0
  193. flyte/types/_renderer.py +162 -0
  194. flyte/types/_string_literals.py +119 -0
  195. flyte/types/_type_engine.py +2254 -0
  196. flyte/types/_utils.py +80 -0
  197. flyte-2.0.0b32.data/scripts/debug.py +38 -0
  198. flyte-2.0.0b32.data/scripts/runtime.py +195 -0
  199. flyte-2.0.0b32.dist-info/METADATA +351 -0
  200. flyte-2.0.0b32.dist-info/RECORD +204 -0
  201. flyte-2.0.0b32.dist-info/WHEEL +5 -0
  202. flyte-2.0.0b32.dist-info/entry_points.txt +7 -0
  203. flyte-2.0.0b32.dist-info/licenses/LICENSE +201 -0
  204. flyte-2.0.0b32.dist-info/top_level.txt +1 -0
flyte/io/_file.py ADDED
@@ -0,0 +1,914 @@
from __future__ import annotations

import inspect
import os
import typing
from contextlib import asynccontextmanager, contextmanager
from pathlib import Path
from typing import (
    IO,
    Annotated,
    Any,
    AsyncGenerator,
    Dict,
    Generator,
    Generic,
    Optional,
    Type,
    TypeVar,
    Union,
)

import aiofiles
from flyteidl2.core import literals_pb2, types_pb2
from fsspec.utils import get_protocol
from mashumaro.types import SerializableType
from pydantic import BaseModel, Field, model_validator
from pydantic.json_schema import SkipJsonSchema

import flyte.errors
import flyte.storage as storage
from flyte._context import internal_ctx
from flyte._initialize import requires_initialization
from flyte.io._hashing_io import AsyncHashingReader, HashingWriter, HashMethod, PrecomputedValue
from flyte.types import TypeEngine, TypeTransformer, TypeTransformerFailedError

if typing.TYPE_CHECKING:
    from obstore import AsyncReadableFile, AsyncWritableFile

# Type variable for the file format
T = TypeVar("T")


class File(BaseModel, Generic[T], SerializableType):
    """
    A generic file class representing a file with a specified format.
    Provides both async and sync interfaces for file operations. All methods without the `_sync` suffix are async.

    The class should be instantiated using one of the class methods. The constructor should be used only to
    instantiate references to existing remote objects.

    The generic type T represents the format of the file.

    Important methods:
    - `from_existing_remote`: Create a File object from an existing remote file.
    - `new_remote`: Create a new File reference for a remote file that will be written to.

    **Asynchronous methods**:
    - `open`: Asynchronously open the file and return a file-like object.
    - `download`: Asynchronously download the file to a local path.
    - `from_local`: Asynchronously create a File object from a local file, uploading it to remote storage.
    - `exists`: Asynchronously check if the file exists.

    **Synchronous methods** (suffixed with `_sync`):
    - `open_sync`: Synchronously open the file and return a file-like object.
    - `download_sync`: Synchronously download the file to a local path.
    - `from_local_sync`: Synchronously create a File object from a local file, uploading it to remote storage.
    - `exists_sync`: Synchronously check if the file exists.

    Example: Read a file input in a Task (Async).

    ```python
    @env.task
    async def read_file(file: File) -> str:
        async with file.open("rb") as f:
            content = bytes(await f.read())
        return content.decode("utf-8")
    ```

    Example: Read a file input in a Task (Sync).

    ```python
    @env.task
    def read_file_sync(file: File) -> str:
        with file.open_sync("rb") as f:
            content = f.read()
        return content.decode("utf-8")
    ```

    Example: Write a file by streaming it directly to blob storage (Async).

    ```python
    @env.task
    async def write_file() -> File:
        file = File.new_remote()
        async with file.open("wb") as f:
            await f.write(b"Hello, World!")
        return file
    ```

    Example: Upload a local file to remote storage (Async).

    ```python
    @env.task
    async def upload_file() -> File:
        # Write to local file first
        with open("/tmp/data.csv", "w") as f:
            f.write("col1,col2\\n1,2\\n3,4\\n")
        # Upload to remote storage
        return await File.from_local("/tmp/data.csv")
    ```

    Example: Upload a local file to remote storage (Sync).

    ```python
    @env.task
    def upload_file_sync() -> File:
        # Write to local file first
        with open("/tmp/data.csv", "w") as f:
            f.write("col1,col2\\n1,2\\n3,4\\n")
        # Upload to remote storage
        return File.from_local_sync("/tmp/data.csv")
    ```

    Example: Download a file to local storage (Async).

    ```python
    @env.task
    async def download_file(file: File) -> str:
        local_path = await file.download()
        # Process the local file
        with open(local_path, "r") as f:
            return f.read()
    ```

    Example: Download a file to local storage (Sync).

    ```python
    @env.task
    def download_file_sync(file: File) -> str:
        local_path = file.download_sync()
        # Process the local file
        with open(local_path, "r") as f:
            return f.read()
    ```

    Example: Reference an existing remote file.

    ```python
    @env.task
    async def process_existing_file() -> str:
        file = File.from_existing_remote("s3://my-bucket/data.csv")
        async with file.open("rb") as f:
            content = await f.read()
        return content.decode("utf-8")
    ```

    Example: Check if a file exists (Async).

    ```python
    @env.task
    async def check_file(file: File) -> bool:
        return await file.exists()
    ```

    Example: Check if a file exists (Sync).

    ```python
    @env.task
    def check_file_sync(file: File) -> bool:
        return file.exists_sync()
    ```

    Example: Pass through a file without copying.

    ```python
    @env.task
    async def pass_through(file: File) -> File:
        # No copy occurs - just passes the reference
        return file
    ```

    Args:
        path: The path to the file (can be local or remote)
        name: Optional name for the file (defaults to the basename of the path)
    """

    path: str
    name: Optional[str] = None
    format: str = ""
    hash: Optional[str] = None
    hash_method: Annotated[Optional[HashMethod], Field(default=None, exclude=True), SkipJsonSchema()] = None

    class Config:
        arbitrary_types_allowed = True

    @model_validator(mode="before")
    @classmethod
    def pre_init(cls, data):
        """Internal: Pydantic validator to set default name from path. Not intended for direct use."""
        if data.get("name") is None:
            data["name"] = Path(data["path"]).name
        return data

    def _serialize(self) -> Dict[str, Optional[str]]:
        """Internal: Serialize File to dictionary. Not intended for direct use."""
        pyd_dump = self.model_dump()
        return pyd_dump

    @classmethod
    def _deserialize(cls, file_dump: Dict[str, Optional[str]]) -> File:
        """Internal: Deserialize File from dictionary. Not intended for direct use."""
        return File.model_validate(file_dump)

    @classmethod
    def schema_match(cls, incoming: dict):
        """Internal: Check if incoming schema matches File schema. Not intended for direct use."""
        this_schema = cls.model_json_schema()
        current_required = this_schema.get("required")
        incoming_required = incoming.get("required")
        if (
            current_required
            and incoming_required
            and incoming.get("type") == this_schema.get("type")
            and incoming.get("title") == this_schema.get("title")
            and set(current_required) == set(incoming_required)
        ):
            return True

    @classmethod
    @requires_initialization
    def new_remote(cls, file_name: Optional[str] = None, hash_method: Optional[HashMethod | str] = None) -> File[T]:
        """
        Create a new File reference for a remote file that will be written to.

        Use this when you want to create a new file and write to it directly without creating a local file first.

        Example (Async):

        ```python
        @env.task
        async def create_csv() -> File:
            df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
            file = File.new_remote()
            async with file.open("wb") as f:
                df.to_csv(f)
            return file
        ```

        Args:
            file_name: Optional string specifying a remote file name. If not set,
                a generated file name will be returned.
            hash_method: Optional HashMethod or string to use for cache key computation. If a string is provided,
                it will be used as a precomputed cache key. If a HashMethod is provided, it will be used
                to compute the hash as data is written.

        Returns:
            A new File instance with a generated remote path
        """
        ctx = internal_ctx()
        known_cache_key = hash_method if isinstance(hash_method, str) else None
        method = hash_method if isinstance(hash_method, HashMethod) else None

        return cls(
            path=ctx.raw_data.get_random_remote_path(file_name=file_name), hash=known_cache_key, hash_method=method
        )

    @classmethod
    def from_existing_remote(cls, remote_path: str, file_cache_key: Optional[str] = None) -> File[T]:
        """
        Create a File reference from an existing remote file.

        Use this when you want to reference a file that already exists in remote storage without uploading it.

        Example:

        ```python
        @env.task
        async def process_existing_file() -> str:
            file = File.from_existing_remote("s3://my-bucket/data.csv")
            async with file.open("rb") as f:
                content = await f.read()
            return content.decode("utf-8")
        ```

        Args:
            remote_path: The remote path to the existing file
            file_cache_key: Optional hash value to use for cache key computation. If not specified, the cache key
                will be computed based on the file's attributes (path, name, format).

        Returns:
            A new File instance pointing to the existing remote file
        """
        return cls(path=remote_path, hash=file_cache_key)

    @asynccontextmanager
    async def open(
        self,
        mode: str = "rb",
        block_size: Optional[int] = None,
        cache_type: str = "readahead",
        cache_options: Optional[dict] = None,
        compression: Optional[str] = None,
        **kwargs,
    ) -> AsyncGenerator[Union[AsyncWritableFile, AsyncReadableFile, "HashingWriter"], None]:
        """
        Asynchronously open the file and return a file-like object.

        Use this method in async tasks to read from or write to files directly.

        Example (Async Read):

        ```python
        @env.task
        async def read_file(f: File) -> str:
            async with f.open("rb") as fh:
                content = bytes(await fh.read())
            return content.decode("utf-8")
        ```

        Example (Async Write):

        ```python
        @env.task
        async def write_file() -> File:
            f = File.new_remote()
            async with f.open("wb") as fh:
                await fh.write(b"Hello, World!")
            return f
        ```

        Example (Streaming Read):

        ```python
        @env.task
        async def stream_read(f: File) -> str:
            content_parts = []
            async with f.open("rb", block_size=1024) as fh:
                while True:
                    chunk = await fh.read()
                    if not chunk:
                        break
                    content_parts.append(chunk)
            return b"".join(content_parts).decode("utf-8")
        ```

        Args:
            mode: The mode to open the file in (default: 'rb'). Common modes: 'rb' (read binary),
                'wb' (write binary), 'rt' (read text), 'wt' (write text)
            block_size: Size of blocks for reading in bytes. Useful for streaming large files.
            cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
            cache_options: Dictionary of options for the cache
            compression: Compression format or None for auto-detection
            **kwargs: Additional arguments passed to fsspec's open method

        Returns:
            An async file-like object that can be used with async read/write operations
        """
        # Check if we should use obstore bypass
        try:
            fh = await storage.open(
                self.path,
                mode=mode,
                cache_type=cache_type,
                cache_options=cache_options,
                compression=compression,
                block_size=block_size,
                **kwargs,
            )
            try:
                yield fh
                return
            finally:
                if inspect.iscoroutinefunction(fh.close):
                    await fh.close()
                else:
                    fh.close()
        except flyte.errors.OnlyAsyncIOSupportedError:
            # Fall back to aiofiles
            fs = storage.get_underlying_filesystem(path=self.path)
            if "file" in fs.protocol:
                async with aiofiles.open(self.path, mode=mode, **kwargs) as f:
                    yield f
                return
            raise

    async def exists(self) -> bool:
        """
        Asynchronously check if the file exists.

        Example (Async):

        ```python
        @env.task
        async def check_file(f: File) -> bool:
            if await f.exists():
                print("File exists!")
                return True
            return False
        ```

        Returns:
            True if the file exists, False otherwise
        """
        return await storage.exists(self.path)

    def exists_sync(self) -> bool:
        """
        Synchronously check if the file exists.

        Use this in non-async tasks or when you need synchronous file existence checking.

        Example (Sync):

        ```python
        @env.task
        def check_file_sync(f: File) -> bool:
            if f.exists_sync():
                print("File exists!")
                return True
            return False
        ```

        Returns:
            True if the file exists, False otherwise
        """
        return storage.exists_sync(self.path)

    @contextmanager
    def open_sync(
        self,
        mode: str = "rb",
        block_size: Optional[int] = None,
        cache_type: str = "readahead",
        cache_options: Optional[dict] = None,
        compression: Optional[str] = None,
        **kwargs,
    ) -> Generator[IO[Any], None, None]:
        """
        Synchronously open the file and return a file-like object.

        Use this method in non-async tasks to read from or write to files directly.

        Example (Sync Read):

        ```python
        @env.task
        def read_file_sync(f: File) -> str:
            with f.open_sync("rb") as fh:
                content = fh.read()
            return content.decode("utf-8")
        ```

        Example (Sync Write):

        ```python
        @env.task
        def write_file_sync() -> File:
            f = File.new_remote()
            with f.open_sync("wb") as fh:
                fh.write(b"Hello, World!")
            return f
        ```

        Args:
            mode: The mode to open the file in (default: 'rb'). Common modes: 'rb' (read binary),
                'wb' (write binary), 'rt' (read text), 'wt' (write text)
            block_size: Size of blocks for reading in bytes. Useful for streaming large files.
            cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
            cache_options: Dictionary of options for the cache
            compression: Compression format or None for auto-detection
            **kwargs: Additional arguments passed to fsspec's open method

        Returns:
            A file-like object that can be used with standard read/write operations
        """
        fs = storage.get_underlying_filesystem(path=self.path)

        # Set up cache options if provided
        if cache_options is None:
            cache_options = {}

        # Configure the open parameters
        open_kwargs = {"mode": mode, "compression": compression, **kwargs}

        if block_size:
            open_kwargs["block_size"] = block_size

        # Apply caching strategy
        if cache_type != "none":
            open_kwargs["cache_type"] = cache_type
            open_kwargs["cache_options"] = cache_options

        with fs.open(self.path, **open_kwargs) as f:
            yield f

    # TODO sync needs to be implemented
    async def download(self, local_path: Optional[Union[str, Path]] = None) -> str:
        """
        Asynchronously download the file to a local path.

        Use this when you need to download a remote file to your local filesystem for processing.

        Example (Async):

        ```python
        @env.task
        async def download_and_process(f: File) -> str:
            local_path = await f.download()
            # Now process the local file
            with open(local_path, "r") as fh:
                return fh.read()
        ```

        Example (Download to specific path):

        ```python
        @env.task
        async def download_to_path(f: File) -> str:
            local_path = await f.download("/tmp/myfile.csv")
            return local_path
        ```

        Args:
            local_path: The local path to download the file to. If None, a temporary
                directory will be used and a path will be generated.

        Returns:
            The absolute path to the downloaded file
        """
        if local_path is None:
            local_path = storage.get_random_local_path(file_path_or_file_name=self.path)
        else:
            # Preserve trailing separator if present (Path.absolute() strips it)
            local_path_str = str(local_path)
            has_trailing_sep = local_path_str.endswith(os.sep)
            local_path = str(Path(local_path).absolute())
            if has_trailing_sep:
                local_path = local_path + os.sep

        fs = storage.get_underlying_filesystem(path=self.path)

        # If it's already a local file, just copy it
        if "file" in fs.protocol:
            # Apply directory logic for local-to-local copies
            local_path_for_copy = local_path
            if isinstance(local_path, str):
                local_path_obj = Path(local_path)
                # Check if it's a directory or ends with separator
                if local_path.endswith(os.sep) or (local_path_obj.exists() and local_path_obj.is_dir()):
                    remote_filename = Path(self.path).name
                    local_path_for_copy = str(local_path_obj / remote_filename)

            # Ensure parent directory exists
            Path(local_path_for_copy).parent.mkdir(parents=True, exist_ok=True)

            # Use aiofiles for async copy
            async with aiofiles.open(self.path, "rb") as src:
                async with aiofiles.open(local_path_for_copy, "wb") as dst:
                    await dst.write(await src.read())
            return str(local_path_for_copy)

        # Otherwise download from remote using async functionality
        result_path = await storage.get(self.path, str(local_path))
        return result_path

    def download_sync(self, local_path: Optional[Union[str, Path]] = None) -> str:
        """
        Synchronously download the file to a local path.

        Use this in non-async tasks when you need to download a remote file to your local filesystem.

        Example (Sync):

        ```python
        @env.task
        def download_and_process_sync(f: File) -> str:
            local_path = f.download_sync()
            # Now process the local file
            with open(local_path, "r") as fh:
                return fh.read()
        ```

        Example (Download to specific path):

        ```python
        @env.task
        def download_to_path_sync(f: File) -> str:
            local_path = f.download_sync("/tmp/myfile.csv")
            return local_path
        ```

        Args:
            local_path: The local path to download the file to. If None, a temporary
                directory will be used and a path will be generated.

        Returns:
            The absolute path to the downloaded file
        """
        if local_path is None:
            local_path = storage.get_random_local_path(file_path_or_file_name=self.path)
        else:
            # Preserve trailing separator if present (Path.absolute() strips it)
            local_path_str = str(local_path)
            has_trailing_sep = local_path_str.endswith(os.sep)
            local_path = str(Path(local_path).absolute())
            if has_trailing_sep:
                local_path = local_path + os.sep

        fs = storage.get_underlying_filesystem(path=self.path)

        # If it's already a local file, just copy it
        if "file" in fs.protocol:
            # Apply directory logic for local-to-local copies
            local_path_for_copy = local_path
            if isinstance(local_path, str):
                local_path_obj = Path(local_path)
                # Check if it's a directory or ends with separator
                if local_path.endswith(os.sep) or (local_path_obj.exists() and local_path_obj.is_dir()):
                    remote_filename = Path(self.path).name
                    local_path_for_copy = str(local_path_obj / remote_filename)

            # Ensure parent directory exists
            Path(local_path_for_copy).parent.mkdir(parents=True, exist_ok=True)

            # Use standard file operations for sync copy
            import shutil

            shutil.copy2(self.path, local_path_for_copy)
            return str(local_path_for_copy)

        # Otherwise download from remote using sync functionality
        # Use the sync version of storage operations
        with fs.open(self.path, "rb") as src:
            with open(local_path, "wb") as dst:
                dst.write(src.read())
        return str(local_path)

    @classmethod
    @requires_initialization
    def from_local_sync(
        cls,
        local_path: Union[str, Path],
        remote_destination: Optional[str] = None,
        hash_method: Optional[HashMethod | str] = None,
    ) -> File[T]:
        """
        Synchronously create a new File object from a local file by uploading it to remote storage.

        Use this in non-async tasks when you have a local file that needs to be uploaded to remote storage.

        Example (Sync):

        ```python
        @env.task
        def upload_local_file_sync() -> File:
            # Create a local file
            with open("/tmp/data.csv", "w") as f:
                f.write("col1,col2\n1,2\n3,4\n")

            # Upload to remote storage
            remote_file = File.from_local_sync("/tmp/data.csv")
            return remote_file
        ```

        Example (With specific destination):

        ```python
        @env.task
        def upload_to_specific_path() -> File:
            remote_file = File.from_local_sync("/tmp/data.csv", "s3://my-bucket/data.csv")
            return remote_file
        ```

        Args:
            local_path: Path to the local file
            remote_destination: Optional remote path to store the file. If None, a path will be
                automatically generated.
            hash_method: Optional HashMethod or string to use for cache key computation. If a string is provided,
                it will be used as a precomputed cache key. If a HashMethod is provided, it will compute
                the hash during upload. If not specified, the cache key will be based on file attributes.

        Returns:
            A new File instance pointing to the uploaded remote file
        """
        if not os.path.exists(local_path):
            raise ValueError(f"File not found: {local_path}")

        remote_path = remote_destination or internal_ctx().raw_data.get_random_remote_path()
        protocol = get_protocol(remote_path)
        filename = Path(local_path).name

        # If remote_destination was not set by the user, and the configured raw data path is also local,
        # then let's optimize by not uploading.
        hash_value = hash_method if isinstance(hash_method, str) else None
        hash_method_obj = hash_method if isinstance(hash_method, HashMethod) else None

        if "file" in protocol:
            if remote_destination is None:
                path = str(Path(local_path).absolute())
            else:
                # Otherwise, actually make a copy of the file
                import shutil

                if hash_method_obj:
                    # For hash computation, we need to read and write manually
                    with open(local_path, "rb") as src:
                        with open(remote_path, "wb") as dst:
                            dst_wrapper = HashingWriter(dst, accumulator=hash_method_obj)
                            dst_wrapper.write(src.read())
                            hash_value = dst_wrapper.result()
                            dst_wrapper.close()
                else:
                    shutil.copy2(local_path, remote_path)
                path = str(Path(remote_path).absolute())
        else:
            # Otherwise upload to remote using sync storage layer
            fs = storage.get_underlying_filesystem(path=remote_path)

            if hash_method_obj:
                # We can skip the wrapper if the hash method is just a precomputed value
                if not isinstance(hash_method_obj, PrecomputedValue):
                    with open(local_path, "rb") as src:
                        # For sync operations, we need to compute hash manually
                        data = src.read()
                        hash_method_obj.update(memoryview(data))
                        hash_value = hash_method_obj.result()

                    # Now write the data to remote
                    with fs.open(remote_path, "wb") as dst:
                        dst.write(data)
                    path = remote_path
                else:
                    # Use sync file operations
                    with open(local_path, "rb") as src:
                        with fs.open(remote_path, "wb") as dst:
                            dst.write(src.read())
                    path = remote_path
                    hash_value = hash_method_obj.result()
            else:
                # Simple sync copy
                with open(local_path, "rb") as src:
                    with fs.open(remote_path, "wb") as dst:
                        dst.write(src.read())
                path = remote_path

        f = cls(path=path, name=filename, hash_method=hash_method_obj, hash=hash_value)
        return f

    @classmethod
    @requires_initialization
    async def from_local(
        cls,
        local_path: Union[str, Path],
        remote_destination: Optional[str] = None,
        hash_method: Optional[HashMethod | str] = None,
    ) -> File[T]:
        """
        Asynchronously create a new File object from a local file by uploading it to remote storage.

        Use this in async tasks when you have a local file that needs to be uploaded to remote storage.

        Example (Async):

        ```python
        @env.task
        async def upload_local_file() -> File:
            # Create a local file
            async with aiofiles.open("/tmp/data.csv", "w") as f:
                await f.write("col1,col2\n1,2\n3,4\n")

            # Upload to remote storage
            remote_file = await File.from_local("/tmp/data.csv")
            return remote_file
        ```

        Example (With specific destination):

        ```python
        @env.task
        async def upload_to_specific_path() -> File:
            remote_file = await File.from_local("/tmp/data.csv", "s3://my-bucket/data.csv")
            return remote_file
        ```

        Args:
            local_path: Path to the local file
            remote_destination: Optional remote path to store the file. If None, a path will be
                automatically generated.
            hash_method: Optional HashMethod or string to use for cache key computation. If a string is provided,
                it will be used as a precomputed cache key. If a HashMethod is provided, it will compute
                the hash during upload. If not specified, the cache key will be based on file attributes.

        Returns:
            A new File instance pointing to the uploaded remote file
        """
        if not os.path.exists(local_path):
            raise ValueError(f"File not found: {local_path}")

        filename = Path(local_path).name
        remote_path = remote_destination or internal_ctx().raw_data.get_random_remote_path(filename)
        protocol = get_protocol(remote_path)

        # If remote_destination was not set by the user, and the configured raw data path is also local,
        # then let's optimize by not uploading.
        hash_value = hash_method if isinstance(hash_method, str) else None
        hash_method = hash_method if isinstance(hash_method, HashMethod) else None
        if "file" in protocol:
            if remote_destination is None:
                path = str(Path(local_path).absolute())
            else:
                # Otherwise, actually make a copy of the file
                async with aiofiles.open(local_path, "rb") as src:
                    async with aiofiles.open(remote_path, "wb") as dst:
                        if hash_method:
                            dst_wrapper = HashingWriter(dst, accumulator=hash_method)
                            await dst_wrapper.write(await src.read())
                            hash_value = dst_wrapper.result()
                        else:
                            await dst.write(await src.read())
                path = str(Path(remote_path).absolute())
        else:
            # Otherwise upload to remote using async storage layer
            if hash_method:
                # We can skip the wrapper if the hash method is just a precomputed value
                if not isinstance(hash_method, PrecomputedValue):
                    async with aiofiles.open(local_path, "rb") as src:
                        src_wrapper = AsyncHashingReader(src, accumulator=hash_method)
                        path = await storage.put_stream(src_wrapper, to_path=remote_path)
                        hash_value = src_wrapper.result()
                else:
                    path = await storage.put(str(local_path), remote_path)
                    hash_value = hash_method.result()
            else:
                path = await storage.put(str(local_path), remote_path)

        f = cls(path=path, name=filename, hash_method=hash_method, hash=hash_value)
        return f


class FileTransformer(TypeTransformer[File]):
    """
    Transformer for File objects. This type transformer does not handle any I/O; that is now the responsibility
    of the user.
    """

    def __init__(self):
        super().__init__(name="File", t=File)

    def get_literal_type(self, t: Type[File]) -> types_pb2.LiteralType:
        """Get the Flyte literal type for a File type."""
        return types_pb2.LiteralType(
            blob=types_pb2.BlobType(
                # todo: set format from generic
                format="",  # Format is determined by the generic type T
                dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE,
            )
        )

    async def to_literal(
        self,
        python_val: File,
        python_type: Type[File],
        expected: types_pb2.LiteralType,
    ) -> literals_pb2.Literal:
        """Convert a File object to a Flyte literal."""
        if not isinstance(python_val, File):
            raise TypeTransformerFailedError(f"Expected File object, received {type(python_val)}")

        return literals_pb2.Literal(
            scalar=literals_pb2.Scalar(
                blob=literals_pb2.Blob(
                    metadata=literals_pb2.BlobMetadata(
                        type=types_pb2.BlobType(
                            format=python_val.format, dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE
                        )
                    ),
                    uri=python_val.path,
                )
            ),
            hash=python_val.hash if python_val.hash else None,
        )

    async def to_python_value(
        self,
        lv: literals_pb2.Literal,
        expected_python_type: Type[File],
    ) -> File:
        """Convert a Flyte literal to a File object."""
        if not lv.scalar.HasField("blob"):
            raise TypeTransformerFailedError(f"Expected blob literal, received {lv}")
        if not lv.scalar.blob.metadata.type.dimensionality == types_pb2.BlobType.BlobDimensionality.SINGLE:
            raise TypeTransformerFailedError(
                f"Expected single part blob, received {lv.scalar.blob.metadata.type.dimensionality}"
            )

        uri = lv.scalar.blob.uri
        filename = Path(uri).name
        hash_value = lv.hash if lv.hash else None
        f: File = File(path=uri, name=filename, format=lv.scalar.blob.metadata.type.format, hash=hash_value)
        return f

    def guess_python_type(self, literal_type: types_pb2.LiteralType) -> Type[File]:
        """Guess the Python type from a Flyte literal type."""
        if (
            literal_type.HasField("blob")
            and literal_type.blob.dimensionality == types_pb2.BlobType.BlobDimensionality.SINGLE
            and literal_type.blob.format != "PythonPickle"  # see pickle transformer
        ):
            return File
        raise ValueError(f"Cannot guess python type from {literal_type}")


TypeEngine.register(FileTransformer())
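As a quick orientation for reviewers, the sketch below shows how the `File` API added in this file might be used from tasks, including the string form of `hash_method`, which the docstrings describe as a precomputed cache key but never demonstrate end to end. It is a minimal sketch, not part of the wheel: it assumes `File` is re-exported from `flyte.io` and that a task environment `env` (e.g. `flyte.TaskEnvironment`) is available, as in the docstring examples above; the task names and `/tmp` path are illustrative.

```python
# Illustrative sketch only. Assumes `File` is re-exported from `flyte.io` and
# that `flyte.TaskEnvironment` provides the `env.task` decorator used in the
# docstrings above; task names and paths are hypothetical.
import flyte
from flyte.io import File

env = flyte.TaskEnvironment(name="file-demo")


@env.task
async def ingest(dataset_version: str) -> File:
    # Write a small local file, then upload it. Passing a string as
    # `hash_method` stores it as a precomputed cache key on the File,
    # so caching keys off the dataset version rather than the bytes.
    with open("/tmp/data.csv", "w") as f:
        f.write("col1,col2\n1,2\n3,4\n")
    return await File.from_local("/tmp/data.csv", hash_method=dataset_version)


@env.task
async def consume(f: File) -> int:
    # Stream the uploaded file back without materializing it on local disk.
    async with f.open("rb") as fh:
        data = bytes(await fh.read())
    return len(data)
```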