flyte 0.1.0__py3-none-any.whl → 0.2.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (219)
  1. flyte/__init__.py +78 -2
  2. flyte/_bin/__init__.py +0 -0
  3. flyte/_bin/runtime.py +152 -0
  4. flyte/_build.py +26 -0
  5. flyte/_cache/__init__.py +12 -0
  6. flyte/_cache/cache.py +145 -0
  7. flyte/_cache/defaults.py +9 -0
  8. flyte/_cache/policy_function_body.py +42 -0
  9. flyte/_code_bundle/__init__.py +8 -0
  10. flyte/_code_bundle/_ignore.py +113 -0
  11. flyte/_code_bundle/_packaging.py +187 -0
  12. flyte/_code_bundle/_utils.py +323 -0
  13. flyte/_code_bundle/bundle.py +209 -0
  14. flyte/_context.py +152 -0
  15. flyte/_deploy.py +243 -0
  16. flyte/_doc.py +29 -0
  17. flyte/_docstring.py +32 -0
  18. flyte/_environment.py +84 -0
  19. flyte/_excepthook.py +37 -0
  20. flyte/_group.py +32 -0
  21. flyte/_hash.py +23 -0
  22. flyte/_image.py +762 -0
  23. flyte/_initialize.py +492 -0
  24. flyte/_interface.py +84 -0
  25. flyte/_internal/__init__.py +3 -0
  26. flyte/_internal/controllers/__init__.py +128 -0
  27. flyte/_internal/controllers/_local_controller.py +193 -0
  28. flyte/_internal/controllers/_trace.py +41 -0
  29. flyte/_internal/controllers/remote/__init__.py +60 -0
  30. flyte/_internal/controllers/remote/_action.py +146 -0
  31. flyte/_internal/controllers/remote/_client.py +47 -0
  32. flyte/_internal/controllers/remote/_controller.py +494 -0
  33. flyte/_internal/controllers/remote/_core.py +410 -0
  34. flyte/_internal/controllers/remote/_informer.py +361 -0
  35. flyte/_internal/controllers/remote/_service_protocol.py +50 -0
  36. flyte/_internal/imagebuild/__init__.py +11 -0
  37. flyte/_internal/imagebuild/docker_builder.py +427 -0
  38. flyte/_internal/imagebuild/image_builder.py +246 -0
  39. flyte/_internal/imagebuild/remote_builder.py +0 -0
  40. flyte/_internal/resolvers/__init__.py +0 -0
  41. flyte/_internal/resolvers/_task_module.py +54 -0
  42. flyte/_internal/resolvers/common.py +31 -0
  43. flyte/_internal/resolvers/default.py +28 -0
  44. flyte/_internal/runtime/__init__.py +0 -0
  45. flyte/_internal/runtime/convert.py +342 -0
  46. flyte/_internal/runtime/entrypoints.py +135 -0
  47. flyte/_internal/runtime/io.py +136 -0
  48. flyte/_internal/runtime/resources_serde.py +138 -0
  49. flyte/_internal/runtime/task_serde.py +330 -0
  50. flyte/_internal/runtime/taskrunner.py +191 -0
  51. flyte/_internal/runtime/types_serde.py +54 -0
  52. flyte/_logging.py +135 -0
  53. flyte/_map.py +215 -0
  54. flyte/_pod.py +19 -0
  55. flyte/_protos/__init__.py +0 -0
  56. flyte/_protos/common/authorization_pb2.py +66 -0
  57. flyte/_protos/common/authorization_pb2.pyi +108 -0
  58. flyte/_protos/common/authorization_pb2_grpc.py +4 -0
  59. flyte/_protos/common/identifier_pb2.py +71 -0
  60. flyte/_protos/common/identifier_pb2.pyi +82 -0
  61. flyte/_protos/common/identifier_pb2_grpc.py +4 -0
  62. flyte/_protos/common/identity_pb2.py +48 -0
  63. flyte/_protos/common/identity_pb2.pyi +72 -0
  64. flyte/_protos/common/identity_pb2_grpc.py +4 -0
  65. flyte/_protos/common/list_pb2.py +36 -0
  66. flyte/_protos/common/list_pb2.pyi +71 -0
  67. flyte/_protos/common/list_pb2_grpc.py +4 -0
  68. flyte/_protos/common/policy_pb2.py +37 -0
  69. flyte/_protos/common/policy_pb2.pyi +27 -0
  70. flyte/_protos/common/policy_pb2_grpc.py +4 -0
  71. flyte/_protos/common/role_pb2.py +37 -0
  72. flyte/_protos/common/role_pb2.pyi +53 -0
  73. flyte/_protos/common/role_pb2_grpc.py +4 -0
  74. flyte/_protos/common/runtime_version_pb2.py +28 -0
  75. flyte/_protos/common/runtime_version_pb2.pyi +24 -0
  76. flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
  77. flyte/_protos/logs/dataplane/payload_pb2.py +100 -0
  78. flyte/_protos/logs/dataplane/payload_pb2.pyi +177 -0
  79. flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
  80. flyte/_protos/secret/definition_pb2.py +49 -0
  81. flyte/_protos/secret/definition_pb2.pyi +93 -0
  82. flyte/_protos/secret/definition_pb2_grpc.py +4 -0
  83. flyte/_protos/secret/payload_pb2.py +62 -0
  84. flyte/_protos/secret/payload_pb2.pyi +94 -0
  85. flyte/_protos/secret/payload_pb2_grpc.py +4 -0
  86. flyte/_protos/secret/secret_pb2.py +38 -0
  87. flyte/_protos/secret/secret_pb2.pyi +6 -0
  88. flyte/_protos/secret/secret_pb2_grpc.py +198 -0
  89. flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
  90. flyte/_protos/validate/validate/validate_pb2.py +76 -0
  91. flyte/_protos/workflow/common_pb2.py +27 -0
  92. flyte/_protos/workflow/common_pb2.pyi +14 -0
  93. flyte/_protos/workflow/common_pb2_grpc.py +4 -0
  94. flyte/_protos/workflow/environment_pb2.py +29 -0
  95. flyte/_protos/workflow/environment_pb2.pyi +12 -0
  96. flyte/_protos/workflow/environment_pb2_grpc.py +4 -0
  97. flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
  98. flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
  99. flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
  100. flyte/_protos/workflow/queue_service_pb2.py +105 -0
  101. flyte/_protos/workflow/queue_service_pb2.pyi +146 -0
  102. flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
  103. flyte/_protos/workflow/run_definition_pb2.py +128 -0
  104. flyte/_protos/workflow/run_definition_pb2.pyi +314 -0
  105. flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
  106. flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
  107. flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
  108. flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
  109. flyte/_protos/workflow/run_service_pb2.py +129 -0
  110. flyte/_protos/workflow/run_service_pb2.pyi +171 -0
  111. flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
  112. flyte/_protos/workflow/state_service_pb2.py +66 -0
  113. flyte/_protos/workflow/state_service_pb2.pyi +75 -0
  114. flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
  115. flyte/_protos/workflow/task_definition_pb2.py +79 -0
  116. flyte/_protos/workflow/task_definition_pb2.pyi +81 -0
  117. flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
  118. flyte/_protos/workflow/task_service_pb2.py +60 -0
  119. flyte/_protos/workflow/task_service_pb2.pyi +59 -0
  120. flyte/_protos/workflow/task_service_pb2_grpc.py +138 -0
  121. flyte/_resources.py +226 -0
  122. flyte/_retry.py +32 -0
  123. flyte/_reusable_environment.py +25 -0
  124. flyte/_run.py +482 -0
  125. flyte/_secret.py +61 -0
  126. flyte/_task.py +449 -0
  127. flyte/_task_environment.py +183 -0
  128. flyte/_timeout.py +47 -0
  129. flyte/_tools.py +27 -0
  130. flyte/_trace.py +120 -0
  131. flyte/_utils/__init__.py +26 -0
  132. flyte/_utils/asyn.py +119 -0
  133. flyte/_utils/async_cache.py +139 -0
  134. flyte/_utils/coro_management.py +23 -0
  135. flyte/_utils/file_handling.py +72 -0
  136. flyte/_utils/helpers.py +134 -0
  137. flyte/_utils/lazy_module.py +54 -0
  138. flyte/_utils/org_discovery.py +57 -0
  139. flyte/_utils/uv_script_parser.py +49 -0
  140. flyte/_version.py +21 -0
  141. flyte/cli/__init__.py +3 -0
  142. flyte/cli/_abort.py +28 -0
  143. flyte/cli/_common.py +337 -0
  144. flyte/cli/_create.py +145 -0
  145. flyte/cli/_delete.py +23 -0
  146. flyte/cli/_deploy.py +152 -0
  147. flyte/cli/_gen.py +163 -0
  148. flyte/cli/_get.py +310 -0
  149. flyte/cli/_params.py +538 -0
  150. flyte/cli/_run.py +231 -0
  151. flyte/cli/main.py +166 -0
  152. flyte/config/__init__.py +3 -0
  153. flyte/config/_config.py +216 -0
  154. flyte/config/_internal.py +64 -0
  155. flyte/config/_reader.py +207 -0
  156. flyte/connectors/__init__.py +0 -0
  157. flyte/errors.py +172 -0
  158. flyte/extras/__init__.py +5 -0
  159. flyte/extras/_container.py +263 -0
  160. flyte/io/__init__.py +27 -0
  161. flyte/io/_dir.py +448 -0
  162. flyte/io/_file.py +467 -0
  163. flyte/io/_structured_dataset/__init__.py +129 -0
  164. flyte/io/_structured_dataset/basic_dfs.py +219 -0
  165. flyte/io/_structured_dataset/structured_dataset.py +1061 -0
  166. flyte/models.py +391 -0
  167. flyte/remote/__init__.py +26 -0
  168. flyte/remote/_client/__init__.py +0 -0
  169. flyte/remote/_client/_protocols.py +133 -0
  170. flyte/remote/_client/auth/__init__.py +12 -0
  171. flyte/remote/_client/auth/_auth_utils.py +14 -0
  172. flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
  173. flyte/remote/_client/auth/_authenticators/base.py +397 -0
  174. flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  175. flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
  176. flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
  177. flyte/remote/_client/auth/_authenticators/factory.py +200 -0
  178. flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
  179. flyte/remote/_client/auth/_channel.py +215 -0
  180. flyte/remote/_client/auth/_client_config.py +83 -0
  181. flyte/remote/_client/auth/_default_html.py +32 -0
  182. flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  183. flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
  184. flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
  185. flyte/remote/_client/auth/_keyring.py +143 -0
  186. flyte/remote/_client/auth/_token_client.py +260 -0
  187. flyte/remote/_client/auth/errors.py +16 -0
  188. flyte/remote/_client/controlplane.py +95 -0
  189. flyte/remote/_console.py +18 -0
  190. flyte/remote/_data.py +159 -0
  191. flyte/remote/_logs.py +176 -0
  192. flyte/remote/_project.py +85 -0
  193. flyte/remote/_run.py +970 -0
  194. flyte/remote/_secret.py +132 -0
  195. flyte/remote/_task.py +391 -0
  196. flyte/report/__init__.py +3 -0
  197. flyte/report/_report.py +178 -0
  198. flyte/report/_template.html +124 -0
  199. flyte/storage/__init__.py +29 -0
  200. flyte/storage/_config.py +233 -0
  201. flyte/storage/_remote_fs.py +34 -0
  202. flyte/storage/_storage.py +271 -0
  203. flyte/storage/_utils.py +5 -0
  204. flyte/syncify/__init__.py +56 -0
  205. flyte/syncify/_api.py +371 -0
  206. flyte/types/__init__.py +36 -0
  207. flyte/types/_interface.py +40 -0
  208. flyte/types/_pickle.py +118 -0
  209. flyte/types/_renderer.py +162 -0
  210. flyte/types/_string_literals.py +120 -0
  211. flyte/types/_type_engine.py +2287 -0
  212. flyte/types/_utils.py +80 -0
  213. flyte-0.2.0a0.dist-info/METADATA +249 -0
  214. flyte-0.2.0a0.dist-info/RECORD +218 -0
  215. {flyte-0.1.0.dist-info → flyte-0.2.0a0.dist-info}/WHEEL +2 -1
  216. flyte-0.2.0a0.dist-info/entry_points.txt +3 -0
  217. flyte-0.2.0a0.dist-info/top_level.txt +1 -0
  218. flyte-0.1.0.dist-info/METADATA +0 -6
  219. flyte-0.1.0.dist-info/RECORD +0 -5
flyte/io/_file.py ADDED
@@ -0,0 +1,467 @@
+ from __future__ import annotations
+
+ import os
+ from contextlib import asynccontextmanager, contextmanager
+ from pathlib import Path
+ from typing import (
+     IO,
+     Any,
+     AsyncGenerator,
+     Dict,
+     Generator,
+     Generic,
+     Optional,
+     Type,
+     TypeVar,
+     Union,
+ )
+
+ import aiofiles
+ from flyteidl.core import literals_pb2, types_pb2
+ from fsspec.asyn import AsyncFileSystem
+ from fsspec.utils import get_protocol
+ from mashumaro.types import SerializableType
+ from pydantic import BaseModel, model_validator
+
+ import flyte.storage as storage
+ from flyte._context import internal_ctx
+ from flyte._initialize import requires_initialization
+ from flyte._logging import logger
+ from flyte.types import TypeEngine, TypeTransformer, TypeTransformerFailedError
+
+ # Type variable for the file format
+ T = TypeVar("T")
+
+
+ class File(BaseModel, Generic[T], SerializableType):
+     """
+     A generic file class representing a file with a specified format.
+     Provides both async and sync interfaces for file operations.
+     Users must handle all I/O operations themselves by instantiating this class with the appropriate class methods.
+
+     The generic type T represents the format of the file.
+
+     Example:
+     ```python
+     # Async usage
+     from pandas import DataFrame
+     csv_file = File[DataFrame](path="s3://my-bucket/data.csv")
+
+     async with csv_file.open() as f:
+         content = await f.read()
+
+     # Sync alternative
+     with csv_file.open_sync() as f:
+         content = f.read()
+     ```
+
+     Example: Read a file input in a Task.
+     ```
+     @env.task
+     async def my_task(file: File[DataFrame]):
+         async with file.open() as f:
+             df = pd.read_csv(f)
+     ```
+
+     Example: Write a file by streaming it directly to blob storage.
+     ```
+     @env.task
+     async def my_task() -> File[DataFrame]:
+         df = pd.DataFrame(...)
+         file = File.new_remote()
+         async with file.open("wb") as f:
+             df.to_csv(f)
+         # No additional uploading will be done here.
+         return file
+     ```
+     Example: Write a file by writing it locally first, and then uploading it.
+     ```
+     @env.task
+     async def my_task() -> File[DataFrame]:
+         # write to /tmp/data.csv
+         return await File.from_local("/tmp/data.csv", remote_destination="s3://my-bucket/data.csv")
+     ```
+
+     Example: From an existing remote file.
+     ```
+     @env.task
+     async def my_task() -> File[DataFrame]:
+         return File.from_existing_remote("s3://my-bucket/data.csv")
+     ```
+
+     Example: Take a remote file as input and return the same one; no copy should be made.
+     ```
+     @env.task
+     async def my_task(file: File[DataFrame]) -> File[DataFrame]:
+         return file
+     ```
+
+     Args:
+         path: The path to the file (can be local or remote)
+         name: Optional name for the file (defaults to basename of path)
+     """
+
+     path: str
+     name: Optional[str] = None
+     format: str = ""
+
+     class Config:
+         arbitrary_types_allowed = True
+
+     @model_validator(mode="before")
+     @classmethod
+     def pre_init(cls, data):
+         if data.get("name") is None:
+             data["name"] = Path(data["path"]).name
+         return data
+
+     def _serialize(self) -> Dict[str, Optional[str]]:
+         pyd_dump = self.model_dump()
+         return pyd_dump
+
+     @classmethod
+     def _deserialize(cls, file_dump: Dict[str, Optional[str]]) -> File:
+         return File.model_validate(file_dump)
+
+     @classmethod
+     def schema_match(cls, incoming: dict):
+         this_schema = cls.model_json_schema()
+         current_required = this_schema.get("required")
+         incoming_required = incoming.get("required")
+         if (
+             current_required
+             and incoming_required
+             and incoming.get("type") == this_schema.get("type")
+             and incoming.get("title") == this_schema.get("title")
+             and set(current_required) == set(incoming_required)
+         ):
+             return True
+
+     @classmethod
+     @requires_initialization
+     def new_remote(cls) -> File[T]:
+         """
+         Create a new File reference for a remote file that will be written to.
+
+         Example:
+         ```
+         @env.task
+         async def my_task() -> File[DataFrame]:
+             df = pd.DataFrame(...)
+             file = File.new_remote()
+             async with file.open("wb") as f:
+                 df.to_csv(f)
+             return file
+         ```
+         """
+         ctx = internal_ctx()
+
+         return cls(path=ctx.raw_data.get_random_remote_path())
+
+     @classmethod
+     def from_existing_remote(cls, remote_path: str) -> File[T]:
+         """
+         Create a File reference from an existing remote file.
+
+         Example:
+         ```python
+         @env.task
+         async def my_task() -> File[DataFrame]:
+             return File.from_existing_remote("s3://my-bucket/data.csv")
+         ```
+
+         Args:
+             remote_path: The remote path to the existing file
+         """
+         return cls(path=remote_path)
+
+     @asynccontextmanager
+     async def open(
+         self,
+         mode: str = "rb",
+         block_size: Optional[int] = None,
+         cache_type: str = "readahead",
+         cache_options: Optional[dict] = None,
+         compression: Optional[str] = None,
+         **kwargs,
+     ) -> AsyncGenerator[IO[Any]]:
+         """
+         Asynchronously open the file and return a file-like object.
+
+         Args:
+             mode: The mode to open the file in (default: 'rb')
+             block_size: Size of blocks for reading (bytes)
+             cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
+             cache_options: Dictionary of options for the cache
+             compression: Compression format or None for auto-detection
+             **kwargs: Additional arguments passed to fsspec's open method
+
+         Returns:
+             An async file-like object
+
+         Example:
+         ```python
+         async with file.open('rb') as f:
+             data = await f.read()
+         ```
+         """
+         fs = storage.get_underlying_filesystem(path=self.path)
+
+         # Set up cache options if provided
+         if cache_options is None:
+             cache_options = {}
+
+         # Configure the open parameters
+         open_kwargs = {"mode": mode, **kwargs}
+         if compression:
+             open_kwargs["compression"] = compression
+
+         if block_size:
+             open_kwargs["block_size"] = block_size
+
+         # Apply caching strategy
+         if cache_type != "none":
+             open_kwargs["cache_type"] = cache_type
+             open_kwargs["cache_options"] = cache_options
+
+         # Use aiofiles for local files
+         if fs.protocol == "file":
+             async with aiofiles.open(self.path, mode=mode, **kwargs) as f:
+                 yield f
+         else:
+             # This code is broadly similar to what storage.get_stream does, but without actually reading from the stream
+             file_handle = None
+             try:
+                 if "b" not in mode:
+                     raise ValueError("Mode must include 'b' for binary access, when using remote files.")
+                 if isinstance(fs, AsyncFileSystem):
+                     file_handle = await fs.open_async(self.path, mode)
+                     yield file_handle
+                     return
+             except NotImplementedError:
+                 logger.debug(f"{fs} doesn't implement 'open_async', falling back to sync")
+             finally:
+                 if file_handle is not None:
+                     file_handle.close()
+
+             with fs.open(self.path, **open_kwargs) as file_handle:
+                 yield file_handle
+
+     def exists_sync(self) -> bool:
+         """
+         Synchronously check if the file exists.
+
+         Returns:
+             True if the file exists, False otherwise
+
+         Example:
+         ```python
+         if file.exists_sync():
+             # Process the file
+         ```
+         """
+         fs = storage.get_underlying_filesystem(path=self.path)
+         return fs.exists(self.path)
+
+     @contextmanager
+     def open_sync(
+         self,
+         mode: str = "rb",
+         block_size: Optional[int] = None,
+         cache_type: str = "readahead",
+         cache_options: Optional[dict] = None,
+         compression: Optional[str] = None,
+         **kwargs,
+     ) -> Generator[IO[Any]]:
+         """
+         Synchronously open the file and return a file-like object.
+
+         Args:
+             mode: The mode to open the file in (default: 'rb')
+             block_size: Size of blocks for reading (bytes)
+             cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
+             cache_options: Dictionary of options for the cache
+             compression: Compression format or None for auto-detection
+             **kwargs: Additional arguments passed to fsspec's open method
+
+         Returns:
+             A file-like object
+
+         Example:
+         ```python
+         with file.open_sync('rb') as f:
+             data = f.read()
+         ```
+         """
+         fs = storage.get_underlying_filesystem(path=self.path)
+
+         # Set up cache options if provided
+         if cache_options is None:
+             cache_options = {}
+
+         # Configure the open parameters
+         open_kwargs = {"mode": mode, "compression": compression, **kwargs}
+
+         if block_size:
+             open_kwargs["block_size"] = block_size
+
+         # Apply caching strategy
+         if cache_type != "none":
+             open_kwargs["cache_type"] = cache_type
+             open_kwargs["cache_options"] = cache_options
+
+         with fs.open(self.path, **open_kwargs) as f:
+             yield f
+
+     # TODO sync needs to be implemented
+     async def download(self, local_path: Optional[Union[str, Path]] = None) -> str:
+         """
+         Asynchronously download the file to a local path.
+
+         Args:
+             local_path: The local path to download the file to. If None, a temporary
+                 directory will be used.
+
+         Returns:
+             The path to the downloaded file
+
+         Example:
+         ```python
+         local_file = await file.download('/tmp/myfile.csv')
+         ```
+         """
+         if local_path is None:
+             local_path = storage.get_random_local_path(file_path_or_file_name=local_path)
+         else:
+             local_path = str(Path(local_path).absolute())
+
+         fs = storage.get_underlying_filesystem(path=self.path)
+
+         # If it's already a local file, just copy it
+         if "file" in fs.protocol:
+             # Use aiofiles for async copy
+             async with aiofiles.open(self.path, "rb") as src:
+                 async with aiofiles.open(local_path, "wb") as dst:
+                     await dst.write(await src.read())
+             return str(local_path)
+
+         # Otherwise download from remote using async functionality
+         await storage.get(self.path, str(local_path))
+         return str(local_path)
+
+     @classmethod
+     @requires_initialization
+     async def from_local(cls, local_path: Union[str, Path], remote_destination: Optional[str] = None) -> File[T]:
+         """
+         Create a new File object from a local file that will be uploaded to the configured remote store.
+
+         Args:
+             local_path: Path to the local file
+             remote_destination: Optional path to store the file remotely. If None, a path will be generated.
+
+         Returns:
+             A new File instance pointing to the uploaded file
+
+         Example:
+         ```python
+         remote_file = await File[DataFrame].from_local('/tmp/data.csv', 's3://bucket/data.csv')
+         ```
+         """
+         if not os.path.exists(local_path):
+             raise ValueError(f"File not found: {local_path}")
+
+         remote_path = remote_destination or internal_ctx().raw_data.get_random_remote_path()
+         protocol = get_protocol(remote_path)
+         filename = Path(local_path).name
+
+         # If remote_destination was not set by the user, and the configured raw data path is also local,
+         # then let's optimize by not uploading.
+         if "file" in protocol:
+             if remote_destination is None:
+                 path = str(Path(local_path).absolute())
+             else:
+                 # Otherwise, actually make a copy of the file
+                 async with aiofiles.open(local_path, "rb") as src:
+                     async with aiofiles.open(remote_path, "wb") as dst:
+                         await dst.write(await src.read())
+                 path = str(Path(remote_path).absolute())
+         else:
+             # Otherwise upload to remote using async storage layer
+             path = await storage.put(str(local_path), remote_path)
+
+         f = cls(path=path, name=filename)
+         return f
+
+
+ class FileTransformer(TypeTransformer[File]):
+     """
+     Transformer for File objects. This type transformer does not handle any I/O; that is now the responsibility
+     of the user.
+     """
+
+     def __init__(self):
+         super().__init__(name="File", t=File)
+
+     def get_literal_type(self, t: Type[File]) -> types_pb2.LiteralType:
+         """Get the Flyte literal type for a File type."""
+         return types_pb2.LiteralType(
+             blob=types_pb2.BlobType(
+                 # todo: set format from generic
+                 format="",  # Format is determined by the generic type T
+                 dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE,
+             )
+         )
+
+     async def to_literal(
+         self,
+         python_val: File,
+         python_type: Type[File],
+         expected: types_pb2.LiteralType,
+     ) -> literals_pb2.Literal:
+         """Convert a File object to a Flyte literal."""
+         if not isinstance(python_val, File):
+             raise TypeTransformerFailedError(f"Expected File object, received {type(python_val)}")
+
+         return literals_pb2.Literal(
+             scalar=literals_pb2.Scalar(
+                 blob=literals_pb2.Blob(
+                     metadata=literals_pb2.BlobMetadata(
+                         type=types_pb2.BlobType(
+                             format=python_val.format, dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE
+                         )
+                     ),
+                     uri=python_val.path,
+                 )
+             )
+         )
+
+     async def to_python_value(
+         self,
+         lv: literals_pb2.Literal,
+         expected_python_type: Type[File],
+     ) -> File:
+         """Convert a Flyte literal to a File object."""
+         if not lv.scalar.HasField("blob"):
+             raise TypeTransformerFailedError(f"Expected blob literal, received {lv}")
+         if lv.scalar.blob.metadata.type.dimensionality != types_pb2.BlobType.BlobDimensionality.SINGLE:
+             raise TypeTransformerFailedError(
+                 f"Expected single part blob, received {lv.scalar.blob.metadata.type.dimensionality}"
+             )
+
+         uri = lv.scalar.blob.uri
+         filename = Path(uri).name
+         f: File = File(path=uri, name=filename, format=lv.scalar.blob.metadata.type.format)
+         return f
+
+     def guess_python_type(self, literal_type: types_pb2.LiteralType) -> Type[File]:
+         """Guess the Python type from a Flyte literal type."""
+         if (
+             literal_type.HasField("blob")
+             and literal_type.blob.dimensionality == types_pb2.BlobType.BlobDimensionality.SINGLE
+             and literal_type.blob.format != "PythonPickle"  # see pickle transformer
+         ):
+             return File
+         raise ValueError(f"Cannot guess python type from {literal_type}")
+
+
+ TypeEngine.register(FileTransformer())
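
The new `File` API deliberately separates references from data movement: `FileTransformer` serializes only the URI, name, and format, and every read or write happens explicitly inside the task body. A minimal end-to-end sketch of that contract, assuming an initialized Flyte context and a `flyte.TaskEnvironment` named `env` (the environment name and setup here are illustrative, not taken from this diff):

```python
import io

import pandas as pd

import flyte
from flyte.io import File

env = flyte.TaskEnvironment(name="files-demo")  # assumed helper backed by flyte/_task_environment.py


@env.task
async def write_report() -> File[pd.DataFrame]:
    # Stream bytes straight to blob storage; no local temp file,
    # and no extra upload happens when the File is returned.
    out = File.new_remote()
    async with out.open("wb") as f:
        await f.write(pd.DataFrame({"x": [1, 2, 3]}).to_csv(index=False).encode())
    return out


@env.task
async def read_report(report: File[pd.DataFrame]) -> int:
    # Only the URI crossed the task boundary; the read is explicit.
    async with report.open("rb") as f:
        df = pd.read_csv(io.BytesIO(await f.read()))
    return len(df)
```

Note the `wb`/`rb` modes: `File.open` raises `ValueError` for non-binary modes on remote filesystems, so binary mode with an explicit encode/decode step is the safe default.
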
flyte/io/_structured_dataset/__init__.py ADDED
@@ -0,0 +1,129 @@
+ """
+ Flytekit StructuredDataset
+ ==========================================================
+ .. currentmodule:: flytekit.types.structured
+
+ .. autosummary::
+    :template: custom.rst
+    :toctree: generated/
+
+    StructuredDataset
+    StructuredDatasetDecoder
+    StructuredDatasetEncoder
+ """
+
+ import functools
+
+ from flyte._logging import logger
+ from flyte._utils.lazy_module import is_imported
+
+ from .structured_dataset import (
+     DuplicateHandlerError,
+     StructuredDataset,
+     StructuredDatasetDecoder,
+     StructuredDatasetEncoder,
+     StructuredDatasetTransformerEngine,
+ )
+
+
+ @functools.lru_cache(maxsize=None)
+ def register_csv_handlers():
+     from .basic_dfs import CSVToPandasDecodingHandler, PandasToCSVEncodingHandler
+
+     StructuredDatasetTransformerEngine.register(PandasToCSVEncodingHandler(), default_format_for_type=True)
+     StructuredDatasetTransformerEngine.register(CSVToPandasDecodingHandler(), default_format_for_type=True)
+
+
+ @functools.lru_cache(maxsize=None)
+ def register_pandas_handlers():
+     import pandas as pd
+
+     from flyte.types._renderer import TopFrameRenderer
+
+     from .basic_dfs import PandasToParquetEncodingHandler, ParquetToPandasDecodingHandler
+
+     StructuredDatasetTransformerEngine.register(PandasToParquetEncodingHandler(), default_format_for_type=True)
+     StructuredDatasetTransformerEngine.register(ParquetToPandasDecodingHandler(), default_format_for_type=True)
+     StructuredDatasetTransformerEngine.register_renderer(pd.DataFrame, TopFrameRenderer())
+
+
+ @functools.lru_cache(maxsize=None)
+ def register_arrow_handlers():
+     import pyarrow as pa
+
+     from flyte.types._renderer import ArrowRenderer
+
+     from .basic_dfs import ArrowToParquetEncodingHandler, ParquetToArrowDecodingHandler
+
+     StructuredDatasetTransformerEngine.register(ArrowToParquetEncodingHandler(), default_format_for_type=True)
+     StructuredDatasetTransformerEngine.register(ParquetToArrowDecodingHandler(), default_format_for_type=True)
+     StructuredDatasetTransformerEngine.register_renderer(pa.Table, ArrowRenderer())
+
+
+ @functools.lru_cache(maxsize=None)
+ def register_bigquery_handlers():
+     try:
+         from .bigquery import (
+             ArrowToBQEncodingHandlers,
+             BQToArrowDecodingHandler,
+             BQToPandasDecodingHandler,
+             PandasToBQEncodingHandlers,
+         )
+
+         StructuredDatasetTransformerEngine.register(PandasToBQEncodingHandlers())
+         StructuredDatasetTransformerEngine.register(BQToPandasDecodingHandler())
+         StructuredDatasetTransformerEngine.register(ArrowToBQEncodingHandlers())
+         StructuredDatasetTransformerEngine.register(BQToArrowDecodingHandler())
+     except ImportError:
+         logger.info(
+             "We won't register bigquery handler for structured dataset because "
+             "we can't find the packages google-cloud-bigquery-storage and google-cloud-bigquery"
+         )
+
+
+ @functools.lru_cache(maxsize=None)
+ def register_snowflake_handlers():
+     try:
+         from .snowflake import PandasToSnowflakeEncodingHandlers, SnowflakeToPandasDecodingHandler
+
+         StructuredDatasetTransformerEngine.register(SnowflakeToPandasDecodingHandler())
+         StructuredDatasetTransformerEngine.register(PandasToSnowflakeEncodingHandlers())
+
+     except ImportError:
+         logger.info(
+             "We won't register snowflake handler for structured dataset because "
+             "we can't find package snowflake-connector-python"
+         )
+
+
+ def lazy_import_structured_dataset_handler():
+     if is_imported("pandas"):
+         try:
+             register_pandas_handlers()
+             register_csv_handlers()
+         except DuplicateHandlerError:
+             logger.debug("Transformer for pandas is already registered.")
+     if is_imported("pyarrow"):
+         try:
+             register_arrow_handlers()
+         except DuplicateHandlerError:
+             logger.debug("Transformer for arrow is already registered.")
+     if is_imported("google.cloud.bigquery"):
+         try:
+             register_bigquery_handlers()
+         except DuplicateHandlerError:
+             logger.debug("Transformer for bigquery is already registered.")
+     if is_imported("snowflake.connector"):
+         try:
+             register_snowflake_handlers()
+         except DuplicateHandlerError:
+             logger.debug("Transformer for snowflake is already registered.")
+
+
+ __all__ = [
+     "StructuredDataset",
+     "StructuredDatasetDecoder",
+     "StructuredDatasetEncoder",
+     "StructuredDatasetTransformerEngine",
+     "lazy_import_structured_dataset_handler",
+ ]
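
The `functools.lru_cache(maxsize=None)` decorator on each `register_*` function acts as a run-once latch rather than a cache: the first call performs the registrations, and every later call is a cache hit with no side effects, so `lazy_import_structured_dataset_handler` can be invoked from any code path without double-registering. A standalone sketch of the pattern (names here are illustrative, not part of the flyte API):

```python
import functools


@functools.lru_cache(maxsize=None)
def register_handlers_once() -> bool:
    # The body executes exactly once per process; subsequent calls
    # return the cached result without re-running it.
    print("registering handlers...")
    return True


register_handlers_once()  # prints and "registers"
register_handlers_once()  # cache hit: no side effects, returns True
```

`DuplicateHandlerError` remains as a second guard for the case where the same handler arrives through a different registration route; `lazy_import_structured_dataset_handler` downgrades that to a debug log.
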