flyte 2.0.0b13__py3-none-any.whl → 2.0.0b30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. flyte/__init__.py +18 -2
  2. flyte/_bin/debug.py +38 -0
  3. flyte/_bin/runtime.py +62 -8
  4. flyte/_cache/cache.py +4 -2
  5. flyte/_cache/local_cache.py +216 -0
  6. flyte/_code_bundle/_ignore.py +12 -4
  7. flyte/_code_bundle/_packaging.py +13 -9
  8. flyte/_code_bundle/_utils.py +18 -10
  9. flyte/_code_bundle/bundle.py +17 -9
  10. flyte/_constants.py +1 -0
  11. flyte/_context.py +4 -1
  12. flyte/_custom_context.py +73 -0
  13. flyte/_debug/constants.py +38 -0
  14. flyte/_debug/utils.py +17 -0
  15. flyte/_debug/vscode.py +307 -0
  16. flyte/_deploy.py +235 -61
  17. flyte/_environment.py +20 -6
  18. flyte/_excepthook.py +1 -1
  19. flyte/_hash.py +1 -16
  20. flyte/_image.py +178 -81
  21. flyte/_initialize.py +132 -51
  22. flyte/_interface.py +39 -2
  23. flyte/_internal/controllers/__init__.py +4 -5
  24. flyte/_internal/controllers/_local_controller.py +70 -29
  25. flyte/_internal/controllers/_trace.py +1 -1
  26. flyte/_internal/controllers/remote/__init__.py +0 -2
  27. flyte/_internal/controllers/remote/_action.py +14 -16
  28. flyte/_internal/controllers/remote/_client.py +1 -1
  29. flyte/_internal/controllers/remote/_controller.py +68 -70
  30. flyte/_internal/controllers/remote/_core.py +127 -99
  31. flyte/_internal/controllers/remote/_informer.py +19 -10
  32. flyte/_internal/controllers/remote/_service_protocol.py +7 -7
  33. flyte/_internal/imagebuild/docker_builder.py +181 -69
  34. flyte/_internal/imagebuild/image_builder.py +0 -5
  35. flyte/_internal/imagebuild/remote_builder.py +155 -64
  36. flyte/_internal/imagebuild/utils.py +51 -2
  37. flyte/_internal/resolvers/_task_module.py +5 -38
  38. flyte/_internal/resolvers/default.py +2 -2
  39. flyte/_internal/runtime/convert.py +110 -21
  40. flyte/_internal/runtime/entrypoints.py +27 -1
  41. flyte/_internal/runtime/io.py +21 -8
  42. flyte/_internal/runtime/resources_serde.py +20 -6
  43. flyte/_internal/runtime/reuse.py +1 -1
  44. flyte/_internal/runtime/rusty.py +20 -5
  45. flyte/_internal/runtime/task_serde.py +34 -19
  46. flyte/_internal/runtime/taskrunner.py +22 -4
  47. flyte/_internal/runtime/trigger_serde.py +160 -0
  48. flyte/_internal/runtime/types_serde.py +1 -1
  49. flyte/_keyring/__init__.py +0 -0
  50. flyte/_keyring/file.py +115 -0
  51. flyte/_logging.py +201 -39
  52. flyte/_map.py +111 -14
  53. flyte/_module.py +70 -0
  54. flyte/_pod.py +4 -3
  55. flyte/_resources.py +213 -31
  56. flyte/_run.py +110 -39
  57. flyte/_task.py +75 -16
  58. flyte/_task_environment.py +105 -29
  59. flyte/_task_plugins.py +4 -2
  60. flyte/_trace.py +5 -0
  61. flyte/_trigger.py +1000 -0
  62. flyte/_utils/__init__.py +2 -1
  63. flyte/_utils/asyn.py +3 -1
  64. flyte/_utils/coro_management.py +2 -1
  65. flyte/_utils/docker_credentials.py +173 -0
  66. flyte/_utils/module_loader.py +17 -2
  67. flyte/_version.py +3 -3
  68. flyte/cli/_abort.py +3 -3
  69. flyte/cli/_build.py +3 -6
  70. flyte/cli/_common.py +78 -7
  71. flyte/cli/_create.py +182 -4
  72. flyte/cli/_delete.py +23 -1
  73. flyte/cli/_deploy.py +63 -16
  74. flyte/cli/_get.py +79 -34
  75. flyte/cli/_params.py +26 -10
  76. flyte/cli/_plugins.py +209 -0
  77. flyte/cli/_run.py +151 -26
  78. flyte/cli/_serve.py +64 -0
  79. flyte/cli/_update.py +37 -0
  80. flyte/cli/_user.py +17 -0
  81. flyte/cli/main.py +30 -4
  82. flyte/config/_config.py +10 -6
  83. flyte/config/_internal.py +1 -0
  84. flyte/config/_reader.py +29 -8
  85. flyte/connectors/__init__.py +11 -0
  86. flyte/connectors/_connector.py +270 -0
  87. flyte/connectors/_server.py +197 -0
  88. flyte/connectors/utils.py +135 -0
  89. flyte/errors.py +22 -2
  90. flyte/extend.py +8 -1
  91. flyte/extras/_container.py +6 -1
  92. flyte/git/__init__.py +3 -0
  93. flyte/git/_config.py +21 -0
  94. flyte/io/__init__.py +2 -0
  95. flyte/io/_dataframe/__init__.py +2 -0
  96. flyte/io/_dataframe/basic_dfs.py +17 -8
  97. flyte/io/_dataframe/dataframe.py +98 -132
  98. flyte/io/_dir.py +575 -113
  99. flyte/io/_file.py +582 -139
  100. flyte/io/_hashing_io.py +342 -0
  101. flyte/models.py +74 -15
  102. flyte/remote/__init__.py +6 -1
  103. flyte/remote/_action.py +34 -26
  104. flyte/remote/_client/_protocols.py +39 -4
  105. flyte/remote/_client/auth/_authenticators/device_code.py +4 -5
  106. flyte/remote/_client/auth/_authenticators/pkce.py +1 -1
  107. flyte/remote/_client/auth/_channel.py +10 -6
  108. flyte/remote/_client/controlplane.py +17 -5
  109. flyte/remote/_console.py +3 -2
  110. flyte/remote/_data.py +6 -6
  111. flyte/remote/_logs.py +3 -3
  112. flyte/remote/_run.py +64 -8
  113. flyte/remote/_secret.py +26 -17
  114. flyte/remote/_task.py +75 -33
  115. flyte/remote/_trigger.py +306 -0
  116. flyte/remote/_user.py +33 -0
  117. flyte/report/_report.py +1 -1
  118. flyte/storage/__init__.py +6 -1
  119. flyte/storage/_config.py +5 -1
  120. flyte/storage/_parallel_reader.py +274 -0
  121. flyte/storage/_storage.py +200 -103
  122. flyte/types/__init__.py +16 -0
  123. flyte/types/_interface.py +2 -2
  124. flyte/types/_pickle.py +35 -8
  125. flyte/types/_string_literals.py +8 -9
  126. flyte/types/_type_engine.py +40 -70
  127. flyte/types/_utils.py +1 -1
  128. flyte-2.0.0b30.data/scripts/debug.py +38 -0
  129. {flyte-2.0.0b13.data → flyte-2.0.0b30.data}/scripts/runtime.py +62 -8
  130. {flyte-2.0.0b13.dist-info → flyte-2.0.0b30.dist-info}/METADATA +11 -3
  131. flyte-2.0.0b30.dist-info/RECORD +192 -0
  132. {flyte-2.0.0b13.dist-info → flyte-2.0.0b30.dist-info}/entry_points.txt +3 -0
  133. flyte/_protos/common/authorization_pb2.py +0 -66
  134. flyte/_protos/common/authorization_pb2.pyi +0 -108
  135. flyte/_protos/common/authorization_pb2_grpc.py +0 -4
  136. flyte/_protos/common/identifier_pb2.py +0 -93
  137. flyte/_protos/common/identifier_pb2.pyi +0 -110
  138. flyte/_protos/common/identifier_pb2_grpc.py +0 -4
  139. flyte/_protos/common/identity_pb2.py +0 -48
  140. flyte/_protos/common/identity_pb2.pyi +0 -72
  141. flyte/_protos/common/identity_pb2_grpc.py +0 -4
  142. flyte/_protos/common/list_pb2.py +0 -36
  143. flyte/_protos/common/list_pb2.pyi +0 -71
  144. flyte/_protos/common/list_pb2_grpc.py +0 -4
  145. flyte/_protos/common/policy_pb2.py +0 -37
  146. flyte/_protos/common/policy_pb2.pyi +0 -27
  147. flyte/_protos/common/policy_pb2_grpc.py +0 -4
  148. flyte/_protos/common/role_pb2.py +0 -37
  149. flyte/_protos/common/role_pb2.pyi +0 -53
  150. flyte/_protos/common/role_pb2_grpc.py +0 -4
  151. flyte/_protos/common/runtime_version_pb2.py +0 -28
  152. flyte/_protos/common/runtime_version_pb2.pyi +0 -24
  153. flyte/_protos/common/runtime_version_pb2_grpc.py +0 -4
  154. flyte/_protos/imagebuilder/definition_pb2.py +0 -59
  155. flyte/_protos/imagebuilder/definition_pb2.pyi +0 -140
  156. flyte/_protos/imagebuilder/definition_pb2_grpc.py +0 -4
  157. flyte/_protos/imagebuilder/payload_pb2.py +0 -32
  158. flyte/_protos/imagebuilder/payload_pb2.pyi +0 -21
  159. flyte/_protos/imagebuilder/payload_pb2_grpc.py +0 -4
  160. flyte/_protos/imagebuilder/service_pb2.py +0 -29
  161. flyte/_protos/imagebuilder/service_pb2.pyi +0 -5
  162. flyte/_protos/imagebuilder/service_pb2_grpc.py +0 -66
  163. flyte/_protos/logs/dataplane/payload_pb2.py +0 -100
  164. flyte/_protos/logs/dataplane/payload_pb2.pyi +0 -177
  165. flyte/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
  166. flyte/_protos/secret/definition_pb2.py +0 -49
  167. flyte/_protos/secret/definition_pb2.pyi +0 -93
  168. flyte/_protos/secret/definition_pb2_grpc.py +0 -4
  169. flyte/_protos/secret/payload_pb2.py +0 -62
  170. flyte/_protos/secret/payload_pb2.pyi +0 -94
  171. flyte/_protos/secret/payload_pb2_grpc.py +0 -4
  172. flyte/_protos/secret/secret_pb2.py +0 -38
  173. flyte/_protos/secret/secret_pb2.pyi +0 -6
  174. flyte/_protos/secret/secret_pb2_grpc.py +0 -198
  175. flyte/_protos/secret/secret_pb2_grpc_grpc.py +0 -198
  176. flyte/_protos/validate/validate/validate_pb2.py +0 -76
  177. flyte/_protos/workflow/common_pb2.py +0 -27
  178. flyte/_protos/workflow/common_pb2.pyi +0 -14
  179. flyte/_protos/workflow/common_pb2_grpc.py +0 -4
  180. flyte/_protos/workflow/environment_pb2.py +0 -29
  181. flyte/_protos/workflow/environment_pb2.pyi +0 -12
  182. flyte/_protos/workflow/environment_pb2_grpc.py +0 -4
  183. flyte/_protos/workflow/node_execution_service_pb2.py +0 -26
  184. flyte/_protos/workflow/node_execution_service_pb2.pyi +0 -4
  185. flyte/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
  186. flyte/_protos/workflow/queue_service_pb2.py +0 -109
  187. flyte/_protos/workflow/queue_service_pb2.pyi +0 -166
  188. flyte/_protos/workflow/queue_service_pb2_grpc.py +0 -172
  189. flyte/_protos/workflow/run_definition_pb2.py +0 -121
  190. flyte/_protos/workflow/run_definition_pb2.pyi +0 -327
  191. flyte/_protos/workflow/run_definition_pb2_grpc.py +0 -4
  192. flyte/_protos/workflow/run_logs_service_pb2.py +0 -41
  193. flyte/_protos/workflow/run_logs_service_pb2.pyi +0 -28
  194. flyte/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
  195. flyte/_protos/workflow/run_service_pb2.py +0 -137
  196. flyte/_protos/workflow/run_service_pb2.pyi +0 -185
  197. flyte/_protos/workflow/run_service_pb2_grpc.py +0 -446
  198. flyte/_protos/workflow/state_service_pb2.py +0 -67
  199. flyte/_protos/workflow/state_service_pb2.pyi +0 -76
  200. flyte/_protos/workflow/state_service_pb2_grpc.py +0 -138
  201. flyte/_protos/workflow/task_definition_pb2.py +0 -79
  202. flyte/_protos/workflow/task_definition_pb2.pyi +0 -81
  203. flyte/_protos/workflow/task_definition_pb2_grpc.py +0 -4
  204. flyte/_protos/workflow/task_service_pb2.py +0 -60
  205. flyte/_protos/workflow/task_service_pb2.pyi +0 -59
  206. flyte/_protos/workflow/task_service_pb2_grpc.py +0 -138
  207. flyte-2.0.0b13.dist-info/RECORD +0 -239
  208. /flyte/{_protos → _debug}/__init__.py +0 -0
  209. {flyte-2.0.0b13.dist-info → flyte-2.0.0b30.dist-info}/WHEEL +0 -0
  210. {flyte-2.0.0b13.dist-info → flyte-2.0.0b30.dist-info}/licenses/LICENSE +0 -0
  211. {flyte-2.0.0b13.dist-info → flyte-2.0.0b30.dist-info}/top_level.txt +0 -0
flyte/io/_file.py CHANGED
@@ -1,10 +1,13 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import inspect
3
4
  import os
5
+ import typing
4
6
  from contextlib import asynccontextmanager, contextmanager
5
7
  from pathlib import Path
6
8
  from typing import (
7
9
  IO,
10
+ Annotated,
8
11
  Any,
9
12
  AsyncGenerator,
10
13
  Dict,
@@ -17,18 +20,25 @@ from typing import (
17
20
  )
18
21
 
19
22
  import aiofiles
20
- from flyteidl.core import literals_pb2, types_pb2
21
- from fsspec.asyn import AsyncFileSystem
23
+ from flyteidl2.core import literals_pb2, types_pb2
22
24
  from fsspec.utils import get_protocol
23
25
  from mashumaro.types import SerializableType
24
- from pydantic import BaseModel, model_validator
26
+ from pydantic import BaseModel, Field, model_validator
27
+ from pydantic.json_schema import SkipJsonSchema
25
28
 
29
+ import flyte.errors
26
30
  import flyte.storage as storage
27
31
  from flyte._context import internal_ctx
28
32
  from flyte._initialize import requires_initialization
29
- from flyte._logging import logger
33
+ from flyte.io._hashing_io import AsyncHashingReader, HashingWriter, HashMethod, PrecomputedValue
30
34
  from flyte.types import TypeEngine, TypeTransformer, TypeTransformerFailedError
31
35
 
36
+ if typing.TYPE_CHECKING:
37
+ from obstore import AsyncReadableFile, AsyncWritableFile
38
+
39
+ if typing.TYPE_CHECKING:
40
+ from obstore import AsyncReadableFile, AsyncWritableFile
41
+
32
42
  # Type variable for the file format
33
43
  T = TypeVar("T")
34
44
 
@@ -36,63 +46,139 @@ T = TypeVar("T")
36
46
  class File(BaseModel, Generic[T], SerializableType):
37
47
  """
38
48
  A generic file class representing a file with a specified format.
39
- Provides both async and sync interfaces for file operations.
40
- Users must handle all I/O operations themselves by instantiating this class with the appropriate class methods.
49
+ Provides both async and sync interfaces for file operations. All methods without _sync suffix are async.
50
+
51
+ The class should be instantiated using one of the class methods. The constructor should be used only to
52
+ instantiate references to existing remote objects.
41
53
 
42
54
  The generic type T represents the format of the file.
43
55
 
44
- Example:
45
- ```python
46
- # Async usage
47
- from pandas import DataFrame
48
- csv_file = File[DataFrame](path="s3://my-bucket/data.csv")
56
+ Important methods:
57
+ - `from_existing_remote`: Create a File object from an existing remote file.
58
+ - `new_remote`: Create a new File reference for a remote file that will be written to.
49
59
 
50
- async with csv_file.open() as f:
51
- content = await f.read()
60
+ **Asynchronous methods**:
61
+ - `open`: Asynchronously open the file and return a file-like object.
62
+ - `download`: Asynchronously download the file to a local path.
63
+ - `from_local`: Asynchronously create a File object from a local file, uploading it to remote storage.
64
+ - `exists`: Asynchronously check if the file exists.
52
65
 
53
- # Sync alternative
54
- with csv_file.open_sync() as f:
55
- content = f.read()
56
- ```
66
+ **Synchronous methods** (suffixed with `_sync`):
67
+ - `open_sync`: Synchronously open the file and return a file-like object.
68
+ - `download_sync`: Synchronously download the file to a local path.
69
+ - `from_local_sync`: Synchronously create a File object from a local file, uploading it to remote storage.
70
+ - `exists_sync`: Synchronously check if the file exists.
57
71
 
58
- Example: Read a file input in a Task.
59
- ```
72
+ Example: Read a file input in a Task (Async).
73
+
74
+ ```python
60
75
  @env.task
61
- async def my_task(file: File[DataFrame]):
62
- async with file.open() as f:
63
- df = pd.read_csv(f)
76
+ async def read_file(file: File) -> str:
77
+ async with file.open("rb") as f:
78
+ content = bytes(await f.read())
79
+ return content.decode("utf-8")
64
80
  ```
65
81
 
66
- Example: Write a file by streaming it directly to blob storage
82
+ Example: Read a file input in a Task (Sync).
83
+
84
+ ```python
85
+ @env.task
86
+ def read_file_sync(file: File) -> str:
87
+ with file.open_sync("rb") as f:
88
+ content = f.read()
89
+ return content.decode("utf-8")
67
90
  ```
91
+
92
+ Example: Write a file by streaming it directly to blob storage (Async).
93
+
94
+ ```python
68
95
  @env.task
69
- async def my_task() -> File[DataFrame]:
70
- df = pd.DataFrame(...)
96
+ async def write_file() -> File:
71
97
  file = File.new_remote()
72
98
  async with file.open("wb") as f:
73
- df.to_csv(f)
74
- # No additional uploading will be done here.
99
+ await f.write(b"Hello, World!")
75
100
  return file
76
101
  ```
77
- Example: Write a file by writing it locally first, and then uploading it.
102
+
103
+ Example: Upload a local file to remote storage (Async).
104
+
105
+ ```python
106
+ @env.task
107
+ async def upload_file() -> File:
108
+ # Write to local file first
109
+ with open("/tmp/data.csv", "w") as f:
110
+ f.write("col1,col2\\n1,2\\n3,4\\n")
111
+ # Upload to remote storage
112
+ return await File.from_local("/tmp/data.csv")
78
113
  ```
114
+
115
+ Example: Upload a local file to remote storage (Sync).
116
+
117
+ ```python
79
118
  @env.task
80
- async def my_task() -> File[DataFrame]:
81
- # write to /tmp/data.csv
82
- return File.from_local("/tmp/data.csv", optional="s3://my-bucket/data.csv")
119
+ def upload_file_sync() -> File:
120
+ # Write to local file first
121
+ with open("/tmp/data.csv", "w") as f:
122
+ f.write("col1,col2\\n1,2\\n3,4\\n")
123
+ # Upload to remote storage
124
+ return File.from_local_sync("/tmp/data.csv")
83
125
  ```
84
126
 
85
- Example: From an existing remote file
127
+ Example: Download a file to local storage (Async).
128
+
129
+ ```python
130
+ @env.task
131
+ async def download_file(file: File) -> str:
132
+ local_path = await file.download()
133
+ # Process the local file
134
+ with open(local_path, "r") as f:
135
+ return f.read()
86
136
  ```
137
+
138
+ Example: Download a file to local storage (Sync).
139
+
140
+ ```python
87
141
  @env.task
88
- async def my_task() -> File[DataFrame]:
89
- return File.from_existing_remote("s3://my-bucket/data.csv")
142
+ def download_file_sync(file: File) -> str:
143
+ local_path = file.download_sync()
144
+ # Process the local file
145
+ with open(local_path, "r") as f:
146
+ return f.read()
90
147
  ```
91
148
 
92
- Example: Take a remote file as input and return the same one, should not do any copy
149
+ Example: Reference an existing remote file.
150
+
151
+ ```python
152
+ @env.task
153
+ async def process_existing_file() -> str:
154
+ file = File.from_existing_remote("s3://my-bucket/data.csv")
155
+ async with file.open("rb") as f:
156
+ content = await f.read()
157
+ return content.decode("utf-8")
93
158
  ```
159
+
160
+ Example: Check if a file exists (Async).
161
+
162
+ ```python
94
163
  @env.task
95
- async def my_task(file: File[DataFrame]) -> File[DataFrame]:
164
+ async def check_file(file: File) -> bool:
165
+ return await file.exists()
166
+ ```
167
+
168
+ Example: Check if a file exists (Sync).
169
+
170
+ ```python
171
+ @env.task
172
+ def check_file_sync(file: File) -> bool:
173
+ return file.exists_sync()
174
+ ```
175
+
176
+ Example: Pass through a file without copying.
177
+
178
+ ```python
179
+ @env.task
180
+ async def pass_through(file: File) -> File:
181
+ # No copy occurs - just passes the reference
96
182
  return file
97
183
  ```
98
184
 
@@ -104,6 +190,8 @@ class File(BaseModel, Generic[T], SerializableType):
104
190
  path: str
105
191
  name: Optional[str] = None
106
192
  format: str = ""
193
+ hash: Optional[str] = None
194
+ hash_method: Annotated[Optional[HashMethod], Field(default=None, exclude=True), SkipJsonSchema()] = None
107
195
 
108
196
  class Config:
109
197
  arbitrary_types_allowed = True
@@ -111,20 +199,24 @@ class File(BaseModel, Generic[T], SerializableType):
111
199
  @model_validator(mode="before")
112
200
  @classmethod
113
201
  def pre_init(cls, data):
202
+ """Internal: Pydantic validator to set default name from path. Not intended for direct use."""
114
203
  if data.get("name") is None:
115
204
  data["name"] = Path(data["path"]).name
116
205
  return data
117
206
 
118
207
  def _serialize(self) -> Dict[str, Optional[str]]:
208
+ """Internal: Serialize File to dictionary. Not intended for direct use."""
119
209
  pyd_dump = self.model_dump()
120
210
  return pyd_dump
121
211
 
122
212
  @classmethod
123
213
  def _deserialize(cls, file_dump: Dict[str, Optional[str]]) -> File:
214
+ """Internal: Deserialize File from dictionary. Not intended for direct use."""
124
215
  return File.model_validate(file_dump)
125
216
 
126
217
  @classmethod
127
218
  def schema_match(cls, incoming: dict):
219
+ """Internal: Check if incoming schema matches File schema. Not intended for direct use."""
128
220
  this_schema = cls.model_json_schema()
129
221
  current_required = this_schema.get("required")
130
222
  incoming_required = incoming.get("required")
@@ -139,41 +231,65 @@ class File(BaseModel, Generic[T], SerializableType):
139
231
 
140
232
  @classmethod
141
233
  @requires_initialization
142
- def new_remote(cls) -> File[T]:
234
+ def new_remote(cls, hash_method: Optional[HashMethod | str] = None) -> File[T]:
143
235
  """
144
236
  Create a new File reference for a remote file that will be written to.
145
237
 
146
- Example:
147
- ```
238
+ Use this when you want to create a new file and write to it directly without creating a local file first.
239
+
240
+ Example (Async):
241
+
242
+ ```python
148
243
  @env.task
149
- async def my_task() -> File[DataFrame]:
150
- df = pd.DataFrame(...)
244
+ async def create_csv() -> File:
245
+ df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
151
246
  file = File.new_remote()
152
247
  async with file.open("wb") as f:
153
248
  df.to_csv(f)
154
249
  return file
155
250
  ```
251
+
252
+ Args:
253
+ hash_method: Optional HashMethod or string to use for cache key computation. If a string is provided,
254
+ it will be used as a precomputed cache key. If a HashMethod is provided, it will be used
255
+ to compute the hash as data is written.
256
+
257
+ Returns:
258
+ A new File instance with a generated remote path
156
259
  """
157
260
  ctx = internal_ctx()
261
+ known_cache_key = hash_method if isinstance(hash_method, str) else None
262
+ method = hash_method if isinstance(hash_method, HashMethod) else None
158
263
 
159
- return cls(path=ctx.raw_data.get_random_remote_path())
264
+ return cls(path=ctx.raw_data.get_random_remote_path(), hash=known_cache_key, hash_method=method)
160
265
 
161
266
  @classmethod
162
- def from_existing_remote(cls, remote_path: str) -> File[T]:
267
+ def from_existing_remote(cls, remote_path: str, file_cache_key: Optional[str] = None) -> File[T]:
163
268
  """
164
269
  Create a File reference from an existing remote file.
165
270
 
271
+ Use this when you want to reference a file that already exists in remote storage without uploading it.
272
+
166
273
  Example:
274
+
167
275
  ```python
168
276
  @env.task
169
- async def my_task() -> File[DataFrame]:
170
- return File.from_existing_remote("s3://my-bucket/data.csv")
277
+ async def process_existing_file() -> str:
278
+ file = File.from_existing_remote("s3://my-bucket/data.csv")
279
+ async with file.open("rb") as f:
280
+ content = await f.read()
281
+ return content.decode("utf-8")
171
282
  ```
172
283
 
173
284
  Args:
174
285
  remote_path: The remote path to the existing file
286
+ file_cache_key: Optional hash value to use for cache key computation. If not specified, the cache key
287
+ will be computed based on the file's attributes (path, name, format).
288
+
289
+ Returns:
290
+ A new File instance pointing to the existing remote file
175
291
  """
176
- return cls(path=remote_path)
292
+ return cls(path=remote_path, hash=file_cache_key)
177
293
 
178
294
  @asynccontextmanager
179
295
  async def open(
@@ -184,84 +300,129 @@ class File(BaseModel, Generic[T], SerializableType):
184
300
  cache_options: Optional[dict] = None,
185
301
  compression: Optional[str] = None,
186
302
  **kwargs,
187
- ) -> AsyncGenerator[IO[Any]]:
303
+ ) -> AsyncGenerator[Union[AsyncWritableFile, AsyncReadableFile, "HashingWriter"], None]:
188
304
  """
189
305
  Asynchronously open the file and return a file-like object.
190
306
 
307
+ Use this method in async tasks to read from or write to files directly.
308
+
309
+ Example (Async Read):
310
+
311
+ ```python
312
+ @env.task
313
+ async def read_file(f: File) -> str:
314
+ async with f.open("rb") as fh:
315
+ content = bytes(await fh.read())
316
+ return content.decode("utf-8")
317
+ ```
318
+
319
+ Example (Async Write):
320
+
321
+ ```python
322
+ @env.task
323
+ async def write_file() -> File:
324
+ f = File.new_remote()
325
+ async with f.open("wb") as fh:
326
+ await fh.write(b"Hello, World!")
327
+ return f
328
+ ```
329
+
330
+ Example (Streaming Read):
331
+
332
+ ```python
333
+ @env.task
334
+ async def stream_read(f: File) -> str:
335
+ content_parts = []
336
+ async with f.open("rb", block_size=1024) as fh:
337
+ while True:
338
+ chunk = await fh.read()
339
+ if not chunk:
340
+ break
341
+ content_parts.append(chunk)
342
+ return b"".join(content_parts).decode("utf-8")
343
+ ```
344
+
191
345
  Args:
192
- mode: The mode to open the file in (default: 'rb')
193
- block_size: Size of blocks for reading (bytes)
346
+ mode: The mode to open the file in (default: 'rb'). Common modes: 'rb' (read binary),
347
+ 'wb' (write binary), 'rt' (read text), 'wt' (write text)
348
+ block_size: Size of blocks for reading in bytes. Useful for streaming large files.
194
349
  cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
195
350
  cache_options: Dictionary of options for the cache
196
351
  compression: Compression format or None for auto-detection
197
352
  **kwargs: Additional arguments passed to fsspec's open method
198
353
 
199
354
  Returns:
200
- An async file-like object
201
-
202
- Example:
203
- ```python
204
- async with file.open('rb') as f:
205
- data = await f.read()
206
- ```
355
+ An async file-like object that can be used with async read/write operations
207
356
  """
208
- fs = storage.get_underlying_filesystem(path=self.path)
209
-
210
- # Set up cache options if provided
211
- if cache_options is None:
212
- cache_options = {}
213
-
214
- # Configure the open parameters
215
- open_kwargs = {"mode": mode, **kwargs}
216
- if compression:
217
- open_kwargs["compression"] = compression
218
-
219
- if block_size:
220
- open_kwargs["block_size"] = block_size
221
-
222
- # Apply caching strategy
223
- if cache_type != "none":
224
- open_kwargs["cache_type"] = cache_type
225
- open_kwargs["cache_options"] = cache_options
226
-
227
- # Use aiofiles for local files
228
- if fs.protocol == "file":
229
- async with aiofiles.open(self.path, mode=mode, **kwargs) as f:
230
- yield f
231
- else:
232
- # This code is broadly similar to what storage.get_stream does, but without actually reading from the stream
233
- file_handle = None
357
+ # Check if we should use obstore bypass
358
+ try:
359
+ fh = await storage.open(
360
+ self.path,
361
+ mode=mode,
362
+ cache_type=cache_type,
363
+ cache_options=cache_options,
364
+ compression=compression,
365
+ block_size=block_size,
366
+ **kwargs,
367
+ )
234
368
  try:
235
- if "b" not in mode:
236
- raise ValueError("Mode must include 'b' for binary access, when using remote files.")
237
- if isinstance(fs, AsyncFileSystem):
238
- file_handle = await fs.open_async(self.path, mode)
239
- yield file_handle
240
- return
241
- except NotImplementedError:
242
- logger.debug(f"{fs} doesn't implement 'open_async', falling back to sync")
369
+ yield fh
370
+ return
243
371
  finally:
244
- if file_handle is not None:
245
- file_handle.close()
372
+ if inspect.iscoroutinefunction(fh.close):
373
+ await fh.close()
374
+ else:
375
+ fh.close()
376
+ except flyte.errors.OnlyAsyncIOSupportedError:
377
+ # Fall back to aiofiles
378
+ fs = storage.get_underlying_filesystem(path=self.path)
379
+ if "file" in fs.protocol:
380
+ async with aiofiles.open(self.path, mode=mode, **kwargs) as f:
381
+ yield f
382
+ return
383
+ raise
384
+
385
+ async def exists(self) -> bool:
386
+ """
387
+ Asynchronously check if the file exists.
388
+
389
+ Example (Async):
246
390
 
247
- with fs.open(self.path, mode) as file_handle:
248
- yield file_handle
391
+ ```python
392
+ @env.task
393
+ async def check_file(f: File) -> bool:
394
+ if await f.exists():
395
+ print("File exists!")
396
+ return True
397
+ return False
398
+ ```
399
+
400
+ Returns:
401
+ True if the file exists, False otherwise
402
+ """
403
+ return await storage.exists(self.path)
249
404
 
250
405
  def exists_sync(self) -> bool:
251
406
  """
252
407
  Synchronously check if the file exists.
253
408
 
409
+ Use this in non-async tasks or when you need synchronous file existence checking.
410
+
411
+ Example (Sync):
412
+
413
+ ```python
414
+ @env.task
415
+ def check_file_sync(f: File) -> bool:
416
+ if f.exists_sync():
417
+ print("File exists!")
418
+ return True
419
+ return False
420
+ ```
421
+
254
422
  Returns:
255
423
  True if the file exists, False otherwise
256
-
257
- Example:
258
- ```python
259
- if file.exists_sync():
260
- # Process the file
261
- ```
262
424
  """
263
- fs = storage.get_underlying_filesystem(path=self.path)
264
- return fs.exists(self.path)
425
+ return storage.exists_sync(self.path)
265
426
 
266
427
  @contextmanager
267
428
  def open_sync(
@@ -272,26 +433,44 @@ class File(BaseModel, Generic[T], SerializableType):
272
433
  cache_options: Optional[dict] = None,
273
434
  compression: Optional[str] = None,
274
435
  **kwargs,
275
- ) -> Generator[IO[Any]]:
436
+ ) -> Generator[IO[Any], None, None]:
276
437
  """
277
438
  Synchronously open the file and return a file-like object.
278
439
 
440
+ Use this method in non-async tasks to read from or write to files directly.
441
+
442
+ Example (Sync Read):
443
+
444
+ ```python
445
+ @env.task
446
+ def read_file_sync(f: File) -> str:
447
+ with f.open_sync("rb") as fh:
448
+ content = fh.read()
449
+ return content.decode("utf-8")
450
+ ```
451
+
452
+ Example (Sync Write):
453
+
454
+ ```python
455
+ @env.task
456
+ def write_file_sync() -> File:
457
+ f = File.new_remote()
458
+ with f.open_sync("wb") as fh:
459
+ fh.write(b"Hello, World!")
460
+ return f
461
+ ```
462
+
279
463
  Args:
280
- mode: The mode to open the file in (default: 'rb')
281
- block_size: Size of blocks for reading (bytes)
464
+ mode: The mode to open the file in (default: 'rb'). Common modes: 'rb' (read binary),
465
+ 'wb' (write binary), 'rt' (read text), 'wt' (write text)
466
+ block_size: Size of blocks for reading in bytes. Useful for streaming large files.
282
467
  cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
283
468
  cache_options: Dictionary of options for the cache
284
469
  compression: Compression format or None for auto-detection
285
470
  **kwargs: Additional arguments passed to fsspec's open method
286
471
 
287
472
  Returns:
288
- A file-like object
289
-
290
- Example:
291
- ```python
292
- with file.open_sync('rb') as f:
293
- data = f.read()
294
- ```
473
+ A file-like object that can be used with standard read/write operations
295
474
  """
296
475
  fs = storage.get_underlying_filesystem(path=self.path)
297
476
 
@@ -318,54 +497,188 @@ class File(BaseModel, Generic[T], SerializableType):
318
497
  """
319
498
  Asynchronously download the file to a local path.
320
499
 
500
+ Use this when you need to download a remote file to your local filesystem for processing.
501
+
502
+ Example (Async):
503
+
504
+ ```python
505
+ @env.task
506
+ async def download_and_process(f: File) -> str:
507
+ local_path = await f.download()
508
+ # Now process the local file
509
+ with open(local_path, "r") as fh:
510
+ return fh.read()
511
+ ```
512
+
513
+ Example (Download to specific path):
514
+
515
+ ```python
516
+ @env.task
517
+ async def download_to_path(f: File) -> str:
518
+ local_path = await f.download("/tmp/myfile.csv")
519
+ return local_path
520
+ ```
521
+
321
522
  Args:
322
523
  local_path: The local path to download the file to. If None, a temporary
323
- directory will be used.
524
+ directory will be used and a path will be generated.
324
525
 
325
526
  Returns:
326
- The path to the downloaded file
327
-
328
- Example:
329
- ```python
330
- local_file = await file.download('/tmp/myfile.csv')
331
- ```
527
+ The absolute path to the downloaded file
332
528
  """
333
529
  if local_path is None:
334
- local_path = storage.get_random_local_path(file_path_or_file_name=local_path)
530
+ local_path = storage.get_random_local_path(file_path_or_file_name=self.path)
335
531
  else:
532
+ # Preserve trailing separator if present (Path.absolute() strips it)
533
+ local_path_str = str(local_path)
534
+ has_trailing_sep = local_path_str.endswith(os.sep)
336
535
  local_path = str(Path(local_path).absolute())
536
+ if has_trailing_sep:
537
+ local_path = local_path + os.sep
337
538
 
338
539
  fs = storage.get_underlying_filesystem(path=self.path)
339
540
 
340
541
  # If it's already a local file, just copy it
341
542
  if "file" in fs.protocol:
543
+ # Apply directory logic for local-to-local copies
544
+ local_path_for_copy = local_path
545
+ if isinstance(local_path, str):
546
+ local_path_obj = Path(local_path)
547
+ # Check if it's a directory or ends with separator
548
+ if local_path.endswith(os.sep) or (local_path_obj.exists() and local_path_obj.is_dir()):
549
+ remote_filename = Path(self.path).name
550
+ local_path_for_copy = str(local_path_obj / remote_filename)
551
+
552
+ # Ensure parent directory exists
553
+ Path(local_path_for_copy).parent.mkdir(parents=True, exist_ok=True)
554
+
342
555
  # Use aiofiles for async copy
343
556
  async with aiofiles.open(self.path, "rb") as src:
344
- async with aiofiles.open(local_path, "wb") as dst:
557
+ async with aiofiles.open(local_path_for_copy, "wb") as dst:
345
558
  await dst.write(await src.read())
346
- return str(local_path)
559
+ return str(local_path_for_copy)
347
560
 
348
561
  # Otherwise download from remote using async functionality
349
- await storage.get(self.path, str(local_path))
562
+ result_path = await storage.get(self.path, str(local_path))
563
+ return result_path
564
+
565
+ def download_sync(self, local_path: Optional[Union[str, Path]] = None) -> str:
566
+ """
567
+ Synchronously download the file to a local path.
568
+
569
+ Use this in non-async tasks when you need to download a remote file to your local filesystem.
570
+
571
+ Example (Sync):
572
+
573
+ ```python
574
+ @env.task
575
+ def download_and_process_sync(f: File) -> str:
576
+ local_path = f.download_sync()
577
+ # Now process the local file
578
+ with open(local_path, "r") as fh:
579
+ return fh.read()
580
+ ```
581
+
582
+ Example (Download to specific path):
583
+
584
+ ```python
585
+ @env.task
586
+ def download_to_path_sync(f: File) -> str:
587
+ local_path = f.download_sync("/tmp/myfile.csv")
588
+ return local_path
589
+ ```
590
+
591
+ Args:
592
+ local_path: The local path to download the file to. If None, a temporary
593
+ directory will be used and a path will be generated.
594
+
595
+ Returns:
596
+ The absolute path to the downloaded file
597
+ """
598
+ if local_path is None:
599
+ local_path = storage.get_random_local_path(file_path_or_file_name=self.path)
600
+ else:
601
+ # Preserve trailing separator if present (Path.absolute() strips it)
602
+ local_path_str = str(local_path)
603
+ has_trailing_sep = local_path_str.endswith(os.sep)
604
+ local_path = str(Path(local_path).absolute())
605
+ if has_trailing_sep:
606
+ local_path = local_path + os.sep
607
+
608
+ fs = storage.get_underlying_filesystem(path=self.path)
609
+
610
+ # If it's already a local file, just copy it
611
+ if "file" in fs.protocol:
612
+ # Apply directory logic for local-to-local copies
613
+ local_path_for_copy = local_path
614
+ if isinstance(local_path, str):
615
+ local_path_obj = Path(local_path)
616
+ # Check if it's a directory or ends with separator
617
+ if local_path.endswith(os.sep) or (local_path_obj.exists() and local_path_obj.is_dir()):
618
+ remote_filename = Path(self.path).name
619
+ local_path_for_copy = str(local_path_obj / remote_filename)
620
+
621
+ # Ensure parent directory exists
622
+ Path(local_path_for_copy).parent.mkdir(parents=True, exist_ok=True)
623
+
624
+ # Use standard file operations for sync copy
625
+ import shutil
626
+
627
+ shutil.copy2(self.path, local_path_for_copy)
628
+ return str(local_path_for_copy)
629
+
630
+ # Otherwise download from remote using sync functionality
631
+ # Use the sync version of storage operations
632
+ with fs.open(self.path, "rb") as src:
633
+ with open(local_path, "wb") as dst:
634
+ dst.write(src.read())
350
635
  return str(local_path)
351
636
 
352
637
  @classmethod
353
638
  @requires_initialization
354
- async def from_local(cls, local_path: Union[str, Path], remote_destination: Optional[str] = None) -> File[T]:
639
+ def from_local_sync(
640
+ cls,
641
+ local_path: Union[str, Path],
642
+ remote_destination: Optional[str] = None,
643
+ hash_method: Optional[HashMethod | str] = None,
644
+ ) -> File[T]:
355
645
  """
356
- Create a new File object from a local file that will be uploaded to the configured remote store.
646
+ Synchronously create a new File object from a local file by uploading it to remote storage.
647
+
648
+ Use this in non-async tasks when you have a local file that needs to be uploaded to remote storage.
649
+
650
+ Example (Sync):
651
+
652
+ ```python
653
+ @env.task
654
+ def upload_local_file_sync() -> File:
655
+ # Create a local file
656
+ with open("/tmp/data.csv", "w") as f:
657
+ f.write("col1,col2\n1,2\n3,4\n")
658
+
659
+ # Upload to remote storage
660
+ remote_file = File.from_local_sync("/tmp/data.csv")
661
+ return remote_file
662
+ ```
663
+
664
+ Example (With specific destination):
665
+
666
+ ```python
667
+ @env.task
668
+ def upload_to_specific_path() -> File:
669
+ remote_file = File.from_local_sync("/tmp/data.csv", "s3://my-bucket/data.csv")
670
+ return remote_file
671
+ ```
357
672
 
358
673
  Args:
359
674
  local_path: Path to the local file
360
- remote_destination: Optional path to store the file remotely. If None, a path will be generated.
675
+ remote_destination: Optional remote path to store the file. If None, a path will be automatically generated.
676
+ hash_method: Optional HashMethod or string to use for cache key computation. If a string is provided,
677
+ it will be used as a precomputed cache key. If a HashMethod is provided, it will compute
678
+ the hash during upload. If not specified, the cache key will be based on file attributes.
361
679
 
362
680
  Returns:
363
- A new File instance pointing to the uploaded file
364
-
365
- Example:
366
- ```python
367
- remote_file = await File[DataFrame].from_local('/tmp/data.csv', 's3://bucket/data.csv')
368
- ```
681
+ A new File instance pointing to the uploaded remote file
369
682
  """
370
683
  if not os.path.exists(local_path):
371
684
  raise ValueError(f"File not found: {local_path}")
@@ -376,20 +689,148 @@ class File(BaseModel, Generic[T], SerializableType):
376
689
 
377
690
  # If remote_destination was not set by the user, and the configured raw data path is also local,
378
691
  # then let's optimize by not uploading.
692
+ hash_value = hash_method if isinstance(hash_method, str) else None
693
+ hash_method_obj = hash_method if isinstance(hash_method, HashMethod) else None
694
+
379
695
  if "file" in protocol:
380
696
  if remote_destination is None:
381
697
  path = str(Path(local_path).absolute())
382
698
  else:
383
699
  # Otherwise, actually make a copy of the file
384
- async with aiofiles.open(remote_path, "rb") as src:
385
- async with aiofiles.open(local_path, "wb") as dst:
386
- await dst.write(await src.read())
700
+ import shutil
701
+
702
+ if hash_method_obj:
703
+ # For hash computation, we need to read and write manually
704
+ with open(local_path, "rb") as src:
705
+ with open(remote_path, "wb") as dst:
706
+ dst_wrapper = HashingWriter(dst, accumulator=hash_method_obj)
707
+ dst_wrapper.write(src.read())
708
+ hash_value = dst_wrapper.result()
709
+ dst_wrapper.close()
710
+ else:
711
+ shutil.copy2(local_path, remote_path)
712
+ path = str(Path(remote_path).absolute())
713
+ else:
714
+ # Otherwise upload to remote using sync storage layer
715
+ fs = storage.get_underlying_filesystem(path=remote_path)
716
+
717
+ if hash_method_obj:
718
+ # We can skip the wrapper if the hash method is just a precomputed value
719
+ if not isinstance(hash_method_obj, PrecomputedValue):
720
+ with open(local_path, "rb") as src:
721
+ # For sync operations, we need to compute hash manually
722
+ data = src.read()
723
+ hash_method_obj.update(memoryview(data))
724
+ hash_value = hash_method_obj.result()
725
+
726
+ # Now write the data to remote
727
+ with fs.open(remote_path, "wb") as dst:
728
+ dst.write(data)
729
+ path = remote_path
730
+ else:
731
+ # Use sync file operations
732
+ with open(local_path, "rb") as src:
733
+ with fs.open(remote_path, "wb") as dst:
734
+ dst.write(src.read())
735
+ path = remote_path
736
+ hash_value = hash_method_obj.result()
737
+ else:
738
+ # Simple sync copy
739
+ with open(local_path, "rb") as src:
740
+ with fs.open(remote_path, "wb") as dst:
741
+ dst.write(src.read())
742
+ path = remote_path
743
+
744
+ f = cls(path=path, name=filename, hash_method=hash_method_obj, hash=hash_value)
745
+ return f
746
+
747
+ @classmethod
748
+ @requires_initialization
749
+ async def from_local(
750
+ cls,
751
+ local_path: Union[str, Path],
752
+ remote_destination: Optional[str] = None,
753
+ hash_method: Optional[HashMethod | str] = None,
754
+ ) -> File[T]:
755
+ """
756
+ Asynchronously create a new File object from a local file by uploading it to remote storage.
757
+
758
+ Use this in async tasks when you have a local file that needs to be uploaded to remote storage.
759
+
760
+ Example (Async):
761
+
762
+ ```python
763
+ @env.task
764
+ async def upload_local_file() -> File:
765
+ # Create a local file
766
+ async with aiofiles.open("/tmp/data.csv", "w") as f:
767
+ await f.write("col1,col2\n1,2\n3,4\n")
768
+
769
+ # Upload to remote storage
770
+ remote_file = await File.from_local("/tmp/data.csv")
771
+ return remote_file
772
+ ```
773
+
774
+ Example (With specific destination):
775
+
776
+ ```python
777
+ @env.task
778
+ async def upload_to_specific_path() -> File:
779
+ remote_file = await File.from_local("/tmp/data.csv", "s3://my-bucket/data.csv")
780
+ return remote_file
781
+ ```
782
+
783
+ Args:
784
+ local_path: Path to the local file
785
+ remote_destination: Optional remote path to store the file. If None, a path will be automatically generated.
786
+ hash_method: Optional HashMethod or string to use for cache key computation. If a string is provided,
787
+ it will be used as a precomputed cache key. If a HashMethod is provided, it will compute
788
+ the hash during upload. If not specified, the cache key will be based on file attributes.
789
+
790
+ Returns:
791
+ A new File instance pointing to the uploaded remote file
792
+ """
793
+ if not os.path.exists(local_path):
794
+ raise ValueError(f"File not found: {local_path}")
795
+
796
+ filename = Path(local_path).name
797
+ remote_path = remote_destination or internal_ctx().raw_data.get_random_remote_path(filename)
798
+ protocol = get_protocol(remote_path)
799
+
800
+ # If remote_destination was not set by the user, and the configured raw data path is also local,
801
+ # then let's optimize by not uploading.
802
+ hash_value = hash_method if isinstance(hash_method, str) else None
803
+ hash_method = hash_method if isinstance(hash_method, HashMethod) else None
804
+ if "file" in protocol:
805
+ if remote_destination is None:
806
+ path = str(Path(local_path).absolute())
807
+ else:
808
+ # Otherwise, actually make a copy of the file
809
+ async with aiofiles.open(local_path, "rb") as src:
810
+ async with aiofiles.open(remote_path, "wb") as dst:
811
+ if hash_method:
812
+ dst_wrapper = HashingWriter(dst, accumulator=hash_method)
813
+ await dst_wrapper.write(await src.read())
814
+ hash_value = dst_wrapper.result()
815
+ else:
816
+ await dst.write(await src.read())
387
817
  path = str(Path(remote_path).absolute())
388
818
  else:
389
819
  # Otherwise upload to remote using async storage layer
390
- path = await storage.put(str(local_path), remote_path)
820
+ if hash_method:
821
+ # We can skip the wrapper if the hash method is just a precomputed value
822
+ if not isinstance(hash_method, PrecomputedValue):
823
+ async with aiofiles.open(local_path, "rb") as src:
824
+ src_wrapper = AsyncHashingReader(src, accumulator=hash_method)
825
+ path = await storage.put_stream(src_wrapper, to_path=remote_path)
826
+ hash_value = src_wrapper.result()
827
+ else:
828
+ path = await storage.put(str(local_path), remote_path)
829
+ hash_value = hash_method.result()
830
+ else:
831
+ path = await storage.put(str(local_path), remote_path)
391
832
 
392
- f = cls(path=path, name=filename)
833
+ f = cls(path=path, name=filename, hash_method=hash_method, hash=hash_value)
393
834
  return f
394
835
 
395
836
 
@@ -432,7 +873,8 @@ class FileTransformer(TypeTransformer[File]):
432
873
  ),
433
874
  uri=python_val.path,
434
875
  )
435
- )
876
+ ),
877
+ hash=python_val.hash if python_val.hash else None,
436
878
  )
437
879
 
438
880
  async def to_python_value(
@@ -450,7 +892,8 @@ class FileTransformer(TypeTransformer[File]):
450
892
 
451
893
  uri = lv.scalar.blob.uri
452
894
  filename = Path(uri).name
453
- f: File = File(path=uri, name=filename, format=lv.scalar.blob.metadata.type.format)
895
+ hash_value = lv.hash if lv.hash else None
896
+ f: File = File(path=uri, name=filename, format=lv.scalar.blob.metadata.type.format, hash=hash_value)
454
897
  return f
455
898
 
456
899
  def guess_python_type(self, literal_type: types_pb2.LiteralType) -> Type[File]: