flyte 0.0.1b0__py3-none-any.whl → 2.0.0b46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (455)
  1. flyte/__init__.py +83 -30
  2. flyte/_bin/connect.py +61 -0
  3. flyte/_bin/debug.py +38 -0
  4. flyte/_bin/runtime.py +87 -19
  5. flyte/_bin/serve.py +351 -0
  6. flyte/_build.py +3 -2
  7. flyte/_cache/cache.py +6 -5
  8. flyte/_cache/local_cache.py +216 -0
  9. flyte/_code_bundle/_ignore.py +31 -5
  10. flyte/_code_bundle/_packaging.py +42 -11
  11. flyte/_code_bundle/_utils.py +57 -34
  12. flyte/_code_bundle/bundle.py +130 -27
  13. flyte/_constants.py +1 -0
  14. flyte/_context.py +21 -5
  15. flyte/_custom_context.py +73 -0
  16. flyte/_debug/constants.py +37 -0
  17. flyte/_debug/utils.py +17 -0
  18. flyte/_debug/vscode.py +315 -0
  19. flyte/_deploy.py +396 -75
  20. flyte/_deployer.py +109 -0
  21. flyte/_environment.py +94 -11
  22. flyte/_excepthook.py +37 -0
  23. flyte/_group.py +2 -1
  24. flyte/_hash.py +1 -16
  25. flyte/_image.py +544 -234
  26. flyte/_initialize.py +443 -294
  27. flyte/_interface.py +40 -5
  28. flyte/_internal/controllers/__init__.py +22 -8
  29. flyte/_internal/controllers/_local_controller.py +159 -35
  30. flyte/_internal/controllers/_trace.py +18 -10
  31. flyte/_internal/controllers/remote/__init__.py +38 -9
  32. flyte/_internal/controllers/remote/_action.py +82 -12
  33. flyte/_internal/controllers/remote/_client.py +6 -2
  34. flyte/_internal/controllers/remote/_controller.py +290 -64
  35. flyte/_internal/controllers/remote/_core.py +155 -95
  36. flyte/_internal/controllers/remote/_informer.py +40 -20
  37. flyte/_internal/controllers/remote/_service_protocol.py +2 -2
  38. flyte/_internal/imagebuild/__init__.py +2 -10
  39. flyte/_internal/imagebuild/docker_builder.py +391 -84
  40. flyte/_internal/imagebuild/image_builder.py +111 -55
  41. flyte/_internal/imagebuild/remote_builder.py +409 -0
  42. flyte/_internal/imagebuild/utils.py +79 -0
  43. flyte/_internal/resolvers/_app_env_module.py +92 -0
  44. flyte/_internal/resolvers/_task_module.py +5 -38
  45. flyte/_internal/resolvers/app_env.py +26 -0
  46. flyte/_internal/resolvers/common.py +8 -1
  47. flyte/_internal/resolvers/default.py +2 -2
  48. flyte/_internal/runtime/convert.py +322 -33
  49. flyte/_internal/runtime/entrypoints.py +106 -18
  50. flyte/_internal/runtime/io.py +71 -23
  51. flyte/_internal/runtime/resources_serde.py +21 -7
  52. flyte/_internal/runtime/reuse.py +125 -0
  53. flyte/_internal/runtime/rusty.py +196 -0
  54. flyte/_internal/runtime/task_serde.py +239 -66
  55. flyte/_internal/runtime/taskrunner.py +48 -8
  56. flyte/_internal/runtime/trigger_serde.py +162 -0
  57. flyte/_internal/runtime/types_serde.py +7 -16
  58. flyte/_keyring/file.py +115 -0
  59. flyte/_link.py +30 -0
  60. flyte/_logging.py +241 -42
  61. flyte/_map.py +312 -0
  62. flyte/_metrics.py +59 -0
  63. flyte/_module.py +74 -0
  64. flyte/_pod.py +30 -0
  65. flyte/_resources.py +296 -33
  66. flyte/_retry.py +1 -7
  67. flyte/_reusable_environment.py +72 -7
  68. flyte/_run.py +461 -132
  69. flyte/_secret.py +47 -11
  70. flyte/_serve.py +333 -0
  71. flyte/_task.py +245 -56
  72. flyte/_task_environment.py +219 -97
  73. flyte/_task_plugins.py +47 -0
  74. flyte/_tools.py +8 -8
  75. flyte/_trace.py +15 -24
  76. flyte/_trigger.py +1027 -0
  77. flyte/_utils/__init__.py +12 -1
  78. flyte/_utils/asyn.py +3 -1
  79. flyte/_utils/async_cache.py +139 -0
  80. flyte/_utils/coro_management.py +5 -4
  81. flyte/_utils/description_parser.py +19 -0
  82. flyte/_utils/docker_credentials.py +173 -0
  83. flyte/_utils/helpers.py +45 -19
  84. flyte/_utils/module_loader.py +123 -0
  85. flyte/_utils/org_discovery.py +57 -0
  86. flyte/_utils/uv_script_parser.py +8 -1
  87. flyte/_version.py +16 -3
  88. flyte/app/__init__.py +27 -0
  89. flyte/app/_app_environment.py +362 -0
  90. flyte/app/_connector_environment.py +40 -0
  91. flyte/app/_deploy.py +130 -0
  92. flyte/app/_parameter.py +343 -0
  93. flyte/app/_runtime/__init__.py +3 -0
  94. flyte/app/_runtime/app_serde.py +383 -0
  95. flyte/app/_types.py +113 -0
  96. flyte/app/extras/__init__.py +9 -0
  97. flyte/app/extras/_auth_middleware.py +217 -0
  98. flyte/app/extras/_fastapi.py +93 -0
  99. flyte/app/extras/_model_loader/__init__.py +3 -0
  100. flyte/app/extras/_model_loader/config.py +7 -0
  101. flyte/app/extras/_model_loader/loader.py +288 -0
  102. flyte/cli/__init__.py +12 -0
  103. flyte/cli/_abort.py +28 -0
  104. flyte/cli/_build.py +114 -0
  105. flyte/cli/_common.py +493 -0
  106. flyte/cli/_create.py +371 -0
  107. flyte/cli/_delete.py +45 -0
  108. flyte/cli/_deploy.py +401 -0
  109. flyte/cli/_gen.py +316 -0
  110. flyte/cli/_get.py +446 -0
  111. flyte/cli/_option.py +33 -0
  112. {union/_cli → flyte/cli}/_params.py +152 -153
  113. flyte/cli/_plugins.py +209 -0
  114. flyte/cli/_prefetch.py +292 -0
  115. flyte/cli/_run.py +690 -0
  116. flyte/cli/_serve.py +338 -0
  117. flyte/cli/_update.py +86 -0
  118. flyte/cli/_user.py +20 -0
  119. flyte/cli/main.py +246 -0
  120. flyte/config/__init__.py +3 -0
  121. flyte/config/_config.py +248 -0
  122. flyte/config/_internal.py +73 -0
  123. flyte/config/_reader.py +225 -0
  124. flyte/connectors/__init__.py +11 -0
  125. flyte/connectors/_connector.py +330 -0
  126. flyte/connectors/_server.py +194 -0
  127. flyte/connectors/utils.py +159 -0
  128. flyte/errors.py +134 -2
  129. flyte/extend.py +24 -0
  130. flyte/extras/_container.py +69 -56
  131. flyte/git/__init__.py +3 -0
  132. flyte/git/_config.py +279 -0
  133. flyte/io/__init__.py +8 -1
  134. flyte/io/{structured_dataset → _dataframe}/__init__.py +32 -30
  135. flyte/io/{structured_dataset → _dataframe}/basic_dfs.py +75 -68
  136. flyte/io/{structured_dataset/structured_dataset.py → _dataframe/dataframe.py} +207 -242
  137. flyte/io/_dir.py +575 -113
  138. flyte/io/_file.py +587 -141
  139. flyte/io/_hashing_io.py +342 -0
  140. flyte/io/extend.py +7 -0
  141. flyte/models.py +635 -0
  142. flyte/prefetch/__init__.py +22 -0
  143. flyte/prefetch/_hf_model.py +563 -0
  144. flyte/remote/__init__.py +14 -3
  145. flyte/remote/_action.py +879 -0
  146. flyte/remote/_app.py +346 -0
  147. flyte/remote/_auth_metadata.py +42 -0
  148. flyte/remote/_client/_protocols.py +62 -4
  149. flyte/remote/_client/auth/_auth_utils.py +19 -0
  150. flyte/remote/_client/auth/_authenticators/base.py +8 -2
  151. flyte/remote/_client/auth/_authenticators/device_code.py +4 -5
  152. flyte/remote/_client/auth/_authenticators/factory.py +4 -0
  153. flyte/remote/_client/auth/_authenticators/passthrough.py +79 -0
  154. flyte/remote/_client/auth/_authenticators/pkce.py +17 -18
  155. flyte/remote/_client/auth/_channel.py +47 -18
  156. flyte/remote/_client/auth/_client_config.py +5 -3
  157. flyte/remote/_client/auth/_keyring.py +15 -2
  158. flyte/remote/_client/auth/_token_client.py +3 -3
  159. flyte/remote/_client/controlplane.py +206 -18
  160. flyte/remote/_common.py +66 -0
  161. flyte/remote/_data.py +107 -22
  162. flyte/remote/_logs.py +116 -33
  163. flyte/remote/_project.py +21 -19
  164. flyte/remote/_run.py +164 -631
  165. flyte/remote/_secret.py +72 -29
  166. flyte/remote/_task.py +387 -46
  167. flyte/remote/_trigger.py +368 -0
  168. flyte/remote/_user.py +43 -0
  169. flyte/report/_report.py +10 -6
  170. flyte/storage/__init__.py +13 -1
  171. flyte/storage/_config.py +237 -0
  172. flyte/storage/_parallel_reader.py +289 -0
  173. flyte/storage/_storage.py +268 -59
  174. flyte/syncify/__init__.py +56 -0
  175. flyte/syncify/_api.py +414 -0
  176. flyte/types/__init__.py +39 -0
  177. flyte/types/_interface.py +22 -7
  178. flyte/{io/pickle/transformer.py → types/_pickle.py} +37 -9
  179. flyte/types/_string_literals.py +8 -9
  180. flyte/types/_type_engine.py +230 -129
  181. flyte/types/_utils.py +1 -1
  182. flyte-2.0.0b46.data/scripts/debug.py +38 -0
  183. flyte-2.0.0b46.data/scripts/runtime.py +194 -0
  184. flyte-2.0.0b46.dist-info/METADATA +352 -0
  185. flyte-2.0.0b46.dist-info/RECORD +221 -0
  186. flyte-2.0.0b46.dist-info/entry_points.txt +8 -0
  187. flyte-2.0.0b46.dist-info/licenses/LICENSE +201 -0
  188. flyte/_api_commons.py +0 -3
  189. flyte/_cli/_common.py +0 -287
  190. flyte/_cli/_create.py +0 -42
  191. flyte/_cli/_delete.py +0 -23
  192. flyte/_cli/_deploy.py +0 -140
  193. flyte/_cli/_get.py +0 -235
  194. flyte/_cli/_run.py +0 -152
  195. flyte/_cli/main.py +0 -72
  196. flyte/_datastructures.py +0 -342
  197. flyte/_internal/controllers/pbhash.py +0 -39
  198. flyte/_protos/common/authorization_pb2.py +0 -66
  199. flyte/_protos/common/authorization_pb2.pyi +0 -108
  200. flyte/_protos/common/authorization_pb2_grpc.py +0 -4
  201. flyte/_protos/common/identifier_pb2.py +0 -71
  202. flyte/_protos/common/identifier_pb2.pyi +0 -82
  203. flyte/_protos/common/identifier_pb2_grpc.py +0 -4
  204. flyte/_protos/common/identity_pb2.py +0 -48
  205. flyte/_protos/common/identity_pb2.pyi +0 -72
  206. flyte/_protos/common/identity_pb2_grpc.py +0 -4
  207. flyte/_protos/common/list_pb2.py +0 -36
  208. flyte/_protos/common/list_pb2.pyi +0 -69
  209. flyte/_protos/common/list_pb2_grpc.py +0 -4
  210. flyte/_protos/common/policy_pb2.py +0 -37
  211. flyte/_protos/common/policy_pb2.pyi +0 -27
  212. flyte/_protos/common/policy_pb2_grpc.py +0 -4
  213. flyte/_protos/common/role_pb2.py +0 -37
  214. flyte/_protos/common/role_pb2.pyi +0 -53
  215. flyte/_protos/common/role_pb2_grpc.py +0 -4
  216. flyte/_protos/common/runtime_version_pb2.py +0 -28
  217. flyte/_protos/common/runtime_version_pb2.pyi +0 -24
  218. flyte/_protos/common/runtime_version_pb2_grpc.py +0 -4
  219. flyte/_protos/logs/dataplane/payload_pb2.py +0 -96
  220. flyte/_protos/logs/dataplane/payload_pb2.pyi +0 -168
  221. flyte/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
  222. flyte/_protos/secret/definition_pb2.py +0 -49
  223. flyte/_protos/secret/definition_pb2.pyi +0 -93
  224. flyte/_protos/secret/definition_pb2_grpc.py +0 -4
  225. flyte/_protos/secret/payload_pb2.py +0 -62
  226. flyte/_protos/secret/payload_pb2.pyi +0 -94
  227. flyte/_protos/secret/payload_pb2_grpc.py +0 -4
  228. flyte/_protos/secret/secret_pb2.py +0 -38
  229. flyte/_protos/secret/secret_pb2.pyi +0 -6
  230. flyte/_protos/secret/secret_pb2_grpc.py +0 -198
  231. flyte/_protos/secret/secret_pb2_grpc_grpc.py +0 -198
  232. flyte/_protos/validate/validate/validate_pb2.py +0 -76
  233. flyte/_protos/workflow/node_execution_service_pb2.py +0 -26
  234. flyte/_protos/workflow/node_execution_service_pb2.pyi +0 -4
  235. flyte/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
  236. flyte/_protos/workflow/queue_service_pb2.py +0 -106
  237. flyte/_protos/workflow/queue_service_pb2.pyi +0 -141
  238. flyte/_protos/workflow/queue_service_pb2_grpc.py +0 -172
  239. flyte/_protos/workflow/run_definition_pb2.py +0 -128
  240. flyte/_protos/workflow/run_definition_pb2.pyi +0 -310
  241. flyte/_protos/workflow/run_definition_pb2_grpc.py +0 -4
  242. flyte/_protos/workflow/run_logs_service_pb2.py +0 -41
  243. flyte/_protos/workflow/run_logs_service_pb2.pyi +0 -28
  244. flyte/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
  245. flyte/_protos/workflow/run_service_pb2.py +0 -133
  246. flyte/_protos/workflow/run_service_pb2.pyi +0 -175
  247. flyte/_protos/workflow/run_service_pb2_grpc.py +0 -412
  248. flyte/_protos/workflow/state_service_pb2.py +0 -58
  249. flyte/_protos/workflow/state_service_pb2.pyi +0 -71
  250. flyte/_protos/workflow/state_service_pb2_grpc.py +0 -138
  251. flyte/_protos/workflow/task_definition_pb2.py +0 -72
  252. flyte/_protos/workflow/task_definition_pb2.pyi +0 -65
  253. flyte/_protos/workflow/task_definition_pb2_grpc.py +0 -4
  254. flyte/_protos/workflow/task_service_pb2.py +0 -44
  255. flyte/_protos/workflow/task_service_pb2.pyi +0 -31
  256. flyte/_protos/workflow/task_service_pb2_grpc.py +0 -104
  257. flyte/io/_dataframe.py +0 -0
  258. flyte/io/pickle/__init__.py +0 -0
  259. flyte/remote/_console.py +0 -18
  260. flyte-0.0.1b0.dist-info/METADATA +0 -179
  261. flyte-0.0.1b0.dist-info/RECORD +0 -390
  262. flyte-0.0.1b0.dist-info/entry_points.txt +0 -3
  263. union/__init__.py +0 -54
  264. union/_api_commons.py +0 -3
  265. union/_bin/__init__.py +0 -0
  266. union/_bin/runtime.py +0 -113
  267. union/_build.py +0 -25
  268. union/_cache/__init__.py +0 -12
  269. union/_cache/cache.py +0 -141
  270. union/_cache/defaults.py +0 -9
  271. union/_cache/policy_function_body.py +0 -42
  272. union/_cli/__init__.py +0 -0
  273. union/_cli/_common.py +0 -263
  274. union/_cli/_create.py +0 -40
  275. union/_cli/_delete.py +0 -23
  276. union/_cli/_deploy.py +0 -120
  277. union/_cli/_get.py +0 -162
  278. union/_cli/_run.py +0 -150
  279. union/_cli/main.py +0 -72
  280. union/_code_bundle/__init__.py +0 -8
  281. union/_code_bundle/_ignore.py +0 -113
  282. union/_code_bundle/_packaging.py +0 -187
  283. union/_code_bundle/_utils.py +0 -342
  284. union/_code_bundle/bundle.py +0 -176
  285. union/_context.py +0 -146
  286. union/_datastructures.py +0 -295
  287. union/_deploy.py +0 -185
  288. union/_doc.py +0 -29
  289. union/_docstring.py +0 -26
  290. union/_environment.py +0 -43
  291. union/_group.py +0 -31
  292. union/_hash.py +0 -23
  293. union/_image.py +0 -760
  294. union/_initialize.py +0 -585
  295. union/_interface.py +0 -84
  296. union/_internal/__init__.py +0 -3
  297. union/_internal/controllers/__init__.py +0 -77
  298. union/_internal/controllers/_local_controller.py +0 -77
  299. union/_internal/controllers/pbhash.py +0 -39
  300. union/_internal/controllers/remote/__init__.py +0 -40
  301. union/_internal/controllers/remote/_action.py +0 -131
  302. union/_internal/controllers/remote/_client.py +0 -43
  303. union/_internal/controllers/remote/_controller.py +0 -169
  304. union/_internal/controllers/remote/_core.py +0 -341
  305. union/_internal/controllers/remote/_informer.py +0 -260
  306. union/_internal/controllers/remote/_service_protocol.py +0 -44
  307. union/_internal/imagebuild/__init__.py +0 -11
  308. union/_internal/imagebuild/docker_builder.py +0 -416
  309. union/_internal/imagebuild/image_builder.py +0 -243
  310. union/_internal/imagebuild/remote_builder.py +0 -0
  311. union/_internal/resolvers/__init__.py +0 -0
  312. union/_internal/resolvers/_task_module.py +0 -31
  313. union/_internal/resolvers/common.py +0 -24
  314. union/_internal/resolvers/default.py +0 -27
  315. union/_internal/runtime/__init__.py +0 -0
  316. union/_internal/runtime/convert.py +0 -163
  317. union/_internal/runtime/entrypoints.py +0 -121
  318. union/_internal/runtime/io.py +0 -136
  319. union/_internal/runtime/resources_serde.py +0 -134
  320. union/_internal/runtime/task_serde.py +0 -202
  321. union/_internal/runtime/taskrunner.py +0 -179
  322. union/_internal/runtime/types_serde.py +0 -53
  323. union/_logging.py +0 -124
  324. union/_protos/__init__.py +0 -0
  325. union/_protos/common/authorization_pb2.py +0 -66
  326. union/_protos/common/authorization_pb2.pyi +0 -106
  327. union/_protos/common/authorization_pb2_grpc.py +0 -4
  328. union/_protos/common/identifier_pb2.py +0 -71
  329. union/_protos/common/identifier_pb2.pyi +0 -82
  330. union/_protos/common/identifier_pb2_grpc.py +0 -4
  331. union/_protos/common/identity_pb2.py +0 -48
  332. union/_protos/common/identity_pb2.pyi +0 -72
  333. union/_protos/common/identity_pb2_grpc.py +0 -4
  334. union/_protos/common/list_pb2.py +0 -36
  335. union/_protos/common/list_pb2.pyi +0 -69
  336. union/_protos/common/list_pb2_grpc.py +0 -4
  337. union/_protos/common/policy_pb2.py +0 -37
  338. union/_protos/common/policy_pb2.pyi +0 -27
  339. union/_protos/common/policy_pb2_grpc.py +0 -4
  340. union/_protos/common/role_pb2.py +0 -37
  341. union/_protos/common/role_pb2.pyi +0 -51
  342. union/_protos/common/role_pb2_grpc.py +0 -4
  343. union/_protos/common/runtime_version_pb2.py +0 -28
  344. union/_protos/common/runtime_version_pb2.pyi +0 -24
  345. union/_protos/common/runtime_version_pb2_grpc.py +0 -4
  346. union/_protos/logs/dataplane/payload_pb2.py +0 -96
  347. union/_protos/logs/dataplane/payload_pb2.pyi +0 -168
  348. union/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
  349. union/_protos/secret/definition_pb2.py +0 -49
  350. union/_protos/secret/definition_pb2.pyi +0 -93
  351. union/_protos/secret/definition_pb2_grpc.py +0 -4
  352. union/_protos/secret/payload_pb2.py +0 -62
  353. union/_protos/secret/payload_pb2.pyi +0 -94
  354. union/_protos/secret/payload_pb2_grpc.py +0 -4
  355. union/_protos/secret/secret_pb2.py +0 -38
  356. union/_protos/secret/secret_pb2.pyi +0 -6
  357. union/_protos/secret/secret_pb2_grpc.py +0 -198
  358. union/_protos/validate/validate/validate_pb2.py +0 -76
  359. union/_protos/workflow/node_execution_service_pb2.py +0 -26
  360. union/_protos/workflow/node_execution_service_pb2.pyi +0 -4
  361. union/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
  362. union/_protos/workflow/queue_service_pb2.py +0 -75
  363. union/_protos/workflow/queue_service_pb2.pyi +0 -103
  364. union/_protos/workflow/queue_service_pb2_grpc.py +0 -172
  365. union/_protos/workflow/run_definition_pb2.py +0 -100
  366. union/_protos/workflow/run_definition_pb2.pyi +0 -256
  367. union/_protos/workflow/run_definition_pb2_grpc.py +0 -4
  368. union/_protos/workflow/run_logs_service_pb2.py +0 -41
  369. union/_protos/workflow/run_logs_service_pb2.pyi +0 -28
  370. union/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
  371. union/_protos/workflow/run_service_pb2.py +0 -133
  372. union/_protos/workflow/run_service_pb2.pyi +0 -173
  373. union/_protos/workflow/run_service_pb2_grpc.py +0 -412
  374. union/_protos/workflow/state_service_pb2.py +0 -58
  375. union/_protos/workflow/state_service_pb2.pyi +0 -69
  376. union/_protos/workflow/state_service_pb2_grpc.py +0 -138
  377. union/_protos/workflow/task_definition_pb2.py +0 -72
  378. union/_protos/workflow/task_definition_pb2.pyi +0 -65
  379. union/_protos/workflow/task_definition_pb2_grpc.py +0 -4
  380. union/_protos/workflow/task_service_pb2.py +0 -44
  381. union/_protos/workflow/task_service_pb2.pyi +0 -31
  382. union/_protos/workflow/task_service_pb2_grpc.py +0 -104
  383. union/_resources.py +0 -226
  384. union/_retry.py +0 -32
  385. union/_reusable_environment.py +0 -25
  386. union/_run.py +0 -374
  387. union/_secret.py +0 -61
  388. union/_task.py +0 -354
  389. union/_task_environment.py +0 -186
  390. union/_timeout.py +0 -47
  391. union/_tools.py +0 -27
  392. union/_utils/__init__.py +0 -11
  393. union/_utils/asyn.py +0 -119
  394. union/_utils/file_handling.py +0 -71
  395. union/_utils/helpers.py +0 -46
  396. union/_utils/lazy_module.py +0 -54
  397. union/_utils/uv_script_parser.py +0 -49
  398. union/_version.py +0 -21
  399. union/connectors/__init__.py +0 -0
  400. union/errors.py +0 -128
  401. union/extras/__init__.py +0 -5
  402. union/extras/_container.py +0 -263
  403. union/io/__init__.py +0 -11
  404. union/io/_dataframe.py +0 -0
  405. union/io/_dir.py +0 -425
  406. union/io/_file.py +0 -418
  407. union/io/pickle/__init__.py +0 -0
  408. union/io/pickle/transformer.py +0 -117
  409. union/io/structured_dataset/__init__.py +0 -122
  410. union/io/structured_dataset/basic_dfs.py +0 -219
  411. union/io/structured_dataset/structured_dataset.py +0 -1057
  412. union/py.typed +0 -0
  413. union/remote/__init__.py +0 -23
  414. union/remote/_client/__init__.py +0 -0
  415. union/remote/_client/_protocols.py +0 -129
  416. union/remote/_client/auth/__init__.py +0 -12
  417. union/remote/_client/auth/_authenticators/__init__.py +0 -0
  418. union/remote/_client/auth/_authenticators/base.py +0 -391
  419. union/remote/_client/auth/_authenticators/client_credentials.py +0 -73
  420. union/remote/_client/auth/_authenticators/device_code.py +0 -120
  421. union/remote/_client/auth/_authenticators/external_command.py +0 -77
  422. union/remote/_client/auth/_authenticators/factory.py +0 -200
  423. union/remote/_client/auth/_authenticators/pkce.py +0 -515
  424. union/remote/_client/auth/_channel.py +0 -184
  425. union/remote/_client/auth/_client_config.py +0 -83
  426. union/remote/_client/auth/_default_html.py +0 -32
  427. union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  428. union/remote/_client/auth/_grpc_utils/auth_interceptor.py +0 -204
  429. union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +0 -144
  430. union/remote/_client/auth/_keyring.py +0 -154
  431. union/remote/_client/auth/_token_client.py +0 -258
  432. union/remote/_client/auth/errors.py +0 -16
  433. union/remote/_client/controlplane.py +0 -86
  434. union/remote/_data.py +0 -149
  435. union/remote/_logs.py +0 -74
  436. union/remote/_project.py +0 -86
  437. union/remote/_run.py +0 -820
  438. union/remote/_secret.py +0 -132
  439. union/remote/_task.py +0 -193
  440. union/report/__init__.py +0 -3
  441. union/report/_report.py +0 -178
  442. union/report/_template.html +0 -124
  443. union/storage/__init__.py +0 -24
  444. union/storage/_remote_fs.py +0 -34
  445. union/storage/_storage.py +0 -247
  446. union/storage/_utils.py +0 -5
  447. union/types/__init__.py +0 -11
  448. union/types/_renderer.py +0 -162
  449. union/types/_string_literals.py +0 -120
  450. union/types/_type_engine.py +0 -2131
  451. union/types/_utils.py +0 -80
  452. /flyte/{_cli → _debug}/__init__.py +0 -0
  453. /flyte/{_protos → _keyring}/__init__.py +0 -0
  454. {flyte-0.0.1b0.dist-info → flyte-2.0.0b46.dist-info}/WHEEL +0 -0
  455. {flyte-0.0.1b0.dist-info → flyte-2.0.0b46.dist-info}/top_level.txt +0 -0
flyte/io/_file.py CHANGED
@@ -1,10 +1,13 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import inspect
3
4
  import os
5
+ import typing
4
6
  from contextlib import asynccontextmanager, contextmanager
5
7
  from pathlib import Path
6
8
  from typing import (
7
9
  IO,
10
+ Annotated,
8
11
  Any,
9
12
  AsyncGenerator,
10
13
  Dict,
@@ -17,85 +20,165 @@ from typing import (
17
20
  )
18
21
 
19
22
  import aiofiles
20
- from flyteidl.core import literals_pb2, types_pb2
21
- from fsspec.asyn import AsyncFileSystem
23
+ from flyteidl2.core import literals_pb2, types_pb2
22
24
  from fsspec.utils import get_protocol
23
25
  from mashumaro.types import SerializableType
24
- from pydantic import BaseModel, model_validator
25
- from synchronicity import Synchronizer
26
+ from pydantic import BaseModel, Field, model_validator
27
+ from pydantic.json_schema import SkipJsonSchema
26
28
 
29
+ import flyte.errors
27
30
  import flyte.storage as storage
28
31
  from flyte._context import internal_ctx
29
32
  from flyte._initialize import requires_initialization
30
- from flyte._logging import logger
33
+ from flyte.io._hashing_io import AsyncHashingReader, HashingWriter, HashMethod, PrecomputedValue
31
34
  from flyte.types import TypeEngine, TypeTransformer, TypeTransformerFailedError
32
35
 
36
+ if typing.TYPE_CHECKING:
37
+ from obstore import AsyncReadableFile, AsyncWritableFile
38
+
39
+ if typing.TYPE_CHECKING:
40
+ from obstore import AsyncReadableFile, AsyncWritableFile
41
+
33
42
  # Type variable for the file format
34
43
  T = TypeVar("T")
35
44
 
36
- synced = Synchronizer()
37
-
38
45
 
39
46
  class File(BaseModel, Generic[T], SerializableType):
40
47
  """
41
48
  A generic file class representing a file with a specified format.
42
- Provides both async and sync interfaces for file operations.
43
- Users must handle all I/O operations themselves by instantiating this class with the appropriate class methods.
49
+ Provides both async and sync interfaces for file operations. All methods without _sync suffix are async.
50
+
51
+ The class should be instantiated using one of the class methods. The constructor should be used only to
52
+ instantiate references to existing remote objects.
44
53
 
45
54
  The generic type T represents the format of the file.
46
55
 
47
- Example:
48
- ```python
49
- # Async usage
50
- from pandas import DataFrame
51
- csv_file = File[DataFrame](path="s3://my-bucket/data.csv")
56
+ Important methods:
57
+ - `from_existing_remote`: Create a File object from an existing remote file.
58
+ - `new_remote`: Create a new File reference for a remote file that will be written to.
52
59
 
53
- async with csv_file.open() as f:
54
- content = await f.read()
60
+ **Asynchronous methods**:
61
+ - `open`: Asynchronously open the file and return a file-like object.
62
+ - `download`: Asynchronously download the file to a local path.
63
+ - `from_local`: Asynchronously create a File object from a local file, uploading it to remote storage.
64
+ - `exists`: Asynchronously check if the file exists.
55
65
 
56
- # Sync alternative
57
- with csv_file.open_sync() as f:
58
- content = f.read()
59
- ```
66
+ **Synchronous methods** (suffixed with `_sync`):
67
+ - `open_sync`: Synchronously open the file and return a file-like object.
68
+ - `download_sync`: Synchronously download the file to a local path.
69
+ - `from_local_sync`: Synchronously create a File object from a local file, uploading it to remote storage.
70
+ - `exists_sync`: Synchronously check if the file exists.
60
71
 
61
- Example: Read a file input in a Task.
62
- ```
72
+ Example: Read a file input in a Task (Async).
73
+
74
+ ```python
63
75
  @env.task
64
- async def my_task(file: File[DataFrame]):
65
- async with file.open() as f:
66
- df = pd.read_csv(f)
76
+ async def read_file(file: File) -> str:
77
+ async with file.open("rb") as f:
78
+ content = bytes(await f.read())
79
+ return content.decode("utf-8")
67
80
  ```
68
81
 
69
- Example: Write a file by streaming it directly to blob storage
82
+ Example: Read a file input in a Task (Sync).
83
+
84
+ ```python
85
+ @env.task
86
+ def read_file_sync(file: File) -> str:
87
+ with file.open_sync("rb") as f:
88
+ content = f.read()
89
+ return content.decode("utf-8")
70
90
  ```
91
+
92
+ Example: Write a file by streaming it directly to blob storage (Async).
93
+
94
+ ```python
71
95
  @env.task
72
- async def my_task() -> File[DataFrame]:
73
- df = pd.DataFrame(...)
96
+ async def write_file() -> File:
74
97
  file = File.new_remote()
75
98
  async with file.open("wb") as f:
76
- df.to_csv(f)
77
- # No additional uploading will be done here.
99
+ await f.write(b"Hello, World!")
78
100
  return file
79
101
  ```
80
- Example: Write a file by writing it locally first, and then uploading it.
102
+
103
+ Example: Upload a local file to remote storage (Async).
104
+
105
+ ```python
106
+ @env.task
107
+ async def upload_file() -> File:
108
+ # Write to local file first
109
+ with open("/tmp/data.csv", "w") as f:
110
+ f.write("col1,col2\\n1,2\\n3,4\\n")
111
+ # Upload to remote storage
112
+ return await File.from_local("/tmp/data.csv")
81
113
  ```
114
+
115
+ Example: Upload a local file to remote storage (Sync).
116
+
117
+ ```python
118
+ @env.task
119
+ def upload_file_sync() -> File:
120
+ # Write to local file first
121
+ with open("/tmp/data.csv", "w") as f:
122
+ f.write("col1,col2\\n1,2\\n3,4\\n")
123
+ # Upload to remote storage
124
+ return File.from_local_sync("/tmp/data.csv")
125
+ ```
126
+
127
+ Example: Download a file to local storage (Async).
128
+
129
+ ```python
130
+ @env.task
131
+ async def download_file(file: File) -> str:
132
+ local_path = await file.download()
133
+ # Process the local file
134
+ with open(local_path, "r") as f:
135
+ return f.read()
136
+ ```
137
+
138
+ Example: Download a file to local storage (Sync).
139
+
140
+ ```python
82
141
  @env.task
83
- async def my_task() -> File[DataFrame]:
84
- # write to /tmp/data.csv
85
- return File.from_local("/tmp/data.csv", optional="s3://my-bucket/data.csv")
142
+ def download_file_sync(file: File) -> str:
143
+ local_path = file.download_sync()
144
+ # Process the local file
145
+ with open(local_path, "r") as f:
146
+ return f.read()
86
147
  ```
87
148
 
88
- Example: From an existing remote file
149
+ Example: Reference an existing remote file.
150
+
151
+ ```python
152
+ @env.task
153
+ async def process_existing_file() -> str:
154
+ file = File.from_existing_remote("s3://my-bucket/data.csv")
155
+ async with file.open("rb") as f:
156
+ content = await f.read()
157
+ return content.decode("utf-8")
89
158
  ```
159
+
160
+ Example: Check if a file exists (Async).
161
+
162
+ ```python
90
163
  @env.task
91
- async def my_task() -> File[DataFrame]:
92
- return File.from_existing_remote("s3://my-bucket/data.csv")
164
+ async def check_file(file: File) -> bool:
165
+ return await file.exists()
93
166
  ```
94
167
 
95
- Example: Take a remote file as input and return the same one, should not do any copy
168
+ Example: Check if a file exists (Sync).
169
+
170
+ ```python
171
+ @env.task
172
+ def check_file_sync(file: File) -> bool:
173
+ return file.exists_sync()
96
174
  ```
175
+
176
+ Example: Pass through a file without copying.
177
+
178
+ ```python
97
179
  @env.task
98
- async def my_task(file: File[DataFrame]) -> File[DataFrame]:
180
+ async def pass_through(file: File) -> File:
181
+ # No copy occurs - just passes the reference
99
182
  return file
100
183
  ```
101
184
 
@@ -107,6 +190,8 @@ class File(BaseModel, Generic[T], SerializableType):
107
190
  path: str
108
191
  name: Optional[str] = None
109
192
  format: str = ""
193
+ hash: Optional[str] = None
194
+ hash_method: Annotated[Optional[HashMethod], Field(default=None, exclude=True), SkipJsonSchema()] = None
110
195
 
111
196
  class Config:
112
197
  arbitrary_types_allowed = True
@@ -114,20 +199,24 @@ class File(BaseModel, Generic[T], SerializableType):
114
199
  @model_validator(mode="before")
115
200
  @classmethod
116
201
  def pre_init(cls, data):
202
+ """Internal: Pydantic validator to set default name from path. Not intended for direct use."""
117
203
  if data.get("name") is None:
118
204
  data["name"] = Path(data["path"]).name
119
205
  return data
120
206
 
121
207
  def _serialize(self) -> Dict[str, Optional[str]]:
208
+ """Internal: Serialize File to dictionary. Not intended for direct use."""
122
209
  pyd_dump = self.model_dump()
123
210
  return pyd_dump
124
211
 
125
212
  @classmethod
126
213
  def _deserialize(cls, file_dump: Dict[str, Optional[str]]) -> File:
214
+ """Internal: Deserialize File from dictionary. Not intended for direct use."""
127
215
  return File.model_validate(file_dump)
128
216
 
129
217
  @classmethod
130
218
  def schema_match(cls, incoming: dict):
219
+ """Internal: Check if incoming schema matches File schema. Not intended for direct use."""
131
220
  this_schema = cls.model_json_schema()
132
221
  current_required = this_schema.get("required")
133
222
  incoming_required = incoming.get("required")
@@ -142,41 +231,69 @@ class File(BaseModel, Generic[T], SerializableType):
142
231
 
143
232
  @classmethod
144
233
  @requires_initialization
145
- def new_remote(cls) -> File[T]:
234
+ def new_remote(cls, file_name: Optional[str] = None, hash_method: Optional[HashMethod | str] = None) -> File[T]:
146
235
  """
147
236
  Create a new File reference for a remote file that will be written to.
148
237
 
149
- Example:
150
- ```
238
+ Use this when you want to create a new file and write to it directly without creating a local file first.
239
+
240
+ Example (Async):
241
+
242
+ ```python
151
243
  @env.task
152
- async def my_task() -> File[DataFrame]:
153
- df = pd.DataFrame(...)
244
+ async def create_csv() -> File:
245
+ df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
154
246
  file = File.new_remote()
155
247
  async with file.open("wb") as f:
156
248
  df.to_csv(f)
157
249
  return file
158
250
  ```
251
+
252
+ Args:
253
+ file_name: Optional string specifying a remote file name. If not set,
254
+ a generated file name will be returned.
255
+ hash_method: Optional HashMethod or string to use for cache key computation. If a string is provided,
256
+ it will be used as a precomputed cache key. If a HashMethod is provided, it will be used
257
+ to compute the hash as data is written.
258
+
259
+ Returns:
260
+ A new File instance with a generated remote path
159
261
  """
160
262
  ctx = internal_ctx()
263
+ known_cache_key = hash_method if isinstance(hash_method, str) else None
264
+ method = hash_method if isinstance(hash_method, HashMethod) else None
161
265
 
162
- return cls(path=ctx.raw_data.get_random_remote_path())
266
+ return cls(
267
+ path=ctx.raw_data.get_random_remote_path(file_name=file_name), hash=known_cache_key, hash_method=method
268
+ )
163
269
 
164
270
  @classmethod
165
- def from_existing_remote(cls, remote_path: str) -> File[T]:
271
+ def from_existing_remote(cls, remote_path: str, file_cache_key: Optional[str] = None) -> File[T]:
166
272
  """
167
273
  Create a File reference from an existing remote file.
168
274
 
275
+ Use this when you want to reference a file that already exists in remote storage without uploading it.
276
+
169
277
  Example:
278
+
170
279
  ```python
171
280
  @env.task
172
- async def my_task() -> File[DataFrame]:
173
- return File.from_existing_remote("s3://my-bucket/data.csv")
281
+ async def process_existing_file() -> str:
282
+ file = File.from_existing_remote("s3://my-bucket/data.csv")
283
+ async with file.open("rb") as f:
284
+ content = await f.read()
285
+ return content.decode("utf-8")
174
286
  ```
175
287
 
176
288
  Args:
177
289
  remote_path: The remote path to the existing file
290
+ file_cache_key: Optional hash value to use for cache key computation. If not specified, the cache key
291
+ will be computed based on the file's attributes (path, name, format).
292
+
293
+ Returns:
294
+ A new File instance pointing to the existing remote file
178
295
  """
179
- return cls(path=remote_path)
296
+ return cls(path=remote_path, hash=file_cache_key)
180
297
 
181
298
  @asynccontextmanager
182
299
  async def open(
@@ -187,82 +304,129 @@ class File(BaseModel, Generic[T], SerializableType):
187
304
  cache_options: Optional[dict] = None,
188
305
  compression: Optional[str] = None,
189
306
  **kwargs,
190
- ) -> AsyncGenerator[IO[Any]]:
307
+ ) -> AsyncGenerator[Union[AsyncWritableFile, AsyncReadableFile, "HashingWriter"], None]:
191
308
  """
192
309
  Asynchronously open the file and return a file-like object.
193
310
 
311
+ Use this method in async tasks to read from or write to files directly.
312
+
313
+ Example (Async Read):
314
+
315
+ ```python
316
+ @env.task
317
+ async def read_file(f: File) -> str:
318
+ async with f.open("rb") as fh:
319
+ content = bytes(await fh.read())
320
+ return content.decode("utf-8")
321
+ ```
322
+
323
+ Example (Async Write):
324
+
325
+ ```python
326
+ @env.task
327
+ async def write_file() -> File:
328
+ f = File.new_remote()
329
+ async with f.open("wb") as fh:
330
+ await fh.write(b"Hello, World!")
331
+ return f
332
+ ```
333
+
334
+ Example (Streaming Read):
335
+
336
+ ```python
337
+ @env.task
338
+ async def stream_read(f: File) -> str:
339
+ content_parts = []
340
+ async with f.open("rb", block_size=1024) as fh:
341
+ while True:
342
+ chunk = await fh.read()
343
+ if not chunk:
344
+ break
345
+ content_parts.append(chunk)
346
+ return b"".join(content_parts).decode("utf-8")
347
+ ```
348
+
194
349
  Args:
195
- mode: The mode to open the file in (default: 'rb')
196
- block_size: Size of blocks for reading (bytes)
350
+ mode: The mode to open the file in (default: 'rb'). Common modes: 'rb' (read binary),
351
+ 'wb' (write binary), 'rt' (read text), 'wt' (write text)
352
+ block_size: Size of blocks for reading in bytes. Useful for streaming large files.
197
353
  cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
198
354
  cache_options: Dictionary of options for the cache
199
355
  compression: Compression format or None for auto-detection
200
356
  **kwargs: Additional arguments passed to fsspec's open method
201
357
 
202
358
  Returns:
203
- An async file-like object
204
-
205
- Example:
206
- ```python
207
- async with file.open('rb') as f:
208
- data = await f.read()
209
- ```
359
+ An async file-like object that can be used with async read/write operations
210
360
  """
211
- fs = storage.get_underlying_filesystem(path=self.path)
212
-
213
- # Set up cache options if provided
214
- if cache_options is None:
215
- cache_options = {}
216
-
217
- # Configure the open parameters
218
- open_kwargs = {"mode": mode, **kwargs}
219
- if compression:
220
- open_kwargs["compression"] = compression
221
-
222
- if block_size:
223
- open_kwargs["block_size"] = block_size
224
-
225
- # Apply caching strategy
226
- if cache_type != "none":
227
- open_kwargs["cache_type"] = cache_type
228
- open_kwargs["cache_options"] = cache_options
229
-
230
- # Use aiofiles for local files
231
- if fs.protocol == "file":
232
- async with aiofiles.open(self.path, mode=mode, **kwargs) as f:
233
- yield f
234
- else:
235
- # This code is broadly similar to what storage.get_stream does, but without actually reading from the stream
236
- file_handle = None
361
+ # Check if we should use obstore bypass
362
+ try:
363
+ fh = await storage.open(
364
+ self.path,
365
+ mode=mode,
366
+ cache_type=cache_type,
367
+ cache_options=cache_options,
368
+ compression=compression,
369
+ block_size=block_size,
370
+ **kwargs,
371
+ )
237
372
  try:
238
- if isinstance(fs, AsyncFileSystem):
239
- file_handle = await fs.open_async(self.path, mode)
240
- yield file_handle
241
- return
242
- except NotImplementedError:
243
- logger.debug(f"{fs} doesn't implement 'open_async', falling back to sync")
373
+ yield fh
374
+ return
244
375
  finally:
245
- if file_handle is not None:
246
- file_handle.close()
376
+ if inspect.iscoroutinefunction(fh.close):
377
+ await fh.close()
378
+ else:
379
+ fh.close()
380
+ except flyte.errors.OnlyAsyncIOSupportedError:
381
+ # Fall back to aiofiles
382
+ fs = storage.get_underlying_filesystem(path=self.path)
383
+ if "file" in fs.protocol:
384
+ async with aiofiles.open(self.path, mode=mode, **kwargs) as f:
385
+ yield f
386
+ return
387
+ raise
388
+
389
+ async def exists(self) -> bool:
390
+ """
391
+ Asynchronously check if the file exists.
247
392
 
248
- with fs.open(self.path, mode) as file_handle:
249
- yield file_handle
393
+ Example (Async):
394
+
395
+ ```python
396
+ @env.task
397
+ async def check_file(f: File) -> bool:
398
+ if await f.exists():
399
+ print("File exists!")
400
+ return True
401
+ return False
402
+ ```
403
+
404
+ Returns:
405
+ True if the file exists, False otherwise
406
+ """
407
+ return await storage.exists(self.path)
250
408
 
251
409
  def exists_sync(self) -> bool:
252
410
  """
253
411
  Synchronously check if the file exists.
254
412
 
413
+ Use this in non-async tasks or when you need synchronous file existence checking.
414
+
415
+ Example (Sync):
416
+
417
+ ```python
418
+ @env.task
419
+ def check_file_sync(f: File) -> bool:
420
+ if f.exists_sync():
421
+ print("File exists!")
422
+ return True
423
+ return False
424
+ ```
425
+
255
426
  Returns:
256
427
  True if the file exists, False otherwise
257
-
258
- Example:
259
- ```python
260
- if file.exists_sync():
261
- # Process the file
262
- ```
263
428
  """
264
- fs = storage.get_underlying_filesystem(path=self.path)
265
- return fs.exists(self.path)
429
+ return storage.exists_sync(self.path)
266
430
 
267
431
  @contextmanager
268
432
  def open_sync(
@@ -273,26 +437,44 @@ class File(BaseModel, Generic[T], SerializableType):
273
437
  cache_options: Optional[dict] = None,
274
438
  compression: Optional[str] = None,
275
439
  **kwargs,
276
- ) -> Generator[IO[Any]]:
440
+ ) -> Generator[IO[Any], None, None]:
277
441
  """
278
442
  Synchronously open the file and return a file-like object.
279
443
 
444
+ Use this method in non-async tasks to read from or write to files directly.
445
+
446
+ Example (Sync Read):
447
+
448
+ ```python
449
+ @env.task
450
+ def read_file_sync(f: File) -> str:
451
+ with f.open_sync("rb") as fh:
452
+ content = fh.read()
453
+ return content.decode("utf-8")
454
+ ```
455
+
456
+ Example (Sync Write):
457
+
458
+ ```python
459
+ @env.task
460
+ def write_file_sync() -> File:
461
+ f = File.new_remote()
462
+ with f.open_sync("wb") as fh:
463
+ fh.write(b"Hello, World!")
464
+ return f
465
+ ```
466
+
280
467
  Args:
281
- mode: The mode to open the file in (default: 'rb')
282
- block_size: Size of blocks for reading (bytes)
468
+ mode: The mode to open the file in (default: 'rb'). Common modes: 'rb' (read binary),
469
+ 'wb' (write binary), 'rt' (read text), 'wt' (write text)
470
+ block_size: Size of blocks for reading in bytes. Useful for streaming large files.
283
471
  cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
284
472
  cache_options: Dictionary of options for the cache
285
473
  compression: Compression format or None for auto-detection
286
474
  **kwargs: Additional arguments passed to fsspec's open method
287
475
 
288
476
  Returns:
289
- A file-like object
290
-
291
- Example:
292
- ```python
293
- with file.open_sync('rb') as f:
294
- data = f.read()
295
- ```
477
+ A file-like object that can be used with standard read/write operations
296
478
  """
297
479
  fs = storage.get_underlying_filesystem(path=self.path)
298
480
 
@@ -314,59 +496,193 @@ class File(BaseModel, Generic[T], SerializableType):
314
496
  with fs.open(self.path, **open_kwargs) as f:
315
497
  yield f
316
498
 
317
- # @synced.wrap - enabling this did not work - synchronicity/pydantic issue
499
+ # TODO sync needs to be implemented
318
500
  async def download(self, local_path: Optional[Union[str, Path]] = None) -> str:
319
501
  """
320
502
  Asynchronously download the file to a local path.
321
503
 
504
+ Use this when you need to download a remote file to your local filesystem for processing.
505
+
506
+ Example (Async):
507
+
508
+ ```python
509
+ @env.task
510
+ async def download_and_process(f: File) -> str:
511
+ local_path = await f.download()
512
+ # Now process the local file
513
+ with open(local_path, "r") as fh:
514
+ return fh.read()
515
+ ```
516
+
517
+ Example (Download to specific path):
518
+
519
+ ```python
520
+ @env.task
521
+ async def download_to_path(f: File) -> str:
522
+ local_path = await f.download("/tmp/myfile.csv")
523
+ return local_path
524
+ ```
525
+
322
526
  Args:
323
527
  local_path: The local path to download the file to. If None, a temporary
324
- directory will be used.
528
+ directory will be used and a path will be generated.
325
529
 
326
530
  Returns:
327
- The path to the downloaded file
328
-
329
- Example:
330
- ```python
331
- local_file = await file.download('/tmp/myfile.csv')
332
- ```
531
+ The absolute path to the downloaded file
333
532
  """
334
533
  if local_path is None:
335
- local_path = storage.get_random_local_path(file_path_or_file_name=local_path)
534
+ local_path = storage.get_random_local_path(file_path_or_file_name=self.path)
336
535
  else:
536
+ # Preserve trailing separator if present (Path.absolute() strips it)
537
+ local_path_str = str(local_path)
538
+ has_trailing_sep = local_path_str.endswith(os.sep)
337
539
  local_path = str(Path(local_path).absolute())
540
+ if has_trailing_sep:
541
+ local_path = local_path + os.sep
338
542
 
339
543
  fs = storage.get_underlying_filesystem(path=self.path)
340
544
 
341
545
  # If it's already a local file, just copy it
342
546
  if "file" in fs.protocol:
547
+ # Apply directory logic for local-to-local copies
548
+ local_path_for_copy = local_path
549
+ if isinstance(local_path, str):
550
+ local_path_obj = Path(local_path)
551
+ # Check if it's a directory or ends with separator
552
+ if local_path.endswith(os.sep) or (local_path_obj.exists() and local_path_obj.is_dir()):
553
+ remote_filename = Path(self.path).name
554
+ local_path_for_copy = str(local_path_obj / remote_filename)
555
+
556
+ # Ensure parent directory exists
557
+ Path(local_path_for_copy).parent.mkdir(parents=True, exist_ok=True)
558
+
343
559
  # Use aiofiles for async copy
344
560
  async with aiofiles.open(self.path, "rb") as src:
345
- async with aiofiles.open(local_path, "wb") as dst:
561
+ async with aiofiles.open(local_path_for_copy, "wb") as dst:
346
562
  await dst.write(await src.read())
347
- return str(local_path)
563
+ return str(local_path_for_copy)
348
564
 
349
565
  # Otherwise download from remote using async functionality
350
- await storage.get(self.path, str(local_path))
566
+ result_path = await storage.get(self.path, str(local_path))
567
+ return result_path
568
+
569
+ def download_sync(self, local_path: Optional[Union[str, Path]] = None) -> str:
570
+ """
571
+ Synchronously download the file to a local path.
572
+
573
+ Use this in non-async tasks when you need to download a remote file to your local filesystem.
574
+
575
+ Example (Sync):
576
+
577
+ ```python
578
+ @env.task
579
+ def download_and_process_sync(f: File) -> str:
580
+ local_path = f.download_sync()
581
+ # Now process the local file
582
+ with open(local_path, "r") as fh:
583
+ return fh.read()
584
+ ```
585
+
586
+ Example (Download to specific path):
587
+
588
+ ```python
589
+ @env.task
590
+ def download_to_path_sync(f: File) -> str:
591
+ local_path = f.download_sync("/tmp/myfile.csv")
592
+ return local_path
593
+ ```
594
+
595
+ Args:
596
+ local_path: The local path to download the file to. If None, a temporary
597
+ directory will be used and a path will be generated.
598
+
599
+ Returns:
600
+ The absolute path to the downloaded file
601
+ """
602
+ if local_path is None:
603
+ local_path = storage.get_random_local_path(file_path_or_file_name=self.path)
604
+ else:
605
+ # Preserve trailing separator if present (Path.absolute() strips it)
606
+ local_path_str = str(local_path)
607
+ has_trailing_sep = local_path_str.endswith(os.sep)
608
+ local_path = str(Path(local_path).absolute())
609
+ if has_trailing_sep:
610
+ local_path = local_path + os.sep
611
+
612
+ fs = storage.get_underlying_filesystem(path=self.path)
613
+
614
+ # If it's already a local file, just copy it
615
+ if "file" in fs.protocol:
616
+ # Apply directory logic for local-to-local copies
617
+ local_path_for_copy = local_path
618
+ if isinstance(local_path, str):
619
+ local_path_obj = Path(local_path)
620
+ # Check if it's a directory or ends with separator
621
+ if local_path.endswith(os.sep) or (local_path_obj.exists() and local_path_obj.is_dir()):
622
+ remote_filename = Path(self.path).name
623
+ local_path_for_copy = str(local_path_obj / remote_filename)
624
+
625
+ # Ensure parent directory exists
626
+ Path(local_path_for_copy).parent.mkdir(parents=True, exist_ok=True)
627
+
628
+ # Use standard file operations for sync copy
629
+ import shutil
630
+
631
+ shutil.copy2(self.path, local_path_for_copy)
632
+ return str(local_path_for_copy)
633
+
634
+ # Otherwise download from remote using sync functionality
635
+ # Use the sync version of storage operations
636
+ with fs.open(self.path, "rb") as src:
637
+ with open(local_path, "wb") as dst:
638
+ dst.write(src.read())
351
639
  return str(local_path)
352
640
 
353
641
  @classmethod
354
642
  @requires_initialization
355
- async def from_local(cls, local_path: Union[str, Path], remote_destination: Optional[str] = None) -> File[T]:
643
+ def from_local_sync(
644
+ cls,
645
+ local_path: Union[str, Path],
646
+ remote_destination: Optional[str] = None,
647
+ hash_method: Optional[HashMethod | str] = None,
648
+ ) -> File[T]:
356
649
  """
357
- Create a new File object from a local file that will be uploaded to the configured remote store.
650
+ Synchronously create a new File object from a local file by uploading it to remote storage.
651
+
652
+ Use this in non-async tasks when you have a local file that needs to be uploaded to remote storage.
653
+
654
+ Example (Sync):
655
+
656
+ ```python
657
+ @env.task
658
+ def upload_local_file_sync() -> File:
659
+ # Create a local file
660
+ with open("/tmp/data.csv", "w") as f:
661
+ f.write("col1,col2\n1,2\n3,4\n")
662
+
663
+ # Upload to remote storage
664
+ remote_file = File.from_local_sync("/tmp/data.csv")
665
+ return remote_file
666
+ ```
667
+
668
+ Example (With specific destination):
669
+
670
+ ```python
671
+ @env.task
672
+ def upload_to_specific_path() -> File:
673
+ remote_file = File.from_local_sync("/tmp/data.csv", "s3://my-bucket/data.csv")
674
+ return remote_file
675
+ ```
358
676
 
359
677
  Args:
360
678
  local_path: Path to the local file
361
- remote_destination: Optional path to store the file remotely. If None, a path will be generated.
679
+ remote_destination: Optional remote path to store the file. If None, a path will be automatically generated.
680
+ hash_method: Optional HashMethod or string to use for cache key computation. If a string is provided,
681
+ it will be used as a precomputed cache key. If a HashMethod is provided, it will compute
682
+ the hash during upload. If not specified, the cache key will be based on file attributes.
362
683
 
363
684
  Returns:
364
- A new File instance pointing to the uploaded file
365
-
366
- Example:
367
- ```python
368
- remote_file = await File[DataFrame].from_local('/tmp/data.csv', 's3://bucket/data.csv')
369
- ```
685
+ A new File instance pointing to the uploaded remote file
370
686
  """
371
687
  if not os.path.exists(local_path):
372
688
  raise ValueError(f"File not found: {local_path}")
@@ -377,20 +693,148 @@ class File(BaseModel, Generic[T], SerializableType):
377
693
 
378
694
  # If remote_destination was not set by the user, and the configured raw data path is also local,
379
695
  # then let's optimize by not uploading.
696
+ hash_value = hash_method if isinstance(hash_method, str) else None
697
+ hash_method_obj = hash_method if isinstance(hash_method, HashMethod) else None
698
+
380
699
  if "file" in protocol:
381
700
  if remote_destination is None:
382
701
  path = str(Path(local_path).absolute())
383
702
  else:
384
703
  # Otherwise, actually make a copy of the file
385
- async with aiofiles.open(remote_path, "rb") as src:
386
- async with aiofiles.open(local_path, "wb") as dst:
387
- await dst.write(await src.read())
704
+ import shutil
705
+
706
+ if hash_method_obj:
707
+ # For hash computation, we need to read and write manually
708
+ with open(local_path, "rb") as src:
709
+ with open(remote_path, "wb") as dst:
710
+ dst_wrapper = HashingWriter(dst, accumulator=hash_method_obj)
711
+ dst_wrapper.write(src.read())
712
+ hash_value = dst_wrapper.result()
713
+ dst_wrapper.close()
714
+ else:
715
+ shutil.copy2(local_path, remote_path)
716
+ path = str(Path(remote_path).absolute())
717
+ else:
718
+ # Otherwise upload to remote using sync storage layer
719
+ fs = storage.get_underlying_filesystem(path=remote_path)
720
+
721
+ if hash_method_obj:
722
+ # We can skip the wrapper if the hash method is just a precomputed value
723
+ if not isinstance(hash_method_obj, PrecomputedValue):
724
+ with open(local_path, "rb") as src:
725
+ # For sync operations, we need to compute hash manually
726
+ data = src.read()
727
+ hash_method_obj.update(memoryview(data))
728
+ hash_value = hash_method_obj.result()
729
+
730
+ # Now write the data to remote
731
+ with fs.open(remote_path, "wb") as dst:
732
+ dst.write(data)
733
+ path = remote_path
734
+ else:
735
+ # Use sync file operations
736
+ with open(local_path, "rb") as src:
737
+ with fs.open(remote_path, "wb") as dst:
738
+ dst.write(src.read())
739
+ path = remote_path
740
+ hash_value = hash_method_obj.result()
741
+ else:
742
+ # Simple sync copy
743
+ with open(local_path, "rb") as src:
744
+ with fs.open(remote_path, "wb") as dst:
745
+ dst.write(src.read())
746
+ path = remote_path
747
+
748
+ f = cls(path=path, name=filename, hash_method=hash_method_obj, hash=hash_value)
749
+ return f
750
+
751
+ @classmethod
752
+ @requires_initialization
753
+ async def from_local(
754
+ cls,
755
+ local_path: Union[str, Path],
756
+ remote_destination: Optional[str] = None,
757
+ hash_method: Optional[HashMethod | str] = None,
758
+ ) -> File[T]:
759
+ """
760
+ Asynchronously create a new File object from a local file by uploading it to remote storage.
761
+
762
+ Use this in async tasks when you have a local file that needs to be uploaded to remote storage.
763
+
764
+ Example (Async):
765
+
766
+ ```python
767
+ @env.task
768
+ async def upload_local_file() -> File:
769
+ # Create a local file
770
+ async with aiofiles.open("/tmp/data.csv", "w") as f:
771
+ await f.write("col1,col2\n1,2\n3,4\n")
772
+
773
+ # Upload to remote storage
774
+ remote_file = await File.from_local("/tmp/data.csv")
775
+ return remote_file
776
+ ```
777
+
778
+ Example (With specific destination):
779
+
780
+ ```python
781
+ @env.task
782
+ async def upload_to_specific_path() -> File:
783
+ remote_file = await File.from_local("/tmp/data.csv", "s3://my-bucket/data.csv")
784
+ return remote_file
785
+ ```
786
+
787
+ Args:
788
+ local_path: Path to the local file
789
+ remote_destination: Optional remote path to store the file. If None, a path will be automatically generated.
790
+ hash_method: Optional HashMethod or string to use for cache key computation. If a string is provided,
791
+ it will be used as a precomputed cache key. If a HashMethod is provided, it will compute
792
+ the hash during upload. If not specified, the cache key will be based on file attributes.
793
+
794
+ Returns:
795
+ A new File instance pointing to the uploaded remote file
796
+ """
797
+ if not os.path.exists(local_path):
798
+ raise ValueError(f"File not found: {local_path}")
799
+
800
+ filename = Path(local_path).name
801
+ remote_path = remote_destination or internal_ctx().raw_data.get_random_remote_path(filename)
802
+ protocol = get_protocol(remote_path)
803
+
804
+ # If remote_destination was not set by the user, and the configured raw data path is also local,
805
+ # then let's optimize by not uploading.
806
+ hash_value = hash_method if isinstance(hash_method, str) else None
807
+ hash_method = hash_method if isinstance(hash_method, HashMethod) else None
808
+ if "file" in protocol:
809
+ if remote_destination is None:
810
+ path = str(Path(local_path).absolute())
811
+ else:
812
+ # Otherwise, actually make a copy of the file
813
+ async with aiofiles.open(local_path, "rb") as src:
814
+ async with aiofiles.open(remote_path, "wb") as dst:
815
+ if hash_method:
816
+ dst_wrapper = HashingWriter(dst, accumulator=hash_method)
817
+ await dst_wrapper.write(await src.read())
818
+ hash_value = dst_wrapper.result()
819
+ else:
820
+ await dst.write(await src.read())
388
821
  path = str(Path(remote_path).absolute())
389
822
  else:
390
823
  # Otherwise upload to remote using async storage layer
391
- path = await storage.put(str(local_path), remote_path)
824
+ if hash_method:
825
+ # We can skip the wrapper if the hash method is just a precomputed value
826
+ if not isinstance(hash_method, PrecomputedValue):
827
+ async with aiofiles.open(local_path, "rb") as src:
828
+ src_wrapper = AsyncHashingReader(src, accumulator=hash_method)
829
+ path = await storage.put_stream(src_wrapper, to_path=remote_path)
830
+ hash_value = src_wrapper.result()
831
+ else:
832
+ path = await storage.put(str(local_path), remote_path)
833
+ hash_value = hash_method.result()
834
+ else:
835
+ path = await storage.put(str(local_path), remote_path)
392
836
 
393
- f = cls(path=path, name=filename)
837
+ f = cls(path=path, name=filename, hash_method=hash_method, hash=hash_value)
394
838
  return f
395
839
 
396
840
 
@@ -433,7 +877,8 @@ class FileTransformer(TypeTransformer[File]):
433
877
  ),
434
878
  uri=python_val.path,
435
879
  )
436
- )
880
+ ),
881
+ hash=python_val.hash if python_val.hash else None,
437
882
  )
438
883
 
439
884
  async def to_python_value(
@@ -451,7 +896,8 @@ class FileTransformer(TypeTransformer[File]):
451
896
 
452
897
  uri = lv.scalar.blob.uri
453
898
  filename = Path(uri).name
454
- f: File = File(path=uri, name=filename, format=lv.scalar.blob.metadata.type.format)
899
+ hash_value = lv.hash if lv.hash else None
900
+ f: File = File(path=uri, name=filename, format=lv.scalar.blob.metadata.type.format, hash=hash_value)
455
901
  return f
456
902
 
457
903
  def guess_python_type(self, literal_type: types_pb2.LiteralType) -> Type[File]: