flyte 0.0.1b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flyte might be problematic. Click here for more details.

Files changed (390) hide show
  1. flyte/__init__.py +62 -0
  2. flyte/_api_commons.py +3 -0
  3. flyte/_bin/__init__.py +0 -0
  4. flyte/_bin/runtime.py +126 -0
  5. flyte/_build.py +25 -0
  6. flyte/_cache/__init__.py +12 -0
  7. flyte/_cache/cache.py +146 -0
  8. flyte/_cache/defaults.py +9 -0
  9. flyte/_cache/policy_function_body.py +42 -0
  10. flyte/_cli/__init__.py +0 -0
  11. flyte/_cli/_common.py +287 -0
  12. flyte/_cli/_create.py +42 -0
  13. flyte/_cli/_delete.py +23 -0
  14. flyte/_cli/_deploy.py +140 -0
  15. flyte/_cli/_get.py +235 -0
  16. flyte/_cli/_run.py +152 -0
  17. flyte/_cli/main.py +72 -0
  18. flyte/_code_bundle/__init__.py +8 -0
  19. flyte/_code_bundle/_ignore.py +113 -0
  20. flyte/_code_bundle/_packaging.py +187 -0
  21. flyte/_code_bundle/_utils.py +339 -0
  22. flyte/_code_bundle/bundle.py +178 -0
  23. flyte/_context.py +146 -0
  24. flyte/_datastructures.py +342 -0
  25. flyte/_deploy.py +202 -0
  26. flyte/_doc.py +29 -0
  27. flyte/_docstring.py +32 -0
  28. flyte/_environment.py +43 -0
  29. flyte/_group.py +31 -0
  30. flyte/_hash.py +23 -0
  31. flyte/_image.py +760 -0
  32. flyte/_initialize.py +634 -0
  33. flyte/_interface.py +84 -0
  34. flyte/_internal/__init__.py +3 -0
  35. flyte/_internal/controllers/__init__.py +115 -0
  36. flyte/_internal/controllers/_local_controller.py +118 -0
  37. flyte/_internal/controllers/_trace.py +40 -0
  38. flyte/_internal/controllers/pbhash.py +39 -0
  39. flyte/_internal/controllers/remote/__init__.py +40 -0
  40. flyte/_internal/controllers/remote/_action.py +141 -0
  41. flyte/_internal/controllers/remote/_client.py +43 -0
  42. flyte/_internal/controllers/remote/_controller.py +361 -0
  43. flyte/_internal/controllers/remote/_core.py +402 -0
  44. flyte/_internal/controllers/remote/_informer.py +361 -0
  45. flyte/_internal/controllers/remote/_service_protocol.py +50 -0
  46. flyte/_internal/imagebuild/__init__.py +11 -0
  47. flyte/_internal/imagebuild/docker_builder.py +416 -0
  48. flyte/_internal/imagebuild/image_builder.py +241 -0
  49. flyte/_internal/imagebuild/remote_builder.py +0 -0
  50. flyte/_internal/resolvers/__init__.py +0 -0
  51. flyte/_internal/resolvers/_task_module.py +54 -0
  52. flyte/_internal/resolvers/common.py +31 -0
  53. flyte/_internal/resolvers/default.py +28 -0
  54. flyte/_internal/runtime/__init__.py +0 -0
  55. flyte/_internal/runtime/convert.py +199 -0
  56. flyte/_internal/runtime/entrypoints.py +135 -0
  57. flyte/_internal/runtime/io.py +136 -0
  58. flyte/_internal/runtime/resources_serde.py +138 -0
  59. flyte/_internal/runtime/task_serde.py +210 -0
  60. flyte/_internal/runtime/taskrunner.py +190 -0
  61. flyte/_internal/runtime/types_serde.py +54 -0
  62. flyte/_logging.py +124 -0
  63. flyte/_protos/__init__.py +0 -0
  64. flyte/_protos/common/authorization_pb2.py +66 -0
  65. flyte/_protos/common/authorization_pb2.pyi +108 -0
  66. flyte/_protos/common/authorization_pb2_grpc.py +4 -0
  67. flyte/_protos/common/identifier_pb2.py +71 -0
  68. flyte/_protos/common/identifier_pb2.pyi +82 -0
  69. flyte/_protos/common/identifier_pb2_grpc.py +4 -0
  70. flyte/_protos/common/identity_pb2.py +48 -0
  71. flyte/_protos/common/identity_pb2.pyi +72 -0
  72. flyte/_protos/common/identity_pb2_grpc.py +4 -0
  73. flyte/_protos/common/list_pb2.py +36 -0
  74. flyte/_protos/common/list_pb2.pyi +69 -0
  75. flyte/_protos/common/list_pb2_grpc.py +4 -0
  76. flyte/_protos/common/policy_pb2.py +37 -0
  77. flyte/_protos/common/policy_pb2.pyi +27 -0
  78. flyte/_protos/common/policy_pb2_grpc.py +4 -0
  79. flyte/_protos/common/role_pb2.py +37 -0
  80. flyte/_protos/common/role_pb2.pyi +53 -0
  81. flyte/_protos/common/role_pb2_grpc.py +4 -0
  82. flyte/_protos/common/runtime_version_pb2.py +28 -0
  83. flyte/_protos/common/runtime_version_pb2.pyi +24 -0
  84. flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
  85. flyte/_protos/logs/dataplane/payload_pb2.py +96 -0
  86. flyte/_protos/logs/dataplane/payload_pb2.pyi +168 -0
  87. flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
  88. flyte/_protos/secret/definition_pb2.py +49 -0
  89. flyte/_protos/secret/definition_pb2.pyi +93 -0
  90. flyte/_protos/secret/definition_pb2_grpc.py +4 -0
  91. flyte/_protos/secret/payload_pb2.py +62 -0
  92. flyte/_protos/secret/payload_pb2.pyi +94 -0
  93. flyte/_protos/secret/payload_pb2_grpc.py +4 -0
  94. flyte/_protos/secret/secret_pb2.py +38 -0
  95. flyte/_protos/secret/secret_pb2.pyi +6 -0
  96. flyte/_protos/secret/secret_pb2_grpc.py +198 -0
  97. flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
  98. flyte/_protos/validate/validate/validate_pb2.py +76 -0
  99. flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
  100. flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
  101. flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
  102. flyte/_protos/workflow/queue_service_pb2.py +106 -0
  103. flyte/_protos/workflow/queue_service_pb2.pyi +141 -0
  104. flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
  105. flyte/_protos/workflow/run_definition_pb2.py +128 -0
  106. flyte/_protos/workflow/run_definition_pb2.pyi +310 -0
  107. flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
  108. flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
  109. flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
  110. flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
  111. flyte/_protos/workflow/run_service_pb2.py +133 -0
  112. flyte/_protos/workflow/run_service_pb2.pyi +175 -0
  113. flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
  114. flyte/_protos/workflow/state_service_pb2.py +58 -0
  115. flyte/_protos/workflow/state_service_pb2.pyi +71 -0
  116. flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
  117. flyte/_protos/workflow/task_definition_pb2.py +72 -0
  118. flyte/_protos/workflow/task_definition_pb2.pyi +65 -0
  119. flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
  120. flyte/_protos/workflow/task_service_pb2.py +44 -0
  121. flyte/_protos/workflow/task_service_pb2.pyi +31 -0
  122. flyte/_protos/workflow/task_service_pb2_grpc.py +104 -0
  123. flyte/_resources.py +226 -0
  124. flyte/_retry.py +32 -0
  125. flyte/_reusable_environment.py +25 -0
  126. flyte/_run.py +411 -0
  127. flyte/_secret.py +61 -0
  128. flyte/_task.py +367 -0
  129. flyte/_task_environment.py +200 -0
  130. flyte/_timeout.py +47 -0
  131. flyte/_tools.py +27 -0
  132. flyte/_trace.py +128 -0
  133. flyte/_utils/__init__.py +20 -0
  134. flyte/_utils/asyn.py +119 -0
  135. flyte/_utils/coro_management.py +25 -0
  136. flyte/_utils/file_handling.py +72 -0
  137. flyte/_utils/helpers.py +108 -0
  138. flyte/_utils/lazy_module.py +54 -0
  139. flyte/_utils/uv_script_parser.py +49 -0
  140. flyte/_version.py +21 -0
  141. flyte/connectors/__init__.py +0 -0
  142. flyte/errors.py +143 -0
  143. flyte/extras/__init__.py +5 -0
  144. flyte/extras/_container.py +273 -0
  145. flyte/io/__init__.py +11 -0
  146. flyte/io/_dataframe.py +0 -0
  147. flyte/io/_dir.py +448 -0
  148. flyte/io/_file.py +468 -0
  149. flyte/io/pickle/__init__.py +0 -0
  150. flyte/io/pickle/transformer.py +117 -0
  151. flyte/io/structured_dataset/__init__.py +129 -0
  152. flyte/io/structured_dataset/basic_dfs.py +219 -0
  153. flyte/io/structured_dataset/structured_dataset.py +1061 -0
  154. flyte/py.typed +0 -0
  155. flyte/remote/__init__.py +25 -0
  156. flyte/remote/_client/__init__.py +0 -0
  157. flyte/remote/_client/_protocols.py +131 -0
  158. flyte/remote/_client/auth/__init__.py +12 -0
  159. flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
  160. flyte/remote/_client/auth/_authenticators/base.py +397 -0
  161. flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  162. flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
  163. flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
  164. flyte/remote/_client/auth/_authenticators/factory.py +200 -0
  165. flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
  166. flyte/remote/_client/auth/_channel.py +184 -0
  167. flyte/remote/_client/auth/_client_config.py +83 -0
  168. flyte/remote/_client/auth/_default_html.py +32 -0
  169. flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  170. flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
  171. flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
  172. flyte/remote/_client/auth/_keyring.py +143 -0
  173. flyte/remote/_client/auth/_token_client.py +260 -0
  174. flyte/remote/_client/auth/errors.py +16 -0
  175. flyte/remote/_client/controlplane.py +95 -0
  176. flyte/remote/_console.py +18 -0
  177. flyte/remote/_data.py +155 -0
  178. flyte/remote/_logs.py +116 -0
  179. flyte/remote/_project.py +86 -0
  180. flyte/remote/_run.py +873 -0
  181. flyte/remote/_secret.py +132 -0
  182. flyte/remote/_task.py +227 -0
  183. flyte/report/__init__.py +3 -0
  184. flyte/report/_report.py +178 -0
  185. flyte/report/_template.html +124 -0
  186. flyte/storage/__init__.py +24 -0
  187. flyte/storage/_remote_fs.py +34 -0
  188. flyte/storage/_storage.py +251 -0
  189. flyte/storage/_utils.py +5 -0
  190. flyte/types/__init__.py +13 -0
  191. flyte/types/_interface.py +25 -0
  192. flyte/types/_renderer.py +162 -0
  193. flyte/types/_string_literals.py +120 -0
  194. flyte/types/_type_engine.py +2210 -0
  195. flyte/types/_utils.py +80 -0
  196. flyte-0.0.1b0.dist-info/METADATA +179 -0
  197. flyte-0.0.1b0.dist-info/RECORD +390 -0
  198. flyte-0.0.1b0.dist-info/WHEEL +5 -0
  199. flyte-0.0.1b0.dist-info/entry_points.txt +3 -0
  200. flyte-0.0.1b0.dist-info/top_level.txt +1 -0
  201. union/__init__.py +54 -0
  202. union/_api_commons.py +3 -0
  203. union/_bin/__init__.py +0 -0
  204. union/_bin/runtime.py +113 -0
  205. union/_build.py +25 -0
  206. union/_cache/__init__.py +12 -0
  207. union/_cache/cache.py +141 -0
  208. union/_cache/defaults.py +9 -0
  209. union/_cache/policy_function_body.py +42 -0
  210. union/_cli/__init__.py +0 -0
  211. union/_cli/_common.py +263 -0
  212. union/_cli/_create.py +40 -0
  213. union/_cli/_delete.py +23 -0
  214. union/_cli/_deploy.py +120 -0
  215. union/_cli/_get.py +162 -0
  216. union/_cli/_params.py +579 -0
  217. union/_cli/_run.py +150 -0
  218. union/_cli/main.py +72 -0
  219. union/_code_bundle/__init__.py +8 -0
  220. union/_code_bundle/_ignore.py +113 -0
  221. union/_code_bundle/_packaging.py +187 -0
  222. union/_code_bundle/_utils.py +342 -0
  223. union/_code_bundle/bundle.py +176 -0
  224. union/_context.py +146 -0
  225. union/_datastructures.py +295 -0
  226. union/_deploy.py +185 -0
  227. union/_doc.py +29 -0
  228. union/_docstring.py +26 -0
  229. union/_environment.py +43 -0
  230. union/_group.py +31 -0
  231. union/_hash.py +23 -0
  232. union/_image.py +760 -0
  233. union/_initialize.py +585 -0
  234. union/_interface.py +84 -0
  235. union/_internal/__init__.py +3 -0
  236. union/_internal/controllers/__init__.py +77 -0
  237. union/_internal/controllers/_local_controller.py +77 -0
  238. union/_internal/controllers/pbhash.py +39 -0
  239. union/_internal/controllers/remote/__init__.py +40 -0
  240. union/_internal/controllers/remote/_action.py +131 -0
  241. union/_internal/controllers/remote/_client.py +43 -0
  242. union/_internal/controllers/remote/_controller.py +169 -0
  243. union/_internal/controllers/remote/_core.py +341 -0
  244. union/_internal/controllers/remote/_informer.py +260 -0
  245. union/_internal/controllers/remote/_service_protocol.py +44 -0
  246. union/_internal/imagebuild/__init__.py +11 -0
  247. union/_internal/imagebuild/docker_builder.py +416 -0
  248. union/_internal/imagebuild/image_builder.py +243 -0
  249. union/_internal/imagebuild/remote_builder.py +0 -0
  250. union/_internal/resolvers/__init__.py +0 -0
  251. union/_internal/resolvers/_task_module.py +31 -0
  252. union/_internal/resolvers/common.py +24 -0
  253. union/_internal/resolvers/default.py +27 -0
  254. union/_internal/runtime/__init__.py +0 -0
  255. union/_internal/runtime/convert.py +163 -0
  256. union/_internal/runtime/entrypoints.py +121 -0
  257. union/_internal/runtime/io.py +136 -0
  258. union/_internal/runtime/resources_serde.py +134 -0
  259. union/_internal/runtime/task_serde.py +202 -0
  260. union/_internal/runtime/taskrunner.py +179 -0
  261. union/_internal/runtime/types_serde.py +53 -0
  262. union/_logging.py +124 -0
  263. union/_protos/__init__.py +0 -0
  264. union/_protos/common/authorization_pb2.py +66 -0
  265. union/_protos/common/authorization_pb2.pyi +106 -0
  266. union/_protos/common/authorization_pb2_grpc.py +4 -0
  267. union/_protos/common/identifier_pb2.py +71 -0
  268. union/_protos/common/identifier_pb2.pyi +82 -0
  269. union/_protos/common/identifier_pb2_grpc.py +4 -0
  270. union/_protos/common/identity_pb2.py +48 -0
  271. union/_protos/common/identity_pb2.pyi +72 -0
  272. union/_protos/common/identity_pb2_grpc.py +4 -0
  273. union/_protos/common/list_pb2.py +36 -0
  274. union/_protos/common/list_pb2.pyi +69 -0
  275. union/_protos/common/list_pb2_grpc.py +4 -0
  276. union/_protos/common/policy_pb2.py +37 -0
  277. union/_protos/common/policy_pb2.pyi +27 -0
  278. union/_protos/common/policy_pb2_grpc.py +4 -0
  279. union/_protos/common/role_pb2.py +37 -0
  280. union/_protos/common/role_pb2.pyi +51 -0
  281. union/_protos/common/role_pb2_grpc.py +4 -0
  282. union/_protos/common/runtime_version_pb2.py +28 -0
  283. union/_protos/common/runtime_version_pb2.pyi +24 -0
  284. union/_protos/common/runtime_version_pb2_grpc.py +4 -0
  285. union/_protos/logs/dataplane/payload_pb2.py +96 -0
  286. union/_protos/logs/dataplane/payload_pb2.pyi +168 -0
  287. union/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
  288. union/_protos/secret/definition_pb2.py +49 -0
  289. union/_protos/secret/definition_pb2.pyi +93 -0
  290. union/_protos/secret/definition_pb2_grpc.py +4 -0
  291. union/_protos/secret/payload_pb2.py +62 -0
  292. union/_protos/secret/payload_pb2.pyi +94 -0
  293. union/_protos/secret/payload_pb2_grpc.py +4 -0
  294. union/_protos/secret/secret_pb2.py +38 -0
  295. union/_protos/secret/secret_pb2.pyi +6 -0
  296. union/_protos/secret/secret_pb2_grpc.py +198 -0
  297. union/_protos/validate/validate/validate_pb2.py +76 -0
  298. union/_protos/workflow/node_execution_service_pb2.py +26 -0
  299. union/_protos/workflow/node_execution_service_pb2.pyi +4 -0
  300. union/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
  301. union/_protos/workflow/queue_service_pb2.py +75 -0
  302. union/_protos/workflow/queue_service_pb2.pyi +103 -0
  303. union/_protos/workflow/queue_service_pb2_grpc.py +172 -0
  304. union/_protos/workflow/run_definition_pb2.py +100 -0
  305. union/_protos/workflow/run_definition_pb2.pyi +256 -0
  306. union/_protos/workflow/run_definition_pb2_grpc.py +4 -0
  307. union/_protos/workflow/run_logs_service_pb2.py +41 -0
  308. union/_protos/workflow/run_logs_service_pb2.pyi +28 -0
  309. union/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
  310. union/_protos/workflow/run_service_pb2.py +133 -0
  311. union/_protos/workflow/run_service_pb2.pyi +173 -0
  312. union/_protos/workflow/run_service_pb2_grpc.py +412 -0
  313. union/_protos/workflow/state_service_pb2.py +58 -0
  314. union/_protos/workflow/state_service_pb2.pyi +69 -0
  315. union/_protos/workflow/state_service_pb2_grpc.py +138 -0
  316. union/_protos/workflow/task_definition_pb2.py +72 -0
  317. union/_protos/workflow/task_definition_pb2.pyi +65 -0
  318. union/_protos/workflow/task_definition_pb2_grpc.py +4 -0
  319. union/_protos/workflow/task_service_pb2.py +44 -0
  320. union/_protos/workflow/task_service_pb2.pyi +31 -0
  321. union/_protos/workflow/task_service_pb2_grpc.py +104 -0
  322. union/_resources.py +226 -0
  323. union/_retry.py +32 -0
  324. union/_reusable_environment.py +25 -0
  325. union/_run.py +374 -0
  326. union/_secret.py +61 -0
  327. union/_task.py +354 -0
  328. union/_task_environment.py +186 -0
  329. union/_timeout.py +47 -0
  330. union/_tools.py +27 -0
  331. union/_utils/__init__.py +11 -0
  332. union/_utils/asyn.py +119 -0
  333. union/_utils/file_handling.py +71 -0
  334. union/_utils/helpers.py +46 -0
  335. union/_utils/lazy_module.py +54 -0
  336. union/_utils/uv_script_parser.py +49 -0
  337. union/_version.py +21 -0
  338. union/connectors/__init__.py +0 -0
  339. union/errors.py +128 -0
  340. union/extras/__init__.py +5 -0
  341. union/extras/_container.py +263 -0
  342. union/io/__init__.py +11 -0
  343. union/io/_dataframe.py +0 -0
  344. union/io/_dir.py +425 -0
  345. union/io/_file.py +418 -0
  346. union/io/pickle/__init__.py +0 -0
  347. union/io/pickle/transformer.py +117 -0
  348. union/io/structured_dataset/__init__.py +122 -0
  349. union/io/structured_dataset/basic_dfs.py +219 -0
  350. union/io/structured_dataset/structured_dataset.py +1057 -0
  351. union/py.typed +0 -0
  352. union/remote/__init__.py +23 -0
  353. union/remote/_client/__init__.py +0 -0
  354. union/remote/_client/_protocols.py +129 -0
  355. union/remote/_client/auth/__init__.py +12 -0
  356. union/remote/_client/auth/_authenticators/__init__.py +0 -0
  357. union/remote/_client/auth/_authenticators/base.py +391 -0
  358. union/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  359. union/remote/_client/auth/_authenticators/device_code.py +120 -0
  360. union/remote/_client/auth/_authenticators/external_command.py +77 -0
  361. union/remote/_client/auth/_authenticators/factory.py +200 -0
  362. union/remote/_client/auth/_authenticators/pkce.py +515 -0
  363. union/remote/_client/auth/_channel.py +184 -0
  364. union/remote/_client/auth/_client_config.py +83 -0
  365. union/remote/_client/auth/_default_html.py +32 -0
  366. union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  367. union/remote/_client/auth/_grpc_utils/auth_interceptor.py +204 -0
  368. union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +144 -0
  369. union/remote/_client/auth/_keyring.py +154 -0
  370. union/remote/_client/auth/_token_client.py +258 -0
  371. union/remote/_client/auth/errors.py +16 -0
  372. union/remote/_client/controlplane.py +86 -0
  373. union/remote/_data.py +149 -0
  374. union/remote/_logs.py +74 -0
  375. union/remote/_project.py +86 -0
  376. union/remote/_run.py +820 -0
  377. union/remote/_secret.py +132 -0
  378. union/remote/_task.py +193 -0
  379. union/report/__init__.py +3 -0
  380. union/report/_report.py +178 -0
  381. union/report/_template.html +124 -0
  382. union/storage/__init__.py +24 -0
  383. union/storage/_remote_fs.py +34 -0
  384. union/storage/_storage.py +247 -0
  385. union/storage/_utils.py +5 -0
  386. union/types/__init__.py +11 -0
  387. union/types/_renderer.py +162 -0
  388. union/types/_string_literals.py +120 -0
  389. union/types/_type_engine.py +2131 -0
  390. union/types/_utils.py +80 -0
flyte/io/_file.py ADDED
@@ -0,0 +1,468 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from contextlib import asynccontextmanager, contextmanager
5
+ from pathlib import Path
6
+ from typing import (
7
+ IO,
8
+ Any,
9
+ AsyncGenerator,
10
+ Dict,
11
+ Generator,
12
+ Generic,
13
+ Optional,
14
+ Type,
15
+ TypeVar,
16
+ Union,
17
+ )
18
+
19
+ import aiofiles
20
+ from flyteidl.core import literals_pb2, types_pb2
21
+ from fsspec.asyn import AsyncFileSystem
22
+ from fsspec.utils import get_protocol
23
+ from mashumaro.types import SerializableType
24
+ from pydantic import BaseModel, model_validator
25
+ from synchronicity import Synchronizer
26
+
27
+ import flyte.storage as storage
28
+ from flyte._context import internal_ctx
29
+ from flyte._initialize import requires_initialization
30
+ from flyte._logging import logger
31
+ from flyte.types import TypeEngine, TypeTransformer, TypeTransformerFailedError
32
+
33
+ # Type variable for the file format
34
+ T = TypeVar("T")
35
+
36
+ synced = Synchronizer()
37
+
38
+
39
class File(BaseModel, Generic[T], SerializableType):
    """
    A generic file class representing a file with a specified format.
    Provides both async and sync interfaces for file operations.
    Users must handle all I/O operations themselves by instantiating this class with the appropriate class methods.

    The generic type T represents the format of the file.

    Example:
    ```python
    # Async usage
    from pandas import DataFrame
    csv_file = File[DataFrame](path="s3://my-bucket/data.csv")

    async with csv_file.open() as f:
        content = await f.read()

    # Sync alternative
    with csv_file.open_sync() as f:
        content = f.read()
    ```

    Example: Read a file input in a Task.
    ```
    @env.task
    async def my_task(file: File[DataFrame]):
        async with file.open() as f:
            df = pd.read_csv(f)
    ```

    Example: Write a file by streaming it directly to blob storage
    ```
    @env.task
    async def my_task() -> File[DataFrame]:
        df = pd.DataFrame(...)
        file = File.new_remote()
        async with file.open("wb") as f:
            df.to_csv(f)
        # No additional uploading will be done here.
        return file
    ```
    Example: Write a file by writing it locally first, and then uploading it.
    ```
    @env.task
    async def my_task() -> File[DataFrame]:
        # write to /tmp/data.csv
        return await File.from_local("/tmp/data.csv", remote_destination="s3://my-bucket/data.csv")
    ```

    Example: From an existing remote file
    ```
    @env.task
    async def my_task() -> File[DataFrame]:
        return File.from_existing_remote("s3://my-bucket/data.csv")
    ```

    Example: Take a remote file as input and return the same one, should not do any copy
    ```
    @env.task
    async def my_task(file: File[DataFrame]) -> File[DataFrame]:
        return file
    ```

    Args:
        path: The path to the file (can be local or remote)
        name: Optional name for the file (defaults to basename of path)
        format: Blob format string carried alongside the file (defaults to the empty string)
    """

    path: str
    name: Optional[str] = None
    format: str = ""

    class Config:
        arbitrary_types_allowed = True

    @model_validator(mode="before")
    @classmethod
    def pre_init(cls, data):
        """
        Default ``name`` to the basename of ``path`` before field validation runs.

        Input that is not a dict, or that has no usable ``path``, is passed through untouched so that
        regular field validation reports the problem instead of this hook raising.
        """
        if not isinstance(data, dict) or data.get("name") is not None:
            return data

        raw_path = data.get("path")
        if not isinstance(raw_path, (str, os.PathLike)):
            return data

        # Build a new dict rather than mutating the caller's input.
        return {**data, "name": Path(raw_path).name}

    def _serialize(self) -> Dict[str, Optional[str]]:
        """Return the model's fields as a plain dict (serialization hook of ``SerializableType``)."""
        return self.model_dump()

    @classmethod
    def _deserialize(cls, file_dump: Dict[str, Optional[str]]) -> File:
        """Rebuild an instance from the dict produced by ``_serialize``."""
        return cls.model_validate(file_dump)

    @classmethod
    def schema_match(cls, incoming: dict) -> bool:
        """
        Report whether ``incoming`` looks like this model's JSON schema.

        The check compares the ``type``, the ``title`` and the set of ``required`` fields.

        Args:
            incoming: A JSON-schema dictionary to compare against this class's schema.

        Returns:
            True when both schemas declare required fields and type, title and required set agree,
            False otherwise.
        """
        this_schema = cls.model_json_schema()
        current_required = this_schema.get("required")
        incoming_required = incoming.get("required")
        return bool(
            current_required
            and incoming_required
            and incoming.get("type") == this_schema.get("type")
            and incoming.get("title") == this_schema.get("title")
            and set(current_required) == set(incoming_required)
        )

    @classmethod
    @requires_initialization
    def new_remote(cls) -> File[T]:
        """
        Create a new File reference for a remote file that will be written to.

        Example:
        ```
        @env.task
        async def my_task() -> File[DataFrame]:
            df = pd.DataFrame(...)
            file = File.new_remote()
            async with file.open("wb") as f:
                df.to_csv(f)
            return file
        ```
        """
        ctx = internal_ctx()

        return cls(path=ctx.raw_data.get_random_remote_path())

    @classmethod
    def from_existing_remote(cls, remote_path: str) -> File[T]:
        """
        Create a File reference from an existing remote file.

        Example:
        ```python
        @env.task
        async def my_task() -> File[DataFrame]:
            return File.from_existing_remote("s3://my-bucket/data.csv")
        ```

        Args:
            remote_path: The remote path to the existing file
        """
        return cls(path=remote_path)

    @asynccontextmanager
    async def open(
        self,
        mode: str = "rb",
        block_size: Optional[int] = None,
        cache_type: str = "readahead",
        cache_options: Optional[dict] = None,
        compression: Optional[str] = None,
        **kwargs,
    ) -> AsyncGenerator[IO[Any], None]:
        """
        Asynchronously open the file and return a file-like object.

        Local files are opened with aiofiles (``mode`` and ``**kwargs`` are forwarded). Remote files are
        opened through the filesystem's ``open_async`` when it is implemented (only ``mode`` is
        forwarded); otherwise the synchronous ``fs.open`` is used with the block size, cache and
        compression options below.

        Args:
            mode: The mode to open the file in (default: 'rb')
            block_size: Size of blocks for reading (bytes)
            cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
            cache_options: Dictionary of options for the cache
            compression: Compression format or None for auto-detection
            **kwargs: Additional arguments passed to the underlying open call

        Returns:
            An async file-like object (a synchronous fsspec file object on the sync fallback path)

        Example:
        ```python
        async with file.open('rb') as f:
            data = await f.read()
        ```
        """
        import inspect

        fs = storage.get_underlying_filesystem(path=self.path)

        # fsspec filesystems expose ``protocol`` either as a string or as a tuple of aliases
        # (the local filesystem uses a tuple), so normalise before testing for "file".
        protocols = (fs.protocol,) if isinstance(fs.protocol, str) else tuple(fs.protocol)

        # Use aiofiles for local files
        if "file" in protocols:
            async with aiofiles.open(self.path, mode=mode, **kwargs) as f:
                yield f
            return

        # Only the attempt to obtain an async handle is guarded. An exception raised by the caller's
        # ``async with`` body must never be mistaken for "open_async is not implemented", otherwise
        # this generator would yield a second time.
        file_handle = None
        if isinstance(fs, AsyncFileSystem):
            try:
                file_handle = await fs.open_async(self.path, mode)
            except NotImplementedError:
                logger.debug(f"{fs} doesn't implement 'open_async', falling back to sync")

        if file_handle is not None:
            try:
                yield file_handle
            finally:
                # Async stream files define ``close`` as a coroutine; await it so the handle is
                # really closed (and buffered writes flushed) instead of leaving it un-awaited.
                close_result = file_handle.close()
                if inspect.isawaitable(close_result):
                    await close_result
            return

        # Sync fallback: configure the open parameters the same way ``open_sync`` does.
        open_kwargs = {"mode": mode, **kwargs}
        if compression:
            open_kwargs["compression"] = compression
        if block_size:
            open_kwargs["block_size"] = block_size
        if cache_type != "none":
            open_kwargs["cache_type"] = cache_type
            open_kwargs["cache_options"] = {} if cache_options is None else cache_options

        with fs.open(self.path, **open_kwargs) as sync_handle:
            yield sync_handle

    def exists_sync(self) -> bool:
        """
        Synchronously check if the file exists.

        Returns:
            True if the file exists, False otherwise

        Example:
        ```python
        if file.exists_sync():
            # Process the file
        ```
        """
        fs = storage.get_underlying_filesystem(path=self.path)
        return fs.exists(self.path)

    @contextmanager
    def open_sync(
        self,
        mode: str = "rb",
        block_size: Optional[int] = None,
        cache_type: str = "readahead",
        cache_options: Optional[dict] = None,
        compression: Optional[str] = None,
        **kwargs,
    ) -> Generator[IO[Any], None, None]:
        """
        Synchronously open the file and return a file-like object.

        Args:
            mode: The mode to open the file in (default: 'rb')
            block_size: Size of blocks for reading (bytes)
            cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
            cache_options: Dictionary of options for the cache
            compression: Compression format or None for auto-detection
            **kwargs: Additional arguments passed to fsspec's open method

        Returns:
            A file-like object

        Example:
        ```python
        with file.open_sync('rb') as f:
            data = f.read()
        ```
        """
        fs = storage.get_underlying_filesystem(path=self.path)

        # Set up cache options if provided
        if cache_options is None:
            cache_options = {}

        # Configure the open parameters
        open_kwargs = {"mode": mode, "compression": compression, **kwargs}

        if block_size:
            open_kwargs["block_size"] = block_size

        # Apply caching strategy
        if cache_type != "none":
            open_kwargs["cache_type"] = cache_type
            open_kwargs["cache_options"] = cache_options

        with fs.open(self.path, **open_kwargs) as f:
            yield f

    # @synced.wrap - enabling this did not work - synchronicity/pydantic issue
    async def download(self, local_path: Optional[Union[str, Path]] = None) -> str:
        """
        Asynchronously download the file to a local path.

        Args:
            local_path: The local path to download the file to. If None, a randomly generated
                local path is used.

        Returns:
            The path to the downloaded file

        Example:
        ```python
        local_file = await file.download('/tmp/myfile.csv')
        ```
        """
        if local_path is None:
            local_path = storage.get_random_local_path()
        else:
            local_path = str(Path(local_path).absolute())

        fs = storage.get_underlying_filesystem(path=self.path)
        protocols = (fs.protocol,) if isinstance(fs.protocol, str) else tuple(fs.protocol)

        # If it's already a local file, just copy it
        if "file" in protocols:
            # Opening the destination with "wb" truncates it, so copying a file onto itself would
            # destroy the data; in that case there is nothing to do.
            if os.path.abspath(self.path) != os.path.abspath(str(local_path)):
                # Use aiofiles for async copy
                async with aiofiles.open(self.path, "rb") as src:
                    async with aiofiles.open(local_path, "wb") as dst:
                        await dst.write(await src.read())
            return str(local_path)

        # Otherwise download from remote using async functionality
        await storage.get(self.path, str(local_path))
        return str(local_path)

    @classmethod
    @requires_initialization
    async def from_local(cls, local_path: Union[str, Path], remote_destination: Optional[str] = None) -> File[T]:
        """
        Create a new File object from a local file that will be uploaded to the configured remote store.

        Args:
            local_path: Path to the local file
            remote_destination: Optional path to store the file remotely. If None, a path will be generated.

        Returns:
            A new File instance pointing to the uploaded file

        Raises:
            ValueError: If ``local_path`` does not exist.

        Example:
        ```python
        remote_file = await File[DataFrame].from_local('/tmp/data.csv', 's3://bucket/data.csv')
        ```
        """
        if not os.path.exists(local_path):
            raise ValueError(f"File not found: {local_path}")

        remote_path = remote_destination or internal_ctx().raw_data.get_random_remote_path()
        protocol = get_protocol(remote_path)
        filename = Path(local_path).name

        # If remote_destination was not set by the user, and the configured raw data path is also local,
        # then let's optimize by not uploading.
        if protocol in ("file", "local"):
            if remote_destination is None:
                path = str(Path(local_path).absolute())
            else:
                # Otherwise, actually make a copy of the file: read the local source and write the
                # requested destination (never the other way round, which would truncate the source).
                source = Path(local_path).absolute()
                destination = Path(remote_path).absolute()
                if source != destination:
                    async with aiofiles.open(source, "rb") as src:
                        async with aiofiles.open(destination, "wb") as dst:
                            await dst.write(await src.read())
                path = str(destination)
        else:
            # Otherwise upload to remote using async storage layer
            path = await storage.put(str(local_path), remote_path)

        return cls(path=path, name=filename)
395
+
396
+
397
class FileTransformer(TypeTransformer[File]):
    """
    Type transformer that maps File objects to and from single-part blob literals.

    It only converts references (URI and format); it performs no i/o. Reading and writing the
    underlying data is the responsibility of the user.
    """

    def __init__(self):
        super().__init__(name="File", t=File)

    def get_literal_type(self, t: Type[File]) -> types_pb2.LiteralType:
        """Get the Flyte literal type for a File type."""
        # todo: set format from generic
        single_blob = types_pb2.BlobType(
            format="",  # Format is determined by the generic type T
            dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE,
        )
        return types_pb2.LiteralType(blob=single_blob)

    async def to_literal(
        self,
        python_val: File,
        python_type: Type[File],
        expected: types_pb2.LiteralType,
    ) -> literals_pb2.Literal:
        """Convert a File object to a Flyte literal."""
        if isinstance(python_val, File):
            # Assemble the literal inside-out: blob type -> metadata -> blob -> scalar -> literal.
            blob_type = types_pb2.BlobType(
                format=python_val.format,
                dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE,
            )
            blob_metadata = literals_pb2.BlobMetadata(type=blob_type)
            blob = literals_pb2.Blob(metadata=blob_metadata, uri=python_val.path)
            return literals_pb2.Literal(scalar=literals_pb2.Scalar(blob=blob))

        raise TypeTransformerFailedError(f"Expected File object, received {type(python_val)}")

    async def to_python_value(
        self,
        lv: literals_pb2.Literal,
        expected_python_type: Type[File],
    ) -> File:
        """Convert a Flyte literal to a File object."""
        if not lv.scalar.HasField("blob"):
            message = f"Expected blob literal, received {lv}"
            raise TypeTransformerFailedError(message)

        if lv.scalar.blob.metadata.type.dimensionality != types_pb2.BlobType.BlobDimensionality.SINGLE:
            message = f"Expected single part blob, received {lv.scalar.blob.metadata.type.dimensionality}"
            raise TypeTransformerFailedError(message)

        blob = lv.scalar.blob
        location = blob.uri
        # The display name is the last path component of the blob's URI.
        result: File = File(path=location, name=Path(location).name, format=blob.metadata.type.format)
        return result

    def guess_python_type(self, literal_type: types_pb2.LiteralType) -> Type[File]:
        """Guess the Python type from a Flyte literal type."""
        is_single_blob = (
            literal_type.HasField("blob")
            and literal_type.blob.dimensionality == types_pb2.BlobType.BlobDimensionality.SINGLE
        )
        # Blobs in the "PythonPickle" format are excluded here; see pickle transformer.
        if is_single_blob and literal_type.blob.format != "PythonPickle":
            return File
        raise ValueError(f"Cannot guess python type from {literal_type}")


# Make the transformer available to the type engine as soon as this module is imported.
TypeEngine.register(FileTransformer())
File without changes
@@ -0,0 +1,117 @@
1
+ import hashlib
2
+ import os
3
+ import typing
4
+ from typing import Type
5
+
6
+ import aiofiles
7
+ import cloudpickle
8
+ from flyteidl.core import literals_pb2, types_pb2
9
+
10
+ import flyte.storage as storage
11
+ from flyte.types import TypeEngine, TypeTransformer
12
+
13
T = typing.TypeVar("T")


class FlytePickle(typing.Generic[T]):
    """
    Internal marker type for values that are serialized with cloudpickle.

    This type is meant for internal use only; user code should not reference
    it directly. Per the original note, any type that flyte can't recognize
    will become FlytePickle.

    Subscripting (``FlytePickle[SomeType]``) produces a subclass whose
    ``python_type()`` reports ``SomeType``; the bare class reports ``NoneType``.
    """

    @classmethod
    def python_type(cls) -> typing.Type:
        """Return the wrapped Python type (``NoneType`` for the bare class)."""
        return type(None)

    @classmethod
    def __class_getitem__(cls, python_type: typing.Type) -> typing.Type:
        """Return ``cls`` for ``None``, otherwise a subclass bound to ``python_type``."""
        if python_type is None:
            return cls

        wrapped = python_type

        class _SpecificFormatClass(FlytePickle):
            # Get the type engine to see this as kind of a generic
            __origin__ = FlytePickle

            @classmethod
            def python_type(cls) -> typing.Type:
                return wrapped

        return _SpecificFormatClass

    @classmethod
    async def to_pickle(cls, python_val: typing.Any) -> str:
        """
        Pickle ``python_val`` to a local file and hand it to ``storage.put``.

        The local file name is derived from the MD5 hex digest of the pickled
        bytes. Returns whatever ``storage.put`` returns for that file.
        """
        payload = cloudpickle.dumps(python_val)
        digest = hashlib.md5(payload).hexdigest()

        local_path = storage.get_random_local_path(file_path_or_file_name=digest)
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        async with aiofiles.open(local_path, "w+b") as sink:
            await sink.write(payload)

        return await storage.put(str(local_path))

    @classmethod
    async def from_pickle(cls, uri: str) -> typing.Any:
        """
        Load and return the object pickled at ``uri``.

        When ``storage.is_remote(uri)`` is true the file is first fetched to a
        random local path via ``storage.get``; otherwise ``uri`` is opened
        directly.
        """
        source = uri
        if storage.is_remote(uri):
            download_target = str(storage.get_random_local_path())
            await storage.get(uri, download_target, False)
            source = download_target

        async with aiofiles.open(source, "rb") as infile:
            raw = await infile.read()

        # NOTE(review): unpickling executes code embedded in the payload; this
        # must only ever be pointed at trusted data.
        return cloudpickle.loads(raw)
65
+
66
+
67
class FlytePickleTransformer(TypeTransformer[FlytePickle]):
    """
    Type transformer that stores arbitrary Python values as pickled blobs.

    Values are written through ``FlytePickle.to_pickle`` and represented as a
    SINGLE-dimensionality blob literal whose format is ``PYTHON_PICKLE_FORMAT``.
    """

    # Blob format string that marks a blob as a pickled Python object.
    PYTHON_PICKLE_FORMAT = "PythonPickle"

    def __init__(self):
        super().__init__(name="FlytePickle", t=FlytePickle)

    def assert_type(self, t: Type[T], v: T):
        # Every type can serialize to pickle, so we don't need to check the type here.
        ...

    async def to_python_value(self, lv: literals_pb2.Literal, expected_python_type: Type[T]) -> T:
        """Unpickle and return the object stored at the literal's blob URI."""
        uri = lv.scalar.blob.uri
        return await FlytePickle.from_pickle(uri)

    async def to_literal(
        self,
        python_val: T,
        python_type: Type[T],
        expected: types_pb2.LiteralType,
    ) -> literals_pb2.Literal:
        """
        Pickle ``python_val`` and return a blob literal pointing at the result.

        Raises:
            AssertionError: If ``python_val`` is ``None``.
        """
        if python_val is None:
            raise AssertionError("Cannot pickle None Value.")
        meta = literals_pb2.BlobMetadata(
            type=types_pb2.BlobType(
                format=self.PYTHON_PICKLE_FORMAT, dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE
            )
        )
        remote_path = await FlytePickle.to_pickle(python_val)
        return literals_pb2.Literal(scalar=literals_pb2.Scalar(blob=literals_pb2.Blob(metadata=meta, uri=remote_path)))

    def guess_python_type(self, literal_type: types_pb2.LiteralType) -> typing.Type[FlytePickle[typing.Any]]:
        """
        Return ``FlytePickle`` for a SINGLE blob literal type in pickle format.

        Raises:
            ValueError: If ``literal_type`` is anything else.
        """
        # A protobuf sub-message attribute is never None, so presence has to be
        # tested with HasField rather than an ``is not None`` comparison.
        if (
            literal_type.HasField("blob")
            and literal_type.blob.dimensionality == types_pb2.BlobType.BlobDimensionality.SINGLE
            and literal_type.blob.format == FlytePickleTransformer.PYTHON_PICKLE_FORMAT
        ):
            return FlytePickle

        raise ValueError(f"Transformer {self} cannot reverse {literal_type}")

    def get_literal_type(self, t: Type[T]) -> types_pb2.LiteralType:
        """
        Build the pickle blob literal type for ``t``.

        The string form of ``t`` is recorded under the ``python_class_name``
        key of the literal type's metadata.
        """
        lt = types_pb2.LiteralType(
            blob=types_pb2.BlobType(
                format=self.PYTHON_PICKLE_FORMAT, dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE
            )
        )
        # ``metadata`` is a protobuf message field; direct assignment of a dict
        # is rejected by protobuf, so populate the Struct in place instead.
        lt.metadata.update({"python_class_name": str(t)})
        return lt
115
+
116
+
117
+ TypeEngine.register(FlytePickleTransformer())  # module-level call: a FlytePickleTransformer instance is registered with TypeEngine when this module is imported