flyte 0.0.1b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flyte might be problematic. Click here for more details.

Files changed (390) hide show
  1. flyte/__init__.py +62 -0
  2. flyte/_api_commons.py +3 -0
  3. flyte/_bin/__init__.py +0 -0
  4. flyte/_bin/runtime.py +126 -0
  5. flyte/_build.py +25 -0
  6. flyte/_cache/__init__.py +12 -0
  7. flyte/_cache/cache.py +146 -0
  8. flyte/_cache/defaults.py +9 -0
  9. flyte/_cache/policy_function_body.py +42 -0
  10. flyte/_cli/__init__.py +0 -0
  11. flyte/_cli/_common.py +287 -0
  12. flyte/_cli/_create.py +42 -0
  13. flyte/_cli/_delete.py +23 -0
  14. flyte/_cli/_deploy.py +140 -0
  15. flyte/_cli/_get.py +235 -0
  16. flyte/_cli/_run.py +152 -0
  17. flyte/_cli/main.py +72 -0
  18. flyte/_code_bundle/__init__.py +8 -0
  19. flyte/_code_bundle/_ignore.py +113 -0
  20. flyte/_code_bundle/_packaging.py +187 -0
  21. flyte/_code_bundle/_utils.py +339 -0
  22. flyte/_code_bundle/bundle.py +178 -0
  23. flyte/_context.py +146 -0
  24. flyte/_datastructures.py +342 -0
  25. flyte/_deploy.py +202 -0
  26. flyte/_doc.py +29 -0
  27. flyte/_docstring.py +32 -0
  28. flyte/_environment.py +43 -0
  29. flyte/_group.py +31 -0
  30. flyte/_hash.py +23 -0
  31. flyte/_image.py +760 -0
  32. flyte/_initialize.py +634 -0
  33. flyte/_interface.py +84 -0
  34. flyte/_internal/__init__.py +3 -0
  35. flyte/_internal/controllers/__init__.py +115 -0
  36. flyte/_internal/controllers/_local_controller.py +118 -0
  37. flyte/_internal/controllers/_trace.py +40 -0
  38. flyte/_internal/controllers/pbhash.py +39 -0
  39. flyte/_internal/controllers/remote/__init__.py +40 -0
  40. flyte/_internal/controllers/remote/_action.py +141 -0
  41. flyte/_internal/controllers/remote/_client.py +43 -0
  42. flyte/_internal/controllers/remote/_controller.py +361 -0
  43. flyte/_internal/controllers/remote/_core.py +402 -0
  44. flyte/_internal/controllers/remote/_informer.py +361 -0
  45. flyte/_internal/controllers/remote/_service_protocol.py +50 -0
  46. flyte/_internal/imagebuild/__init__.py +11 -0
  47. flyte/_internal/imagebuild/docker_builder.py +416 -0
  48. flyte/_internal/imagebuild/image_builder.py +241 -0
  49. flyte/_internal/imagebuild/remote_builder.py +0 -0
  50. flyte/_internal/resolvers/__init__.py +0 -0
  51. flyte/_internal/resolvers/_task_module.py +54 -0
  52. flyte/_internal/resolvers/common.py +31 -0
  53. flyte/_internal/resolvers/default.py +28 -0
  54. flyte/_internal/runtime/__init__.py +0 -0
  55. flyte/_internal/runtime/convert.py +199 -0
  56. flyte/_internal/runtime/entrypoints.py +135 -0
  57. flyte/_internal/runtime/io.py +136 -0
  58. flyte/_internal/runtime/resources_serde.py +138 -0
  59. flyte/_internal/runtime/task_serde.py +210 -0
  60. flyte/_internal/runtime/taskrunner.py +190 -0
  61. flyte/_internal/runtime/types_serde.py +54 -0
  62. flyte/_logging.py +124 -0
  63. flyte/_protos/__init__.py +0 -0
  64. flyte/_protos/common/authorization_pb2.py +66 -0
  65. flyte/_protos/common/authorization_pb2.pyi +108 -0
  66. flyte/_protos/common/authorization_pb2_grpc.py +4 -0
  67. flyte/_protos/common/identifier_pb2.py +71 -0
  68. flyte/_protos/common/identifier_pb2.pyi +82 -0
  69. flyte/_protos/common/identifier_pb2_grpc.py +4 -0
  70. flyte/_protos/common/identity_pb2.py +48 -0
  71. flyte/_protos/common/identity_pb2.pyi +72 -0
  72. flyte/_protos/common/identity_pb2_grpc.py +4 -0
  73. flyte/_protos/common/list_pb2.py +36 -0
  74. flyte/_protos/common/list_pb2.pyi +69 -0
  75. flyte/_protos/common/list_pb2_grpc.py +4 -0
  76. flyte/_protos/common/policy_pb2.py +37 -0
  77. flyte/_protos/common/policy_pb2.pyi +27 -0
  78. flyte/_protos/common/policy_pb2_grpc.py +4 -0
  79. flyte/_protos/common/role_pb2.py +37 -0
  80. flyte/_protos/common/role_pb2.pyi +53 -0
  81. flyte/_protos/common/role_pb2_grpc.py +4 -0
  82. flyte/_protos/common/runtime_version_pb2.py +28 -0
  83. flyte/_protos/common/runtime_version_pb2.pyi +24 -0
  84. flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
  85. flyte/_protos/logs/dataplane/payload_pb2.py +96 -0
  86. flyte/_protos/logs/dataplane/payload_pb2.pyi +168 -0
  87. flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
  88. flyte/_protos/secret/definition_pb2.py +49 -0
  89. flyte/_protos/secret/definition_pb2.pyi +93 -0
  90. flyte/_protos/secret/definition_pb2_grpc.py +4 -0
  91. flyte/_protos/secret/payload_pb2.py +62 -0
  92. flyte/_protos/secret/payload_pb2.pyi +94 -0
  93. flyte/_protos/secret/payload_pb2_grpc.py +4 -0
  94. flyte/_protos/secret/secret_pb2.py +38 -0
  95. flyte/_protos/secret/secret_pb2.pyi +6 -0
  96. flyte/_protos/secret/secret_pb2_grpc.py +198 -0
  97. flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
  98. flyte/_protos/validate/validate/validate_pb2.py +76 -0
  99. flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
  100. flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
  101. flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
  102. flyte/_protos/workflow/queue_service_pb2.py +106 -0
  103. flyte/_protos/workflow/queue_service_pb2.pyi +141 -0
  104. flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
  105. flyte/_protos/workflow/run_definition_pb2.py +128 -0
  106. flyte/_protos/workflow/run_definition_pb2.pyi +310 -0
  107. flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
  108. flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
  109. flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
  110. flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
  111. flyte/_protos/workflow/run_service_pb2.py +133 -0
  112. flyte/_protos/workflow/run_service_pb2.pyi +175 -0
  113. flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
  114. flyte/_protos/workflow/state_service_pb2.py +58 -0
  115. flyte/_protos/workflow/state_service_pb2.pyi +71 -0
  116. flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
  117. flyte/_protos/workflow/task_definition_pb2.py +72 -0
  118. flyte/_protos/workflow/task_definition_pb2.pyi +65 -0
  119. flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
  120. flyte/_protos/workflow/task_service_pb2.py +44 -0
  121. flyte/_protos/workflow/task_service_pb2.pyi +31 -0
  122. flyte/_protos/workflow/task_service_pb2_grpc.py +104 -0
  123. flyte/_resources.py +226 -0
  124. flyte/_retry.py +32 -0
  125. flyte/_reusable_environment.py +25 -0
  126. flyte/_run.py +411 -0
  127. flyte/_secret.py +61 -0
  128. flyte/_task.py +367 -0
  129. flyte/_task_environment.py +200 -0
  130. flyte/_timeout.py +47 -0
  131. flyte/_tools.py +27 -0
  132. flyte/_trace.py +128 -0
  133. flyte/_utils/__init__.py +20 -0
  134. flyte/_utils/asyn.py +119 -0
  135. flyte/_utils/coro_management.py +25 -0
  136. flyte/_utils/file_handling.py +72 -0
  137. flyte/_utils/helpers.py +108 -0
  138. flyte/_utils/lazy_module.py +54 -0
  139. flyte/_utils/uv_script_parser.py +49 -0
  140. flyte/_version.py +21 -0
  141. flyte/connectors/__init__.py +0 -0
  142. flyte/errors.py +143 -0
  143. flyte/extras/__init__.py +5 -0
  144. flyte/extras/_container.py +273 -0
  145. flyte/io/__init__.py +11 -0
  146. flyte/io/_dataframe.py +0 -0
  147. flyte/io/_dir.py +448 -0
  148. flyte/io/_file.py +468 -0
  149. flyte/io/pickle/__init__.py +0 -0
  150. flyte/io/pickle/transformer.py +117 -0
  151. flyte/io/structured_dataset/__init__.py +129 -0
  152. flyte/io/structured_dataset/basic_dfs.py +219 -0
  153. flyte/io/structured_dataset/structured_dataset.py +1061 -0
  154. flyte/py.typed +0 -0
  155. flyte/remote/__init__.py +25 -0
  156. flyte/remote/_client/__init__.py +0 -0
  157. flyte/remote/_client/_protocols.py +131 -0
  158. flyte/remote/_client/auth/__init__.py +12 -0
  159. flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
  160. flyte/remote/_client/auth/_authenticators/base.py +397 -0
  161. flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  162. flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
  163. flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
  164. flyte/remote/_client/auth/_authenticators/factory.py +200 -0
  165. flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
  166. flyte/remote/_client/auth/_channel.py +184 -0
  167. flyte/remote/_client/auth/_client_config.py +83 -0
  168. flyte/remote/_client/auth/_default_html.py +32 -0
  169. flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  170. flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
  171. flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
  172. flyte/remote/_client/auth/_keyring.py +143 -0
  173. flyte/remote/_client/auth/_token_client.py +260 -0
  174. flyte/remote/_client/auth/errors.py +16 -0
  175. flyte/remote/_client/controlplane.py +95 -0
  176. flyte/remote/_console.py +18 -0
  177. flyte/remote/_data.py +155 -0
  178. flyte/remote/_logs.py +116 -0
  179. flyte/remote/_project.py +86 -0
  180. flyte/remote/_run.py +873 -0
  181. flyte/remote/_secret.py +132 -0
  182. flyte/remote/_task.py +227 -0
  183. flyte/report/__init__.py +3 -0
  184. flyte/report/_report.py +178 -0
  185. flyte/report/_template.html +124 -0
  186. flyte/storage/__init__.py +24 -0
  187. flyte/storage/_remote_fs.py +34 -0
  188. flyte/storage/_storage.py +251 -0
  189. flyte/storage/_utils.py +5 -0
  190. flyte/types/__init__.py +13 -0
  191. flyte/types/_interface.py +25 -0
  192. flyte/types/_renderer.py +162 -0
  193. flyte/types/_string_literals.py +120 -0
  194. flyte/types/_type_engine.py +2210 -0
  195. flyte/types/_utils.py +80 -0
  196. flyte-0.0.1b0.dist-info/METADATA +179 -0
  197. flyte-0.0.1b0.dist-info/RECORD +390 -0
  198. flyte-0.0.1b0.dist-info/WHEEL +5 -0
  199. flyte-0.0.1b0.dist-info/entry_points.txt +3 -0
  200. flyte-0.0.1b0.dist-info/top_level.txt +1 -0
  201. union/__init__.py +54 -0
  202. union/_api_commons.py +3 -0
  203. union/_bin/__init__.py +0 -0
  204. union/_bin/runtime.py +113 -0
  205. union/_build.py +25 -0
  206. union/_cache/__init__.py +12 -0
  207. union/_cache/cache.py +141 -0
  208. union/_cache/defaults.py +9 -0
  209. union/_cache/policy_function_body.py +42 -0
  210. union/_cli/__init__.py +0 -0
  211. union/_cli/_common.py +263 -0
  212. union/_cli/_create.py +40 -0
  213. union/_cli/_delete.py +23 -0
  214. union/_cli/_deploy.py +120 -0
  215. union/_cli/_get.py +162 -0
  216. union/_cli/_params.py +579 -0
  217. union/_cli/_run.py +150 -0
  218. union/_cli/main.py +72 -0
  219. union/_code_bundle/__init__.py +8 -0
  220. union/_code_bundle/_ignore.py +113 -0
  221. union/_code_bundle/_packaging.py +187 -0
  222. union/_code_bundle/_utils.py +342 -0
  223. union/_code_bundle/bundle.py +176 -0
  224. union/_context.py +146 -0
  225. union/_datastructures.py +295 -0
  226. union/_deploy.py +185 -0
  227. union/_doc.py +29 -0
  228. union/_docstring.py +26 -0
  229. union/_environment.py +43 -0
  230. union/_group.py +31 -0
  231. union/_hash.py +23 -0
  232. union/_image.py +760 -0
  233. union/_initialize.py +585 -0
  234. union/_interface.py +84 -0
  235. union/_internal/__init__.py +3 -0
  236. union/_internal/controllers/__init__.py +77 -0
  237. union/_internal/controllers/_local_controller.py +77 -0
  238. union/_internal/controllers/pbhash.py +39 -0
  239. union/_internal/controllers/remote/__init__.py +40 -0
  240. union/_internal/controllers/remote/_action.py +131 -0
  241. union/_internal/controllers/remote/_client.py +43 -0
  242. union/_internal/controllers/remote/_controller.py +169 -0
  243. union/_internal/controllers/remote/_core.py +341 -0
  244. union/_internal/controllers/remote/_informer.py +260 -0
  245. union/_internal/controllers/remote/_service_protocol.py +44 -0
  246. union/_internal/imagebuild/__init__.py +11 -0
  247. union/_internal/imagebuild/docker_builder.py +416 -0
  248. union/_internal/imagebuild/image_builder.py +243 -0
  249. union/_internal/imagebuild/remote_builder.py +0 -0
  250. union/_internal/resolvers/__init__.py +0 -0
  251. union/_internal/resolvers/_task_module.py +31 -0
  252. union/_internal/resolvers/common.py +24 -0
  253. union/_internal/resolvers/default.py +27 -0
  254. union/_internal/runtime/__init__.py +0 -0
  255. union/_internal/runtime/convert.py +163 -0
  256. union/_internal/runtime/entrypoints.py +121 -0
  257. union/_internal/runtime/io.py +136 -0
  258. union/_internal/runtime/resources_serde.py +134 -0
  259. union/_internal/runtime/task_serde.py +202 -0
  260. union/_internal/runtime/taskrunner.py +179 -0
  261. union/_internal/runtime/types_serde.py +53 -0
  262. union/_logging.py +124 -0
  263. union/_protos/__init__.py +0 -0
  264. union/_protos/common/authorization_pb2.py +66 -0
  265. union/_protos/common/authorization_pb2.pyi +106 -0
  266. union/_protos/common/authorization_pb2_grpc.py +4 -0
  267. union/_protos/common/identifier_pb2.py +71 -0
  268. union/_protos/common/identifier_pb2.pyi +82 -0
  269. union/_protos/common/identifier_pb2_grpc.py +4 -0
  270. union/_protos/common/identity_pb2.py +48 -0
  271. union/_protos/common/identity_pb2.pyi +72 -0
  272. union/_protos/common/identity_pb2_grpc.py +4 -0
  273. union/_protos/common/list_pb2.py +36 -0
  274. union/_protos/common/list_pb2.pyi +69 -0
  275. union/_protos/common/list_pb2_grpc.py +4 -0
  276. union/_protos/common/policy_pb2.py +37 -0
  277. union/_protos/common/policy_pb2.pyi +27 -0
  278. union/_protos/common/policy_pb2_grpc.py +4 -0
  279. union/_protos/common/role_pb2.py +37 -0
  280. union/_protos/common/role_pb2.pyi +51 -0
  281. union/_protos/common/role_pb2_grpc.py +4 -0
  282. union/_protos/common/runtime_version_pb2.py +28 -0
  283. union/_protos/common/runtime_version_pb2.pyi +24 -0
  284. union/_protos/common/runtime_version_pb2_grpc.py +4 -0
  285. union/_protos/logs/dataplane/payload_pb2.py +96 -0
  286. union/_protos/logs/dataplane/payload_pb2.pyi +168 -0
  287. union/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
  288. union/_protos/secret/definition_pb2.py +49 -0
  289. union/_protos/secret/definition_pb2.pyi +93 -0
  290. union/_protos/secret/definition_pb2_grpc.py +4 -0
  291. union/_protos/secret/payload_pb2.py +62 -0
  292. union/_protos/secret/payload_pb2.pyi +94 -0
  293. union/_protos/secret/payload_pb2_grpc.py +4 -0
  294. union/_protos/secret/secret_pb2.py +38 -0
  295. union/_protos/secret/secret_pb2.pyi +6 -0
  296. union/_protos/secret/secret_pb2_grpc.py +198 -0
  297. union/_protos/validate/validate/validate_pb2.py +76 -0
  298. union/_protos/workflow/node_execution_service_pb2.py +26 -0
  299. union/_protos/workflow/node_execution_service_pb2.pyi +4 -0
  300. union/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
  301. union/_protos/workflow/queue_service_pb2.py +75 -0
  302. union/_protos/workflow/queue_service_pb2.pyi +103 -0
  303. union/_protos/workflow/queue_service_pb2_grpc.py +172 -0
  304. union/_protos/workflow/run_definition_pb2.py +100 -0
  305. union/_protos/workflow/run_definition_pb2.pyi +256 -0
  306. union/_protos/workflow/run_definition_pb2_grpc.py +4 -0
  307. union/_protos/workflow/run_logs_service_pb2.py +41 -0
  308. union/_protos/workflow/run_logs_service_pb2.pyi +28 -0
  309. union/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
  310. union/_protos/workflow/run_service_pb2.py +133 -0
  311. union/_protos/workflow/run_service_pb2.pyi +173 -0
  312. union/_protos/workflow/run_service_pb2_grpc.py +412 -0
  313. union/_protos/workflow/state_service_pb2.py +58 -0
  314. union/_protos/workflow/state_service_pb2.pyi +69 -0
  315. union/_protos/workflow/state_service_pb2_grpc.py +138 -0
  316. union/_protos/workflow/task_definition_pb2.py +72 -0
  317. union/_protos/workflow/task_definition_pb2.pyi +65 -0
  318. union/_protos/workflow/task_definition_pb2_grpc.py +4 -0
  319. union/_protos/workflow/task_service_pb2.py +44 -0
  320. union/_protos/workflow/task_service_pb2.pyi +31 -0
  321. union/_protos/workflow/task_service_pb2_grpc.py +104 -0
  322. union/_resources.py +226 -0
  323. union/_retry.py +32 -0
  324. union/_reusable_environment.py +25 -0
  325. union/_run.py +374 -0
  326. union/_secret.py +61 -0
  327. union/_task.py +354 -0
  328. union/_task_environment.py +186 -0
  329. union/_timeout.py +47 -0
  330. union/_tools.py +27 -0
  331. union/_utils/__init__.py +11 -0
  332. union/_utils/asyn.py +119 -0
  333. union/_utils/file_handling.py +71 -0
  334. union/_utils/helpers.py +46 -0
  335. union/_utils/lazy_module.py +54 -0
  336. union/_utils/uv_script_parser.py +49 -0
  337. union/_version.py +21 -0
  338. union/connectors/__init__.py +0 -0
  339. union/errors.py +128 -0
  340. union/extras/__init__.py +5 -0
  341. union/extras/_container.py +263 -0
  342. union/io/__init__.py +11 -0
  343. union/io/_dataframe.py +0 -0
  344. union/io/_dir.py +425 -0
  345. union/io/_file.py +418 -0
  346. union/io/pickle/__init__.py +0 -0
  347. union/io/pickle/transformer.py +117 -0
  348. union/io/structured_dataset/__init__.py +122 -0
  349. union/io/structured_dataset/basic_dfs.py +219 -0
  350. union/io/structured_dataset/structured_dataset.py +1057 -0
  351. union/py.typed +0 -0
  352. union/remote/__init__.py +23 -0
  353. union/remote/_client/__init__.py +0 -0
  354. union/remote/_client/_protocols.py +129 -0
  355. union/remote/_client/auth/__init__.py +12 -0
  356. union/remote/_client/auth/_authenticators/__init__.py +0 -0
  357. union/remote/_client/auth/_authenticators/base.py +391 -0
  358. union/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  359. union/remote/_client/auth/_authenticators/device_code.py +120 -0
  360. union/remote/_client/auth/_authenticators/external_command.py +77 -0
  361. union/remote/_client/auth/_authenticators/factory.py +200 -0
  362. union/remote/_client/auth/_authenticators/pkce.py +515 -0
  363. union/remote/_client/auth/_channel.py +184 -0
  364. union/remote/_client/auth/_client_config.py +83 -0
  365. union/remote/_client/auth/_default_html.py +32 -0
  366. union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  367. union/remote/_client/auth/_grpc_utils/auth_interceptor.py +204 -0
  368. union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +144 -0
  369. union/remote/_client/auth/_keyring.py +154 -0
  370. union/remote/_client/auth/_token_client.py +258 -0
  371. union/remote/_client/auth/errors.py +16 -0
  372. union/remote/_client/controlplane.py +86 -0
  373. union/remote/_data.py +149 -0
  374. union/remote/_logs.py +74 -0
  375. union/remote/_project.py +86 -0
  376. union/remote/_run.py +820 -0
  377. union/remote/_secret.py +132 -0
  378. union/remote/_task.py +193 -0
  379. union/report/__init__.py +3 -0
  380. union/report/_report.py +178 -0
  381. union/report/_template.html +124 -0
  382. union/storage/__init__.py +24 -0
  383. union/storage/_remote_fs.py +34 -0
  384. union/storage/_storage.py +247 -0
  385. union/storage/_utils.py +5 -0
  386. union/types/__init__.py +11 -0
  387. union/types/_renderer.py +162 -0
  388. union/types/_string_literals.py +120 -0
  389. union/types/_type_engine.py +2131 -0
  390. union/types/_utils.py +80 -0
union/io/_file.py ADDED
@@ -0,0 +1,418 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from contextlib import asynccontextmanager, contextmanager
5
+ from pathlib import Path
6
+ from typing import IO, Any, AsyncContextManager, Generic, Optional, Type, TypeVar, Union
7
+
8
+ import aiofiles
9
+ from flyteidl.core import literals_pb2, types_pb2
10
+ from fsspec.asyn import AsyncFileSystem
11
+ from fsspec.utils import get_protocol
12
+ from pydantic import BaseModel, model_validator
13
+ from synchronicity import Synchronizer
14
+
15
+ import union.storage as storage
16
+ from union._context import internal_ctx
17
+ from union._initialize import requires_initialization
18
+ from union._logging import logger
19
+ from union.types import TypeEngine, TypeTransformer, TypeTransformerFailedError
20
+
21
+ # Type variable for the file format; it is the generic parameter of File[T].
22
+ T = TypeVar("T")
23
+
24
+ synced = Synchronizer()  # Only referenced by a commented-out ``@synced.wrap`` in this file.
25
+
26
+
27
class File(BaseModel, Generic[T]):
    """
    A generic file class representing a file with a specified format.
    Provides both async and sync interfaces for file operations.
    Users must handle all I/O operations themselves by instantiating this class with the appropriate class methods.

    The generic type T represents the format of the file.

    Example:
    ```python
    # Async usage
    from pandas import DataFrame
    csv_file = File[DataFrame](path="s3://my-bucket/data.csv")

    async with csv_file.open() as f:
        content = await f.read()

    # Sync alternative
    with csv_file.open_sync() as f:
        content = f.read()
    ```

    Example: Read a file input in a Task.
    ```
    @env.task
    async def my_task(file: File[DataFrame]):
        async with file.open() as f:
            df = pd.read_csv(f)
    ```

    Example: Write a file by streaming it directly to blob storage
    ```
    @env.task
    async def my_task() -> File[DataFrame]:
        df = pd.DataFrame(...)
        file = File.new_remote()
        async with file.open("wb") as f:
            df.to_csv(f)
        # No additional uploading will be done here.
        return file
    ```

    Example: Write a file by writing it locally first, and then uploading it.
    ```
    @env.task
    async def my_task() -> File[DataFrame]:
        # write to /tmp/data.csv
        return await File.from_local("/tmp/data.csv", remote_destination="s3://my-bucket/data.csv")
    ```

    Example: From an existing remote file
    ```
    @env.task
    async def my_task() -> File[DataFrame]:
        return File.from_existing_remote("s3://my-bucket/data.csv")
    ```

    Example: Take a remote file as input and return the same one, should not do any copy
    ```
    @env.task
    async def my_task(file: File[DataFrame]) -> File[DataFrame]:
        return file
    ```

    Args:
        path: The path to the file (can be local or remote)
        name: Optional name for the file (defaults to basename of path)
        format: Format string stored alongside the path (defaults to the empty string)
    """

    path: str
    name: Optional[str] = None
    format: str = ""

    class Config:
        arbitrary_types_allowed = True

    @model_validator(mode="before")
    @classmethod
    def pre_init(cls, data):
        """
        Fill in ``name`` with the basename of ``path`` when no name was supplied.

        Args:
            data: Raw input handed to the model before field validation.

        Returns:
            The input, with ``name`` defaulted when possible. Non-dict input and input without a
            ``path`` are returned untouched so that regular field validation reports the problem.
        """
        # A "before" validator can receive arbitrary input, not only dictionaries.
        if not isinstance(data, dict):
            return data
        if data.get("name") is None and data.get("path") is not None:
            # Build a new dict rather than mutating the one owned by the caller.
            data = {**data, "name": Path(data["path"]).name}
        return data

    @classmethod
    @requires_initialization
    def new_remote(cls) -> File[T]:
        """
        Create a new File reference for a remote file that will be written to.

        The path comes from ``internal_ctx().raw_data.get_random_remote_path()``; nothing is written here.

        Example:
        ```
        @env.task
        async def my_task() -> File[DataFrame]:
            df = pd.DataFrame(...)
            file = File.new_remote()
            async with file.open("wb") as f:
                df.to_csv(f)
            return file
        ```
        """
        ctx = internal_ctx()

        return cls(path=ctx.raw_data.get_random_remote_path())

    @classmethod
    def from_existing_remote(cls, remote_path: str) -> File[T]:
        """
        Create a File reference from an existing remote file.

        Example:
        ```python
        @env.task
        async def my_task() -> File[DataFrame]:
            return File.from_existing_remote("s3://my-bucket/data.csv")
        ```

        Args:
            remote_path: The remote path to the existing file
        """
        return cls(path=remote_path)

    @asynccontextmanager
    async def open(
        self,
        mode: str = "rb",
        block_size: Optional[int] = None,
        cache_type: str = "readahead",
        cache_options: Optional[dict] = None,
        compression: Optional[str] = None,
        **kwargs,
    ) -> AsyncContextManager[IO[Any]]:
        """
        Asynchronously open the file and return a file-like object.

        Three strategies are tried, in order:

        1. Local files are opened with ``aiofiles`` (only ``mode`` and ``**kwargs`` are forwarded).
        2. Filesystems that are ``AsyncFileSystem`` instances are opened with ``open_async``
           (only ``mode`` is forwarded).
        3. Anything else, or an async filesystem without ``open_async``, falls back to the
           synchronous ``fs.open`` with the block size, cache and compression options applied.

        Args:
            mode: The mode to open the file in (default: 'rb')
            block_size: Size of blocks for reading (bytes)
            cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
            cache_options: Dictionary of options for the cache
            compression: Compression format or None for auto-detection
            **kwargs: Additional arguments passed to the underlying open call

        Returns:
            An async context manager yielding a file-like object

        Example:
        ```python
        async with file.open('rb') as f:
            data = await f.read()
        ```
        """
        import inspect

        fs = storage.get_underlying_filesystem(path=self.path)

        # Use aiofiles for local files. ``protocol`` may be a single string or a tuple of aliases,
        # so use the same membership test as ``download``.
        if "file" in fs.protocol:
            async with aiofiles.open(self.path, mode=mode, **kwargs) as f:
                yield f
            return

        # This code is broadly similar to what storage.get_stream does, but without actually reading from the stream
        async_handle = None
        if isinstance(fs, AsyncFileSystem):
            # Only the open call is guarded: an exception raised by the caller's ``async with`` body must
            # propagate untouched instead of being mistaken for a missing ``open_async`` implementation.
            try:
                async_handle = await fs.open_async(self.path, mode)
            except NotImplementedError:
                logger.debug(f"{fs} doesn't implement 'open_async', falling back to sync")

        if async_handle is not None:
            try:
                yield async_handle
            finally:
                # ``close`` may be a coroutine function on async file objects; await it when it is.
                pending_close = async_handle.close()
                if inspect.isawaitable(pending_close):
                    await pending_close
            return

        # Synchronous fallback: configure the open parameters the same way ``open_sync`` does.
        open_kwargs = {"mode": mode, **kwargs}
        if compression:
            open_kwargs["compression"] = compression
        if block_size:
            open_kwargs["block_size"] = block_size
        if cache_type != "none":
            open_kwargs["cache_type"] = cache_type
            open_kwargs["cache_options"] = {} if cache_options is None else cache_options

        with fs.open(self.path, **open_kwargs) as file_handle:
            yield file_handle

    @contextmanager
    def open_sync(
        self,
        mode: str = "rb",
        block_size: Optional[int] = None,
        cache_type: str = "readahead",
        cache_options: Optional[dict] = None,
        compression: Optional[str] = None,
        **kwargs,
    ) -> IO[Any]:
        """
        Synchronously open the file and return a file-like object.

        Args:
            mode: The mode to open the file in (default: 'rb')
            block_size: Size of blocks for reading (bytes)
            cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
            cache_options: Dictionary of options for the cache
            compression: Compression format or None for auto-detection
            **kwargs: Additional arguments passed to fsspec's open method

        Returns:
            A context manager yielding a file-like object

        Example:
        ```python
        with file.open_sync('rb') as f:
            data = f.read()
        ```
        """
        fs = storage.get_underlying_filesystem(path=self.path)

        # Set up cache options if provided
        if cache_options is None:
            cache_options = {}

        # Configure the open parameters
        open_kwargs = {"mode": mode, "compression": compression, **kwargs}

        if block_size:
            open_kwargs["block_size"] = block_size

        # Apply caching strategy
        if cache_type != "none":
            open_kwargs["cache_type"] = cache_type
            open_kwargs["cache_options"] = cache_options

        with fs.open(self.path, **open_kwargs) as f:
            yield f

    # @synced.wrap - enabling this did not work - synchronicity/pydantic issue
    async def download(self, local_path: Optional[Union[str, Path]] = None) -> str:
        """
        Asynchronously download the file to a local path.

        Args:
            local_path: The local path to download the file to. If None, a path obtained from
                ``storage.get_random_local_path`` is used.

        Returns:
            The path to the downloaded file

        Example:
        ```python
        local_file = await file.download('/tmp/myfile.csv')
        ```
        """
        if local_path is None:
            # Normalise to ``str`` so the declared return type holds whatever the helper returns.
            local_path = str(storage.get_random_local_path(file_path_or_file_name=None))
        else:
            local_path = str(Path(local_path).absolute())

        fs = storage.get_underlying_filesystem(path=self.path)

        # If it's already a local file, just copy it
        if "file" in fs.protocol:
            # Use aiofiles for async copy
            async with aiofiles.open(self.path, "rb") as src:
                async with aiofiles.open(local_path, "wb") as dst:
                    await dst.write(await src.read())
            return local_path

        # Otherwise download from remote using async functionality
        await storage.get(self.path, local_path)
        return local_path

    @classmethod
    @requires_initialization
    async def from_local(cls, local_path: Union[str, Path], remote_destination: Optional[str] = None) -> File[T]:
        """
        Create a new File object from a local file that will be uploaded to the configured remote store.

        Args:
            local_path: Path to the local file
            remote_destination: Optional path to store the file remotely. If None, a path will be generated.

        Returns:
            A new File instance pointing to the uploaded file

        Raises:
            ValueError: If ``local_path`` does not exist.

        Example:
        ```python
        remote_file = await File[DataFrame].from_local('/tmp/data.csv', 's3://bucket/data.csv')
        ```
        """
        if not os.path.exists(local_path):
            raise ValueError(f"File not found: {local_path}")

        remote_path = remote_destination or internal_ctx().raw_data.get_random_remote_path()
        protocol = get_protocol(remote_path)
        filename = Path(local_path).name

        # If remote_destination was not set by the user, and the configured raw data path is also local,
        # then let's optimize by not uploading.
        if "file" in protocol:
            if remote_destination is None:
                path = str(Path(local_path).absolute())
            else:
                # Otherwise, actually make a copy of the file: the local file is the source and the
                # requested destination is the target.
                Path(remote_path).parent.mkdir(parents=True, exist_ok=True)
                async with aiofiles.open(local_path, "rb") as src:
                    async with aiofiles.open(remote_path, "wb") as dst:
                        await dst.write(await src.read())
                path = str(Path(remote_path).absolute())
        else:
            # Otherwise upload to remote using async storage layer
            path = await storage.put(local_path, remote_path)

        f = cls(path=path, name=filename)
        return f
345
+
346
+
347
class FileTransformer(TypeTransformer[File]):
    """
    Type transformer that maps File objects to and from single-part blob literals.

    It performs no I/O: only the path and format travel through the literal. Reading and
    writing the underlying data is left to the user.
    """

    def __init__(self):
        super().__init__(name="File", t=File)

    def get_literal_type(self, t: Type[File]) -> types_pb2.LiteralType:
        """Return the single-part blob literal type used for every File type."""
        # todo: set format from generic
        single_blob = types_pb2.BlobType(
            format="",  # Format is determined by the generic type T
            dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE,
        )
        return types_pb2.LiteralType(blob=single_blob)

    async def to_literal(
        self,
        python_val: File,
        python_type: Type[File],
        expected: types_pb2.LiteralType,
    ) -> literals_pb2.Literal:
        """Wrap a File's path and format into a blob literal; reject anything that is not a File."""
        if not isinstance(python_val, File):
            raise TypeTransformerFailedError(f"Expected File object, received {type(python_val)}")

        blob_type = types_pb2.BlobType(
            format=python_val.format,
            dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE,
        )
        blob = literals_pb2.Blob(
            metadata=literals_pb2.BlobMetadata(type=blob_type),
            uri=python_val.path,
        )
        return literals_pb2.Literal(scalar=literals_pb2.Scalar(blob=blob))

    async def to_python_value(
        self,
        lv: literals_pb2.Literal,
        expected_python_type: Type[File],
    ) -> File:
        """Build a File from a single-part blob literal, naming it after the URI's basename."""
        if not lv.scalar.HasField("blob"):
            raise TypeTransformerFailedError(f"Expected blob literal, received {lv}")

        blob = lv.scalar.blob
        if blob.metadata.type.dimensionality != types_pb2.BlobType.BlobDimensionality.SINGLE:
            raise TypeTransformerFailedError(
                f"Expected single part blob, received {lv.scalar.blob.metadata.type.dimensionality}"
            )

        return File(path=blob.uri, name=Path(blob.uri).name, format=blob.metadata.type.format)

    def guess_python_type(self, literal_type: types_pb2.LiteralType) -> Type[File]:
        """Return File for single-part, non-pickle blob types; raise ValueError for anything else."""
        looks_like_file = (
            literal_type.HasField("blob")
            and literal_type.blob.dimensionality == types_pb2.BlobType.BlobDimensionality.SINGLE
            and literal_type.blob.format != "PythonPickle"  # see pickle transformer
        )
        if not looks_like_file:
            raise ValueError(f"Cannot guess python type from {literal_type}")
        return File


# Make the transformer available to the type engine as soon as this module is imported.
TypeEngine.register(FileTransformer())
union/io/pickle/__init__.py ADDED
File without changes
union/io/pickle/transformer.py ADDED
@@ -0,0 +1,117 @@
1
+ import hashlib
2
+ import os
3
+ import typing
4
+ from typing import Type
5
+
6
+ import aiofiles
7
+ import cloudpickle
8
+ from flyteidl.core import literals_pb2, types_pb2
9
+
10
+ import union.storage as storage
11
+ from union.types import TypeEngine, TypeTransformer
12
+
13
+ T = typing.TypeVar("T")
14
+
15
+
16
class FlytePickle(typing.Generic[T]):
    """
    Internal marker type for values that are carried as pickles.

    This type is only meant for internal use; user code should not reference it.
    Any type that the type system can't recognize will become FlytePickle.
    ``FlytePickle[SomeType]`` yields a subclass that remembers ``SomeType``.
    """

    @classmethod
    def python_type(cls) -> typing.Type:
        """Return the wrapped Python type; the unparameterised class reports ``NoneType``."""
        return type(None)

    @classmethod
    def __class_getitem__(cls, python_type: typing.Type) -> typing.Type:
        """
        Build the parameterised variant ``FlytePickle[python_type]``.

        :param python_type: The type to remember; ``None`` returns ``cls`` unchanged.
        :return: ``cls`` itself, or a new subclass whose ``python_type()`` returns
            the given type and whose ``__origin__`` is ``FlytePickle``.
        """
        if python_type is None:
            return cls

        wrapped_type = python_type

        class _SpecificFormatClass(FlytePickle):
            # Get the type engine to see this as kind of a generic
            __origin__ = FlytePickle

            @classmethod
            def python_type(cls) -> typing.Type:
                return wrapped_type

        return _SpecificFormatClass

    @classmethod
    async def to_pickle(cls, python_val: typing.Any) -> str:
        """
        Serialise ``python_val`` with cloudpickle and pass the resulting file to ``storage.put``.

        The local file location comes from ``storage.get_random_local_path``, which is
        given the MD5 hex digest of the pickled bytes as the file name.

        :param python_val: Object to pickle.
        :return: Whatever ``storage.put`` returns for the written file.
        """
        payload = cloudpickle.dumps(python_val)
        digest = hashlib.md5(payload).hexdigest()

        local_file = storage.get_random_local_path(file_path_or_file_name=digest)
        os.makedirs(os.path.dirname(local_file), exist_ok=True)
        async with aiofiles.open(local_file, "w+b") as sink:
            await sink.write(payload)

        return await storage.put(str(local_file))

    @classmethod
    async def from_pickle(cls, uri: str) -> typing.Any:
        """
        Load and return the object pickled at ``uri``.

        When ``storage.is_remote(uri)`` is true the file is first fetched with
        ``storage.get`` into a fresh local path and read from there.

        NOTE: unpickling executes code contained in the payload, so ``uri`` must
        point at data from a trusted producer.

        :param uri: Location of the pickle file.
        :return: The deserialised object.
        """
        source = uri
        if storage.is_remote(uri):
            # Not on the local file system: pull a copy down before reading.
            source = storage.get_random_local_path()
            await storage.get(uri, str(source), False)
        async with aiofiles.open(source, "rb") as stream:
            return cloudpickle.loads(await stream.read())
65
+
66
+
67
class FlytePickleTransformer(TypeTransformer[FlytePickle]):
    """
    Type transformer that moves arbitrary Python values through pickle files.

    Values are represented as single-part blob literals whose format is
    ``PYTHON_PICKLE_FORMAT``; the blob URI is whatever ``FlytePickle.to_pickle``
    returns.
    """

    PYTHON_PICKLE_FORMAT = "PythonPickle"

    def __init__(self):
        super().__init__(name="FlytePickle", t=FlytePickle)

    def assert_type(self, t: Type[T], v: T):
        # Every type can serialize to pickle, so we don't need to check the type here.
        ...

    async def to_python_value(self, lv: literals_pb2.Literal, expected_python_type: Type[T]) -> T:
        """
        Load the object stored at the literal's blob URI.

        :param lv: Literal whose ``scalar.blob.uri`` points at the pickle file.
        :param expected_python_type: Requested Python type (not consulted here).
        :return: The object returned by ``FlytePickle.from_pickle``.
        """
        uri = lv.scalar.blob.uri
        return await FlytePickle.from_pickle(uri)

    async def to_literal(
        self,
        python_val: T,
        python_type: Type[T],
        expected: types_pb2.LiteralType,
    ) -> literals_pb2.Literal:
        """
        Pickle ``python_val`` and describe the resulting file as a blob literal.

        :param python_val: Value to pickle; must not be ``None``.
        :param python_type: Declared Python type (not consulted here).
        :param expected: Expected literal type (not consulted here).
        :return: A single-part blob literal in ``PYTHON_PICKLE_FORMAT``.
        :raises AssertionError: If ``python_val`` is ``None``.
        """
        if python_val is None:
            raise AssertionError("Cannot pickle None Value.")
        meta = literals_pb2.BlobMetadata(
            type=types_pb2.BlobType(
                format=self.PYTHON_PICKLE_FORMAT, dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE
            )
        )
        remote_path = await FlytePickle.to_pickle(python_val)
        return literals_pb2.Literal(scalar=literals_pb2.Scalar(blob=literals_pb2.Blob(metadata=meta, uri=remote_path)))

    def guess_python_type(self, literal_type: types_pb2.LiteralType) -> typing.Type[FlytePickle[typing.Any]]:
        """
        Return ``FlytePickle`` for a single-part blob type in ``PYTHON_PICKLE_FORMAT``.

        :param literal_type: The literal type to inspect.
        :raises ValueError: If the literal type is anything else.
        """
        # A protobuf sub-message attribute is never None, so presence has to be
        # checked with HasField (the same test the file transformer uses).
        if (
            literal_type.HasField("blob")
            and literal_type.blob.dimensionality == types_pb2.BlobType.BlobDimensionality.SINGLE
            and literal_type.blob.format == FlytePickleTransformer.PYTHON_PICKLE_FORMAT
        ):
            return FlytePickle

        raise ValueError(f"Transformer {self} cannot reverse {literal_type}")

    def get_literal_type(self, t: Type[T]) -> types_pb2.LiteralType:
        """
        Build the blob literal type used for pickled values of type ``t``.

        :param t: The Python type being pickled; ``str(t)`` is recorded in the
            literal type's metadata under ``"python_class_name"``.
        :return: A single-part blob literal type in ``PYTHON_PICKLE_FORMAT``.
        """
        lt = types_pb2.LiteralType(
            blob=types_pb2.BlobType(
                format=self.PYTHON_PICKLE_FORMAT, dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE
            )
        )
        # ``metadata`` is a message-typed (Struct) field. Plain assignment to such a
        # field is rejected by protobuf runtimes that disallow composite-field
        # assignment, so populate the Struct in place instead.
        lt.metadata.update({"python_class_name": str(t)})
        return lt
116
+
117
+ TypeEngine.register(FlytePickleTransformer())
@@ -0,0 +1,122 @@
1
+ """
2
+ Flytekit StructuredDataset
3
+ ==========================================================
4
+ .. currentmodule:: flytekit.types.structured
5
+
6
+ .. autosummary::
7
+ :template: custom.rst
8
+ :toctree: generated/
9
+
10
+ StructuredDataset
11
+ StructuredDatasetDecoder
12
+ StructuredDatasetEncoder
13
+ """
14
+
15
+ from union._logging import logger
16
+ from union._utils.lazy_module import is_imported
17
+
18
+ from .structured_dataset import (
19
+ DuplicateHandlerError,
20
+ StructuredDataset,
21
+ StructuredDatasetDecoder,
22
+ StructuredDatasetEncoder,
23
+ StructuredDatasetTransformerEngine,
24
+ )
25
+
26
+
27
def register_csv_handlers():
    """
    Register the CSV encoding and decoding handlers from ``basic_dfs``.

    Each handler is instantiated and registered in turn (encoder first), with
    ``default_format_for_type=True``. The import is deferred to call time.
    """
    from .basic_dfs import CSVToPandasDecodingHandler, PandasToCSVEncodingHandler

    for handler_cls in (PandasToCSVEncodingHandler, CSVToPandasDecodingHandler):
        StructuredDatasetTransformerEngine.register(handler_cls(), default_format_for_type=True)
32
+
33
+
34
def register_pandas_handlers():
    """
    Register the pandas/parquet handlers and the ``pd.DataFrame`` renderer.

    The encoder is registered before the decoder, both with
    ``default_format_for_type=True``; afterwards a ``TopFrameRenderer`` is
    registered for ``pd.DataFrame``. All imports are deferred to call time.
    """
    import pandas as pd

    from union.types._renderer import TopFrameRenderer

    from .basic_dfs import PandasToParquetEncodingHandler, ParquetToPandasDecodingHandler

    engine = StructuredDatasetTransformerEngine
    for handler_cls in (PandasToParquetEncodingHandler, ParquetToPandasDecodingHandler):
        engine.register(handler_cls(), default_format_for_type=True)
    engine.register_renderer(pd.DataFrame, TopFrameRenderer())
44
+
45
+
46
def register_arrow_handlers():
    """
    Register the arrow/parquet handlers and the ``pa.Table`` renderer.

    The encoder is registered before the decoder, both with
    ``default_format_for_type=True``; afterwards an ``ArrowRenderer`` is
    registered for ``pa.Table``. All imports are deferred to call time.
    """
    import pyarrow as pa

    from union.types._renderer import ArrowRenderer

    from .basic_dfs import ArrowToParquetEncodingHandler, ParquetToArrowDecodingHandler

    engine = StructuredDatasetTransformerEngine
    for handler_cls in (ArrowToParquetEncodingHandler, ParquetToArrowDecodingHandler):
        engine.register(handler_cls(), default_format_for_type=True)
    engine.register_renderer(pa.Table, ArrowRenderer())
56
+
57
+
58
def register_bigquery_handlers():
    """
    Register the four BigQuery handlers, or log and return if they cannot be imported.

    An ``ImportError`` raised anywhere in the import or registration sequence is
    caught and reported at info level; other exceptions propagate.
    """
    try:
        from .bigquery import (
            ArrowToBQEncodingHandlers,
            BQToArrowDecodingHandler,
            BQToPandasDecodingHandler,
            PandasToBQEncodingHandlers,
        )

        # Registration order matches the historical sequence: pandas pair, then arrow pair.
        registration_order = (
            PandasToBQEncodingHandlers,
            BQToPandasDecodingHandler,
            ArrowToBQEncodingHandlers,
            BQToArrowDecodingHandler,
        )
        for handler_cls in registration_order:
            StructuredDatasetTransformerEngine.register(handler_cls())
    except ImportError:
        logger.info(
            "We won't register bigquery handler for structured dataset because "
            "we can't find the packages google-cloud-bigquery-storage and google-cloud-bigquery"
        )
76
+
77
+
78
def register_snowflake_handlers():
    """
    Register the two Snowflake handlers, or log and return if they cannot be imported.

    The decoder is registered before the encoder. An ``ImportError`` raised
    anywhere in the import or registration sequence is caught and reported at
    info level; other exceptions propagate.
    """
    try:
        from .snowflake import PandasToSnowflakeEncodingHandlers, SnowflakeToPandasDecodingHandler

        for handler_cls in (SnowflakeToPandasDecodingHandler, PandasToSnowflakeEncodingHandlers):
            StructuredDatasetTransformerEngine.register(handler_cls())

    except ImportError:
        logger.info(
            "We won't register snowflake handler for structured dataset because "
            "we can't find package snowflake-connector-python"
        )
90
+
91
+
92
def lazy_import_structured_dataset_handler():
    """
    Register handlers only for the dataframe libraries that ``is_imported`` reports.

    Libraries are checked in a fixed order (pandas, pyarrow, BigQuery, Snowflake).
    For each one that is reported as imported, its registrar functions run in
    sequence; a ``DuplicateHandlerError`` stops that library's sequence and is
    logged at debug level, after which the next library is checked.
    """
    # (module probed, registrars run in order, message logged on duplicate registration)
    registration_plan = (
        (
            "pandas",
            (register_pandas_handlers, register_csv_handlers),
            "Transformer for pandas is already registered.",
        ),
        (
            "pyarrow",
            (register_arrow_handlers,),
            "Transformer for arrow is already registered.",
        ),
        (
            "google.cloud.bigquery",
            (register_bigquery_handlers,),
            "Transformer for bigquery is already registered.",
        ),
        (
            "snowflake.connector",
            (register_snowflake_handlers,),
            "Transformer for snowflake is already registered.",
        ),
    )

    for module_name, registrars, duplicate_message in registration_plan:
        if not is_imported(module_name):
            continue
        try:
            for registrar in registrars:
                registrar()
        except DuplicateHandlerError:
            logger.debug(duplicate_message)
114
+
115
+
116
# Names exported by ``from <package> import *``. DuplicateHandlerError and the
# individual register_* helpers are importable from this module but are not
# listed here.
__all__ = [
    "StructuredDataset",
    "StructuredDatasetDecoder",
    "StructuredDatasetEncoder",
    "StructuredDatasetTransformerEngine",
    "lazy_import_structured_dataset_handler",
]