flyte 0.0.1b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flyte might be problematic. Click here for more details.

Files changed (390) hide show
  1. flyte/__init__.py +62 -0
  2. flyte/_api_commons.py +3 -0
  3. flyte/_bin/__init__.py +0 -0
  4. flyte/_bin/runtime.py +126 -0
  5. flyte/_build.py +25 -0
  6. flyte/_cache/__init__.py +12 -0
  7. flyte/_cache/cache.py +146 -0
  8. flyte/_cache/defaults.py +9 -0
  9. flyte/_cache/policy_function_body.py +42 -0
  10. flyte/_cli/__init__.py +0 -0
  11. flyte/_cli/_common.py +287 -0
  12. flyte/_cli/_create.py +42 -0
  13. flyte/_cli/_delete.py +23 -0
  14. flyte/_cli/_deploy.py +140 -0
  15. flyte/_cli/_get.py +235 -0
  16. flyte/_cli/_run.py +152 -0
  17. flyte/_cli/main.py +72 -0
  18. flyte/_code_bundle/__init__.py +8 -0
  19. flyte/_code_bundle/_ignore.py +113 -0
  20. flyte/_code_bundle/_packaging.py +187 -0
  21. flyte/_code_bundle/_utils.py +339 -0
  22. flyte/_code_bundle/bundle.py +178 -0
  23. flyte/_context.py +146 -0
  24. flyte/_datastructures.py +342 -0
  25. flyte/_deploy.py +202 -0
  26. flyte/_doc.py +29 -0
  27. flyte/_docstring.py +32 -0
  28. flyte/_environment.py +43 -0
  29. flyte/_group.py +31 -0
  30. flyte/_hash.py +23 -0
  31. flyte/_image.py +760 -0
  32. flyte/_initialize.py +634 -0
  33. flyte/_interface.py +84 -0
  34. flyte/_internal/__init__.py +3 -0
  35. flyte/_internal/controllers/__init__.py +115 -0
  36. flyte/_internal/controllers/_local_controller.py +118 -0
  37. flyte/_internal/controllers/_trace.py +40 -0
  38. flyte/_internal/controllers/pbhash.py +39 -0
  39. flyte/_internal/controllers/remote/__init__.py +40 -0
  40. flyte/_internal/controllers/remote/_action.py +141 -0
  41. flyte/_internal/controllers/remote/_client.py +43 -0
  42. flyte/_internal/controllers/remote/_controller.py +361 -0
  43. flyte/_internal/controllers/remote/_core.py +402 -0
  44. flyte/_internal/controllers/remote/_informer.py +361 -0
  45. flyte/_internal/controllers/remote/_service_protocol.py +50 -0
  46. flyte/_internal/imagebuild/__init__.py +11 -0
  47. flyte/_internal/imagebuild/docker_builder.py +416 -0
  48. flyte/_internal/imagebuild/image_builder.py +241 -0
  49. flyte/_internal/imagebuild/remote_builder.py +0 -0
  50. flyte/_internal/resolvers/__init__.py +0 -0
  51. flyte/_internal/resolvers/_task_module.py +54 -0
  52. flyte/_internal/resolvers/common.py +31 -0
  53. flyte/_internal/resolvers/default.py +28 -0
  54. flyte/_internal/runtime/__init__.py +0 -0
  55. flyte/_internal/runtime/convert.py +199 -0
  56. flyte/_internal/runtime/entrypoints.py +135 -0
  57. flyte/_internal/runtime/io.py +136 -0
  58. flyte/_internal/runtime/resources_serde.py +138 -0
  59. flyte/_internal/runtime/task_serde.py +210 -0
  60. flyte/_internal/runtime/taskrunner.py +190 -0
  61. flyte/_internal/runtime/types_serde.py +54 -0
  62. flyte/_logging.py +124 -0
  63. flyte/_protos/__init__.py +0 -0
  64. flyte/_protos/common/authorization_pb2.py +66 -0
  65. flyte/_protos/common/authorization_pb2.pyi +108 -0
  66. flyte/_protos/common/authorization_pb2_grpc.py +4 -0
  67. flyte/_protos/common/identifier_pb2.py +71 -0
  68. flyte/_protos/common/identifier_pb2.pyi +82 -0
  69. flyte/_protos/common/identifier_pb2_grpc.py +4 -0
  70. flyte/_protos/common/identity_pb2.py +48 -0
  71. flyte/_protos/common/identity_pb2.pyi +72 -0
  72. flyte/_protos/common/identity_pb2_grpc.py +4 -0
  73. flyte/_protos/common/list_pb2.py +36 -0
  74. flyte/_protos/common/list_pb2.pyi +69 -0
  75. flyte/_protos/common/list_pb2_grpc.py +4 -0
  76. flyte/_protos/common/policy_pb2.py +37 -0
  77. flyte/_protos/common/policy_pb2.pyi +27 -0
  78. flyte/_protos/common/policy_pb2_grpc.py +4 -0
  79. flyte/_protos/common/role_pb2.py +37 -0
  80. flyte/_protos/common/role_pb2.pyi +53 -0
  81. flyte/_protos/common/role_pb2_grpc.py +4 -0
  82. flyte/_protos/common/runtime_version_pb2.py +28 -0
  83. flyte/_protos/common/runtime_version_pb2.pyi +24 -0
  84. flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
  85. flyte/_protos/logs/dataplane/payload_pb2.py +96 -0
  86. flyte/_protos/logs/dataplane/payload_pb2.pyi +168 -0
  87. flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
  88. flyte/_protos/secret/definition_pb2.py +49 -0
  89. flyte/_protos/secret/definition_pb2.pyi +93 -0
  90. flyte/_protos/secret/definition_pb2_grpc.py +4 -0
  91. flyte/_protos/secret/payload_pb2.py +62 -0
  92. flyte/_protos/secret/payload_pb2.pyi +94 -0
  93. flyte/_protos/secret/payload_pb2_grpc.py +4 -0
  94. flyte/_protos/secret/secret_pb2.py +38 -0
  95. flyte/_protos/secret/secret_pb2.pyi +6 -0
  96. flyte/_protos/secret/secret_pb2_grpc.py +198 -0
  97. flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
  98. flyte/_protos/validate/validate/validate_pb2.py +76 -0
  99. flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
  100. flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
  101. flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
  102. flyte/_protos/workflow/queue_service_pb2.py +106 -0
  103. flyte/_protos/workflow/queue_service_pb2.pyi +141 -0
  104. flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
  105. flyte/_protos/workflow/run_definition_pb2.py +128 -0
  106. flyte/_protos/workflow/run_definition_pb2.pyi +310 -0
  107. flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
  108. flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
  109. flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
  110. flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
  111. flyte/_protos/workflow/run_service_pb2.py +133 -0
  112. flyte/_protos/workflow/run_service_pb2.pyi +175 -0
  113. flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
  114. flyte/_protos/workflow/state_service_pb2.py +58 -0
  115. flyte/_protos/workflow/state_service_pb2.pyi +71 -0
  116. flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
  117. flyte/_protos/workflow/task_definition_pb2.py +72 -0
  118. flyte/_protos/workflow/task_definition_pb2.pyi +65 -0
  119. flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
  120. flyte/_protos/workflow/task_service_pb2.py +44 -0
  121. flyte/_protos/workflow/task_service_pb2.pyi +31 -0
  122. flyte/_protos/workflow/task_service_pb2_grpc.py +104 -0
  123. flyte/_resources.py +226 -0
  124. flyte/_retry.py +32 -0
  125. flyte/_reusable_environment.py +25 -0
  126. flyte/_run.py +411 -0
  127. flyte/_secret.py +61 -0
  128. flyte/_task.py +367 -0
  129. flyte/_task_environment.py +200 -0
  130. flyte/_timeout.py +47 -0
  131. flyte/_tools.py +27 -0
  132. flyte/_trace.py +128 -0
  133. flyte/_utils/__init__.py +20 -0
  134. flyte/_utils/asyn.py +119 -0
  135. flyte/_utils/coro_management.py +25 -0
  136. flyte/_utils/file_handling.py +72 -0
  137. flyte/_utils/helpers.py +108 -0
  138. flyte/_utils/lazy_module.py +54 -0
  139. flyte/_utils/uv_script_parser.py +49 -0
  140. flyte/_version.py +21 -0
  141. flyte/connectors/__init__.py +0 -0
  142. flyte/errors.py +143 -0
  143. flyte/extras/__init__.py +5 -0
  144. flyte/extras/_container.py +273 -0
  145. flyte/io/__init__.py +11 -0
  146. flyte/io/_dataframe.py +0 -0
  147. flyte/io/_dir.py +448 -0
  148. flyte/io/_file.py +468 -0
  149. flyte/io/pickle/__init__.py +0 -0
  150. flyte/io/pickle/transformer.py +117 -0
  151. flyte/io/structured_dataset/__init__.py +129 -0
  152. flyte/io/structured_dataset/basic_dfs.py +219 -0
  153. flyte/io/structured_dataset/structured_dataset.py +1061 -0
  154. flyte/py.typed +0 -0
  155. flyte/remote/__init__.py +25 -0
  156. flyte/remote/_client/__init__.py +0 -0
  157. flyte/remote/_client/_protocols.py +131 -0
  158. flyte/remote/_client/auth/__init__.py +12 -0
  159. flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
  160. flyte/remote/_client/auth/_authenticators/base.py +397 -0
  161. flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  162. flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
  163. flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
  164. flyte/remote/_client/auth/_authenticators/factory.py +200 -0
  165. flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
  166. flyte/remote/_client/auth/_channel.py +184 -0
  167. flyte/remote/_client/auth/_client_config.py +83 -0
  168. flyte/remote/_client/auth/_default_html.py +32 -0
  169. flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  170. flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
  171. flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
  172. flyte/remote/_client/auth/_keyring.py +143 -0
  173. flyte/remote/_client/auth/_token_client.py +260 -0
  174. flyte/remote/_client/auth/errors.py +16 -0
  175. flyte/remote/_client/controlplane.py +95 -0
  176. flyte/remote/_console.py +18 -0
  177. flyte/remote/_data.py +155 -0
  178. flyte/remote/_logs.py +116 -0
  179. flyte/remote/_project.py +86 -0
  180. flyte/remote/_run.py +873 -0
  181. flyte/remote/_secret.py +132 -0
  182. flyte/remote/_task.py +227 -0
  183. flyte/report/__init__.py +3 -0
  184. flyte/report/_report.py +178 -0
  185. flyte/report/_template.html +124 -0
  186. flyte/storage/__init__.py +24 -0
  187. flyte/storage/_remote_fs.py +34 -0
  188. flyte/storage/_storage.py +251 -0
  189. flyte/storage/_utils.py +5 -0
  190. flyte/types/__init__.py +13 -0
  191. flyte/types/_interface.py +25 -0
  192. flyte/types/_renderer.py +162 -0
  193. flyte/types/_string_literals.py +120 -0
  194. flyte/types/_type_engine.py +2210 -0
  195. flyte/types/_utils.py +80 -0
  196. flyte-0.0.1b0.dist-info/METADATA +179 -0
  197. flyte-0.0.1b0.dist-info/RECORD +390 -0
  198. flyte-0.0.1b0.dist-info/WHEEL +5 -0
  199. flyte-0.0.1b0.dist-info/entry_points.txt +3 -0
  200. flyte-0.0.1b0.dist-info/top_level.txt +1 -0
  201. union/__init__.py +54 -0
  202. union/_api_commons.py +3 -0
  203. union/_bin/__init__.py +0 -0
  204. union/_bin/runtime.py +113 -0
  205. union/_build.py +25 -0
  206. union/_cache/__init__.py +12 -0
  207. union/_cache/cache.py +141 -0
  208. union/_cache/defaults.py +9 -0
  209. union/_cache/policy_function_body.py +42 -0
  210. union/_cli/__init__.py +0 -0
  211. union/_cli/_common.py +263 -0
  212. union/_cli/_create.py +40 -0
  213. union/_cli/_delete.py +23 -0
  214. union/_cli/_deploy.py +120 -0
  215. union/_cli/_get.py +162 -0
  216. union/_cli/_params.py +579 -0
  217. union/_cli/_run.py +150 -0
  218. union/_cli/main.py +72 -0
  219. union/_code_bundle/__init__.py +8 -0
  220. union/_code_bundle/_ignore.py +113 -0
  221. union/_code_bundle/_packaging.py +187 -0
  222. union/_code_bundle/_utils.py +342 -0
  223. union/_code_bundle/bundle.py +176 -0
  224. union/_context.py +146 -0
  225. union/_datastructures.py +295 -0
  226. union/_deploy.py +185 -0
  227. union/_doc.py +29 -0
  228. union/_docstring.py +26 -0
  229. union/_environment.py +43 -0
  230. union/_group.py +31 -0
  231. union/_hash.py +23 -0
  232. union/_image.py +760 -0
  233. union/_initialize.py +585 -0
  234. union/_interface.py +84 -0
  235. union/_internal/__init__.py +3 -0
  236. union/_internal/controllers/__init__.py +77 -0
  237. union/_internal/controllers/_local_controller.py +77 -0
  238. union/_internal/controllers/pbhash.py +39 -0
  239. union/_internal/controllers/remote/__init__.py +40 -0
  240. union/_internal/controllers/remote/_action.py +131 -0
  241. union/_internal/controllers/remote/_client.py +43 -0
  242. union/_internal/controllers/remote/_controller.py +169 -0
  243. union/_internal/controllers/remote/_core.py +341 -0
  244. union/_internal/controllers/remote/_informer.py +260 -0
  245. union/_internal/controllers/remote/_service_protocol.py +44 -0
  246. union/_internal/imagebuild/__init__.py +11 -0
  247. union/_internal/imagebuild/docker_builder.py +416 -0
  248. union/_internal/imagebuild/image_builder.py +243 -0
  249. union/_internal/imagebuild/remote_builder.py +0 -0
  250. union/_internal/resolvers/__init__.py +0 -0
  251. union/_internal/resolvers/_task_module.py +31 -0
  252. union/_internal/resolvers/common.py +24 -0
  253. union/_internal/resolvers/default.py +27 -0
  254. union/_internal/runtime/__init__.py +0 -0
  255. union/_internal/runtime/convert.py +163 -0
  256. union/_internal/runtime/entrypoints.py +121 -0
  257. union/_internal/runtime/io.py +136 -0
  258. union/_internal/runtime/resources_serde.py +134 -0
  259. union/_internal/runtime/task_serde.py +202 -0
  260. union/_internal/runtime/taskrunner.py +179 -0
  261. union/_internal/runtime/types_serde.py +53 -0
  262. union/_logging.py +124 -0
  263. union/_protos/__init__.py +0 -0
  264. union/_protos/common/authorization_pb2.py +66 -0
  265. union/_protos/common/authorization_pb2.pyi +106 -0
  266. union/_protos/common/authorization_pb2_grpc.py +4 -0
  267. union/_protos/common/identifier_pb2.py +71 -0
  268. union/_protos/common/identifier_pb2.pyi +82 -0
  269. union/_protos/common/identifier_pb2_grpc.py +4 -0
  270. union/_protos/common/identity_pb2.py +48 -0
  271. union/_protos/common/identity_pb2.pyi +72 -0
  272. union/_protos/common/identity_pb2_grpc.py +4 -0
  273. union/_protos/common/list_pb2.py +36 -0
  274. union/_protos/common/list_pb2.pyi +69 -0
  275. union/_protos/common/list_pb2_grpc.py +4 -0
  276. union/_protos/common/policy_pb2.py +37 -0
  277. union/_protos/common/policy_pb2.pyi +27 -0
  278. union/_protos/common/policy_pb2_grpc.py +4 -0
  279. union/_protos/common/role_pb2.py +37 -0
  280. union/_protos/common/role_pb2.pyi +51 -0
  281. union/_protos/common/role_pb2_grpc.py +4 -0
  282. union/_protos/common/runtime_version_pb2.py +28 -0
  283. union/_protos/common/runtime_version_pb2.pyi +24 -0
  284. union/_protos/common/runtime_version_pb2_grpc.py +4 -0
  285. union/_protos/logs/dataplane/payload_pb2.py +96 -0
  286. union/_protos/logs/dataplane/payload_pb2.pyi +168 -0
  287. union/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
  288. union/_protos/secret/definition_pb2.py +49 -0
  289. union/_protos/secret/definition_pb2.pyi +93 -0
  290. union/_protos/secret/definition_pb2_grpc.py +4 -0
  291. union/_protos/secret/payload_pb2.py +62 -0
  292. union/_protos/secret/payload_pb2.pyi +94 -0
  293. union/_protos/secret/payload_pb2_grpc.py +4 -0
  294. union/_protos/secret/secret_pb2.py +38 -0
  295. union/_protos/secret/secret_pb2.pyi +6 -0
  296. union/_protos/secret/secret_pb2_grpc.py +198 -0
  297. union/_protos/validate/validate/validate_pb2.py +76 -0
  298. union/_protos/workflow/node_execution_service_pb2.py +26 -0
  299. union/_protos/workflow/node_execution_service_pb2.pyi +4 -0
  300. union/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
  301. union/_protos/workflow/queue_service_pb2.py +75 -0
  302. union/_protos/workflow/queue_service_pb2.pyi +103 -0
  303. union/_protos/workflow/queue_service_pb2_grpc.py +172 -0
  304. union/_protos/workflow/run_definition_pb2.py +100 -0
  305. union/_protos/workflow/run_definition_pb2.pyi +256 -0
  306. union/_protos/workflow/run_definition_pb2_grpc.py +4 -0
  307. union/_protos/workflow/run_logs_service_pb2.py +41 -0
  308. union/_protos/workflow/run_logs_service_pb2.pyi +28 -0
  309. union/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
  310. union/_protos/workflow/run_service_pb2.py +133 -0
  311. union/_protos/workflow/run_service_pb2.pyi +173 -0
  312. union/_protos/workflow/run_service_pb2_grpc.py +412 -0
  313. union/_protos/workflow/state_service_pb2.py +58 -0
  314. union/_protos/workflow/state_service_pb2.pyi +69 -0
  315. union/_protos/workflow/state_service_pb2_grpc.py +138 -0
  316. union/_protos/workflow/task_definition_pb2.py +72 -0
  317. union/_protos/workflow/task_definition_pb2.pyi +65 -0
  318. union/_protos/workflow/task_definition_pb2_grpc.py +4 -0
  319. union/_protos/workflow/task_service_pb2.py +44 -0
  320. union/_protos/workflow/task_service_pb2.pyi +31 -0
  321. union/_protos/workflow/task_service_pb2_grpc.py +104 -0
  322. union/_resources.py +226 -0
  323. union/_retry.py +32 -0
  324. union/_reusable_environment.py +25 -0
  325. union/_run.py +374 -0
  326. union/_secret.py +61 -0
  327. union/_task.py +354 -0
  328. union/_task_environment.py +186 -0
  329. union/_timeout.py +47 -0
  330. union/_tools.py +27 -0
  331. union/_utils/__init__.py +11 -0
  332. union/_utils/asyn.py +119 -0
  333. union/_utils/file_handling.py +71 -0
  334. union/_utils/helpers.py +46 -0
  335. union/_utils/lazy_module.py +54 -0
  336. union/_utils/uv_script_parser.py +49 -0
  337. union/_version.py +21 -0
  338. union/connectors/__init__.py +0 -0
  339. union/errors.py +128 -0
  340. union/extras/__init__.py +5 -0
  341. union/extras/_container.py +263 -0
  342. union/io/__init__.py +11 -0
  343. union/io/_dataframe.py +0 -0
  344. union/io/_dir.py +425 -0
  345. union/io/_file.py +418 -0
  346. union/io/pickle/__init__.py +0 -0
  347. union/io/pickle/transformer.py +117 -0
  348. union/io/structured_dataset/__init__.py +122 -0
  349. union/io/structured_dataset/basic_dfs.py +219 -0
  350. union/io/structured_dataset/structured_dataset.py +1057 -0
  351. union/py.typed +0 -0
  352. union/remote/__init__.py +23 -0
  353. union/remote/_client/__init__.py +0 -0
  354. union/remote/_client/_protocols.py +129 -0
  355. union/remote/_client/auth/__init__.py +12 -0
  356. union/remote/_client/auth/_authenticators/__init__.py +0 -0
  357. union/remote/_client/auth/_authenticators/base.py +391 -0
  358. union/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  359. union/remote/_client/auth/_authenticators/device_code.py +120 -0
  360. union/remote/_client/auth/_authenticators/external_command.py +77 -0
  361. union/remote/_client/auth/_authenticators/factory.py +200 -0
  362. union/remote/_client/auth/_authenticators/pkce.py +515 -0
  363. union/remote/_client/auth/_channel.py +184 -0
  364. union/remote/_client/auth/_client_config.py +83 -0
  365. union/remote/_client/auth/_default_html.py +32 -0
  366. union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  367. union/remote/_client/auth/_grpc_utils/auth_interceptor.py +204 -0
  368. union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +144 -0
  369. union/remote/_client/auth/_keyring.py +154 -0
  370. union/remote/_client/auth/_token_client.py +258 -0
  371. union/remote/_client/auth/errors.py +16 -0
  372. union/remote/_client/controlplane.py +86 -0
  373. union/remote/_data.py +149 -0
  374. union/remote/_logs.py +74 -0
  375. union/remote/_project.py +86 -0
  376. union/remote/_run.py +820 -0
  377. union/remote/_secret.py +132 -0
  378. union/remote/_task.py +193 -0
  379. union/report/__init__.py +3 -0
  380. union/report/_report.py +178 -0
  381. union/report/_template.html +124 -0
  382. union/storage/__init__.py +24 -0
  383. union/storage/_remote_fs.py +34 -0
  384. union/storage/_storage.py +247 -0
  385. union/storage/_utils.py +5 -0
  386. union/types/__init__.py +11 -0
  387. union/types/_renderer.py +162 -0
  388. union/types/_string_literals.py +120 -0
  389. union/types/_type_engine.py +2131 -0
  390. union/types/_utils.py +80 -0
@@ -0,0 +1,129 @@
1
+ """
2
+ Flyte StructuredDataset
3
+ ==========================================================
4
+ .. currentmodule:: flyte.io.structured_dataset
5
+
6
+ .. autosummary::
7
+ :template: custom.rst
8
+ :toctree: generated/
9
+
10
+ StructuredDataset
11
+ StructuredDatasetDecoder
12
+ StructuredDatasetEncoder
13
+ """
14
+
15
+ import functools
16
+
17
+ from flyte._logging import logger
18
+ from flyte._utils.lazy_module import is_imported
19
+
20
+ from .structured_dataset import (
21
+ DuplicateHandlerError,
22
+ StructuredDataset,
23
+ StructuredDatasetDecoder,
24
+ StructuredDatasetEncoder,
25
+ StructuredDatasetTransformerEngine,
26
+ )
27
+
28
+
29
@functools.lru_cache(maxsize=None)
def register_csv_handlers():
    """Register the pandas <-> CSV encoder and decoder with the transformer engine.

    The handler classes are imported inside the function so that importing this
    package does not import ``basic_dfs``. Because the function takes no
    arguments and is wrapped in ``lru_cache``, a call that returns normally is
    cached and later calls do not register again.
    """
    from .basic_dfs import CSVToPandasDecodingHandler, PandasToCSVEncodingHandler

    # Encoder first, then decoder; each is instantiated right before it is registered.
    for handler_cls in (PandasToCSVEncodingHandler, CSVToPandasDecodingHandler):
        StructuredDatasetTransformerEngine.register(handler_cls(), default_format_for_type=True)
35
+
36
+
37
@functools.lru_cache(maxsize=None)
def register_pandas_handlers():
    """Register the pandas <-> Parquet handlers and the DataFrame renderer.

    All imports happen inside the function, so pandas is only imported when
    this is called. ``lru_cache`` on a zero-argument function means a call
    that returns normally is not repeated.
    """
    import pandas as pd

    from flyte.types._renderer import TopFrameRenderer

    from .basic_dfs import PandasToParquetEncodingHandler, ParquetToPandasDecodingHandler

    # Encoder first, then decoder; each is instantiated right before it is registered.
    for handler_cls in (PandasToParquetEncodingHandler, ParquetToPandasDecodingHandler):
        StructuredDatasetTransformerEngine.register(handler_cls(), default_format_for_type=True)

    frame_renderer = TopFrameRenderer()
    StructuredDatasetTransformerEngine.register_renderer(pd.DataFrame, frame_renderer)
48
+
49
+
50
@functools.lru_cache(maxsize=None)
def register_arrow_handlers():
    """Register the pyarrow <-> Parquet handlers and the ``pa.Table`` renderer.

    All imports happen inside the function, so pyarrow is only imported when
    this is called. ``lru_cache`` on a zero-argument function means a call
    that returns normally is not repeated.
    """
    import pyarrow as pa

    from flyte.types._renderer import ArrowRenderer

    from .basic_dfs import ArrowToParquetEncodingHandler, ParquetToArrowDecodingHandler

    # Encoder first, then decoder; each is instantiated right before it is registered.
    for handler_cls in (ArrowToParquetEncodingHandler, ParquetToArrowDecodingHandler):
        StructuredDatasetTransformerEngine.register(handler_cls(), default_format_for_type=True)

    table_renderer = ArrowRenderer()
    StructuredDatasetTransformerEngine.register_renderer(pa.Table, table_renderer)
61
+
62
+
63
@functools.lru_cache(maxsize=None)
def register_bigquery_handlers():
    """Register the BigQuery handlers when the ``.bigquery`` module can be imported.

    An ``ImportError`` raised by the import or by any registration step is
    logged at info level and swallowed. The function then returns normally,
    so ``lru_cache`` caches that outcome and later calls do not retry.
    """
    try:
        from .bigquery import (
            ArrowToBQEncodingHandlers,
            BQToArrowDecodingHandler,
            BQToPandasDecodingHandler,
            PandasToBQEncodingHandlers,
        )

        # Registration order is kept: pandas encoder/decoder, then arrow encoder/decoder.
        handler_classes = (
            PandasToBQEncodingHandlers,
            BQToPandasDecodingHandler,
            ArrowToBQEncodingHandlers,
            BQToArrowDecodingHandler,
        )
        for handler_cls in handler_classes:
            StructuredDatasetTransformerEngine.register(handler_cls())
    except ImportError:
        logger.info(
            "We won't register bigquery handler for structured dataset because "
            "we can't find the packages google-cloud-bigquery-storage and google-cloud-bigquery"
        )
82
+
83
+
84
@functools.lru_cache(maxsize=None)
def register_snowflake_handlers():
    """Register the Snowflake handlers when the ``.snowflake`` module can be imported.

    An ``ImportError`` raised by the import or by any registration step is
    logged at info level and swallowed. The function then returns normally,
    so ``lru_cache`` caches that outcome and later calls do not retry.
    """
    try:
        from .snowflake import PandasToSnowflakeEncodingHandlers, SnowflakeToPandasDecodingHandler

        # Decoder is registered before the encoder, as in the original ordering.
        for handler_cls in (SnowflakeToPandasDecodingHandler, PandasToSnowflakeEncodingHandlers):
            StructuredDatasetTransformerEngine.register(handler_cls())
    except ImportError:
        logger.info(
            "We won't register snowflake handler for structured dataset because "
            "we can't find package snowflake-connector-python"
        )
97
+
98
+
99
def lazy_import_structured_dataset_handler():
    """Register handlers for each dataframe backend that ``is_imported`` reports.

    Backends are checked in a fixed order (pandas, pyarrow, BigQuery,
    Snowflake). For each backend that is reported, its registration
    function(s) are called; a ``DuplicateHandlerError`` stops that backend's
    remaining registrations and is logged at debug level.
    """
    # (module name to probe, registration callables, message logged on duplicate registration)
    backends = (
        (
            "pandas",
            (register_pandas_handlers, register_csv_handlers),
            "Transformer for pandas is already registered.",
        ),
        (
            "pyarrow",
            (register_arrow_handlers,),
            "Transformer for arrow is already registered.",
        ),
        (
            "google.cloud.bigquery",
            (register_bigquery_handlers,),
            "Transformer for bigquery is already registered.",
        ),
        (
            "snowflake.connector",
            (register_snowflake_handlers,),
            "Transformer for snowflake is already registered.",
        ),
    )
    for module_name, registrars, duplicate_message in backends:
        if not is_imported(module_name):
            continue
        try:
            for registrar in registrars:
                registrar()
        except DuplicateHandlerError:
            logger.debug(duplicate_message)
121
+
122
+
123
+ __all__ = [
124
+ "StructuredDataset",
125
+ "StructuredDatasetDecoder",
126
+ "StructuredDatasetEncoder",
127
+ "StructuredDatasetTransformerEngine",
128
+ "lazy_import_structured_dataset_handler",
129
+ ]
@@ -0,0 +1,219 @@
1
+ import os
2
+ import typing
3
+ from pathlib import Path
4
+ from typing import TypeVar
5
+
6
+ from flyteidl.core import literals_pb2, types_pb2
7
+ from fsspec.core import split_protocol, strip_protocol
8
+
9
+ import flyte.storage as storage
10
+ from flyte._logging import logger
11
+ from flyte._utils import lazy_module
12
+ from flyte.io.structured_dataset.structured_dataset import (
13
+ CSV,
14
+ PARQUET,
15
+ StructuredDataset,
16
+ StructuredDatasetDecoder,
17
+ StructuredDatasetEncoder,
18
+ )
19
+
20
+ if typing.TYPE_CHECKING:
21
+ import pandas as pd
22
+ import pyarrow as pa
23
+ else:
24
+ pd = lazy_module("pandas")
25
+ pa = lazy_module("pyarrow")
26
+
27
+ T = TypeVar("T")
28
+
29
+
30
+ # pr: add back after storage
31
def get_pandas_storage_options(uri: str, data_config=None, anonymous: bool = False) -> typing.Optional[typing.Dict]:
    """Build the ``storage_options`` value to pass to pandas I/O calls for ``uri``.

    :param uri: Target location; classified with pandas' ``is_fsspec_url``.
    :param data_config: Currently unused; kept so callers can keep passing it.
    :param anonymous: When true and ``uri`` starts with ``s3``, ``"anon": True`` is
        added to the returned options. Previously this flag was ignored, so the
        decoders' "anonymous retry" repeated the identical request.
    :return: ``None`` for non-fsspec (e.g. local) paths, an options dict for
        ``s3`` URIs, and an empty dict for any other fsspec URL.
    """
    from pandas.io.common import is_fsspec_url  # type: ignore

    if not is_fsspec_url(uri):
        # Pandas does not allow storage_options for non-fsspec paths e.g. local.
        return None

    if not uri.startswith("s3"):
        return {}

    # pr: after storage, replace with real call to get_fsspec_storage_options
    # NOTE(review): endpoint and credentials are hard-coded placeholder values.
    options: typing.Dict[str, typing.Any] = {
        "cache_regions": True,
        "client_kwargs": {"endpoint_url": "http://localhost:30002"},
        "key": "minio",
        "secret": "miniostorage",
    }
    if anonymous:
        # ``anon`` is the s3fs option for unauthenticated access.
        options["anon"] = True
    return options
47
+
48
+
49
class PandasToCSVEncodingHandler(StructuredDatasetEncoder):
    """Encoder that writes a pandas DataFrame as a CSV object named ``.csv`` under the dataset URI."""

    def __init__(self):
        super().__init__(pd.DataFrame, None, CSV)

    async def encode(
        self,
        structured_dataset: StructuredDataset,
        structured_dataset_type: types_pb2.StructuredDatasetType,
    ) -> literals_pb2.StructuredDataset:
        """Write ``structured_dataset.dataframe`` to ``<uri>/.csv`` and return the literal.

        :param structured_dataset: Dataset wrapper. Its ``uri`` is used when set;
            otherwise a random remote path is taken from the internal context.
        :param structured_dataset_type: Type message; its ``format`` field is
            overwritten with ``CSV`` before it is embedded in the result.
        :return: A ``StructuredDataset`` literal whose ``uri`` is the directory-style
            location (not the ``.csv`` object itself).
        """
        if structured_dataset.uri:
            uri = typing.cast(str, structured_dataset.uri)
        else:
            from flyte._context import internal_ctx

            ctx = internal_ctx()
            # str() keeps the URI a plain string for the path handling and the proto
            # field below, matching PandasToParquetEncodingHandler.
            uri = str(ctx.raw_data.get_random_remote_path())

        if not storage.is_remote(uri):
            # Local target: the directory must exist before pandas writes into it.
            Path(uri).mkdir(parents=True, exist_ok=True)
        path = os.path.join(uri, ".csv")
        df = typing.cast(pd.DataFrame, structured_dataset.dataframe)
        df.to_csv(
            path,
            index=False,
            storage_options=get_pandas_storage_options(uri=path, data_config=None),
        )
        structured_dataset_type.format = CSV
        # Protobuf message constructors accept keyword arguments only; passing the
        # type positionally raises TypeError.
        return literals_pb2.StructuredDataset(
            uri=uri,
            metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type=structured_dataset_type),
        )
79
+
80
+
81
class CSVToPandasDecodingHandler(StructuredDatasetDecoder):
    """Decoder that reads the ``.csv`` object under a dataset URI into a pandas DataFrame."""

    def __init__(self):
        super().__init__(pd.DataFrame, None, CSV)

    async def decode(
        self,
        proto_value: literals_pb2.StructuredDataset,
        current_task_metadata: literals_pb2.StructuredDatasetMetadata,
    ) -> "pd.DataFrame":
        """Read ``<proto_value.uri>/.csv`` with ``pd.read_csv``.

        :param proto_value: Literal carrying the dataset ``uri``.
        :param current_task_metadata: When its dataset type lists columns, only
            those column names are read (passed as ``usecols``).
        :return: The loaded DataFrame.
        """
        from botocore.exceptions import NoCredentialsError

        dataset_uri = proto_value.uri
        storage_options = get_pandas_storage_options(uri=dataset_uri, data_config=None)
        csv_path = os.path.join(dataset_uri, ".csv")

        dataset_type = current_task_metadata.structured_dataset_type
        wanted_columns = None
        if dataset_type and dataset_type.columns:
            wanted_columns = [column.name for column in dataset_type.columns]

        try:
            return pd.read_csv(csv_path, usecols=wanted_columns, storage_options=storage_options)
        except NoCredentialsError:
            # Retry once with options requested for anonymous access.
            logger.debug("S3 source detected, attempting anonymous S3 access")
            storage_options = get_pandas_storage_options(uri=dataset_uri, data_config=None, anonymous=True)
            return pd.read_csv(csv_path, usecols=wanted_columns, storage_options=storage_options)
104
+
105
+
106
class PandasToParquetEncodingHandler(StructuredDatasetEncoder):
    """Encoder that writes a pandas DataFrame as a Parquet object named ``00000`` under the dataset URI."""

    def __init__(self):
        super().__init__(pd.DataFrame, None, PARQUET)

    async def encode(
        self,
        structured_dataset: StructuredDataset,
        structured_dataset_type: types_pb2.StructuredDatasetType,
    ) -> literals_pb2.StructuredDataset:
        """Write ``structured_dataset.dataframe`` to ``<uri>/00000`` and return the literal.

        :param structured_dataset: Dataset wrapper. Its ``uri`` is used when set;
            otherwise a random remote path is taken from the internal context.
        :param structured_dataset_type: Type message; its ``format`` field is
            overwritten with ``PARQUET`` before it is embedded in the result.
        :return: A ``StructuredDataset`` literal whose ``uri`` is the directory-style location.
        """
        if structured_dataset.uri:
            target_uri = typing.cast(str, structured_dataset.uri)
        else:
            from flyte._context import internal_ctx

            target_uri = str(internal_ctx().raw_data.get_random_remote_path())

        if not storage.is_remote(target_uri):
            # Local target: the directory must exist before pandas writes into it.
            Path(target_uri).mkdir(parents=True, exist_ok=True)

        part_path = os.path.join(target_uri, f"{0:05}")
        frame = typing.cast(pd.DataFrame, structured_dataset.dataframe)
        pandas_options = get_pandas_storage_options(uri=part_path, data_config=None)
        frame.to_parquet(
            part_path,
            coerce_timestamps="us",
            allow_truncated_timestamps=False,
            storage_options=pandas_options,
        )

        structured_dataset_type.format = PARQUET
        metadata = literals_pb2.StructuredDatasetMetadata(structured_dataset_type=structured_dataset_type)
        return literals_pb2.StructuredDataset(uri=target_uri, metadata=metadata)
137
+
138
+
139
class ParquetToPandasDecodingHandler(StructuredDatasetDecoder):
    """Decoder that reads Parquet data at a dataset URI into a pandas DataFrame."""

    def __init__(self):
        super().__init__(pd.DataFrame, None, PARQUET)

    async def decode(
        self,
        flyte_value: literals_pb2.StructuredDataset,
        current_task_metadata: literals_pb2.StructuredDatasetMetadata,
    ) -> "pd.DataFrame":
        """Read ``flyte_value.uri`` with ``pd.read_parquet``.

        :param flyte_value: Literal carrying the dataset ``uri``.
        :param current_task_metadata: When its dataset type lists columns, only
            those column names are read.
        :return: The loaded DataFrame.
        """
        from botocore.exceptions import NoCredentialsError

        dataset_uri = flyte_value.uri
        storage_options = get_pandas_storage_options(uri=dataset_uri, data_config=None)

        dataset_type = current_task_metadata.structured_dataset_type
        wanted_columns = None
        if dataset_type and dataset_type.columns:
            wanted_columns = [column.name for column in dataset_type.columns]

        try:
            return pd.read_parquet(dataset_uri, columns=wanted_columns, storage_options=storage_options)
        except NoCredentialsError:
            # Retry once with options requested for anonymous access.
            logger.debug("S3 source detected, attempting anonymous S3 access")
            storage_options = get_pandas_storage_options(uri=dataset_uri, data_config=None, anonymous=True)
            return pd.read_parquet(dataset_uri, columns=wanted_columns, storage_options=storage_options)
161
+
162
+
163
class ArrowToParquetEncodingHandler(StructuredDatasetEncoder):
    """Encoder that writes a pyarrow Table as a Parquet object named ``00000`` under the dataset URI."""

    def __init__(self):
        super().__init__(pa.Table, None, PARQUET)

    async def encode(
        self,
        structured_dataset: StructuredDataset,
        structured_dataset_type: types_pb2.StructuredDatasetType,
    ) -> literals_pb2.StructuredDataset:
        """Write ``structured_dataset.dataframe`` to ``<uri>/00000`` and return the literal.

        :param structured_dataset: Dataset wrapper. Its ``uri`` is used when set;
            otherwise a random remote path is taken from the internal context.
        :param structured_dataset_type: Type message embedded unchanged in the
            returned metadata.
        :return: A ``StructuredDataset`` literal whose ``uri`` is the directory-style location.
        """
        import pyarrow.parquet as pq

        if structured_dataset.uri:
            uri = typing.cast(str, structured_dataset.uri)
        else:
            from flyte._context import internal_ctx

            ctx = internal_ctx()
            # str() keeps the URI a plain string for the path handling and the proto
            # field below, matching PandasToParquetEncodingHandler.
            uri = str(ctx.raw_data.get_random_remote_path())

        if not storage.is_remote(uri):
            # Local target: the directory must exist before the table is written into it.
            Path(uri).mkdir(parents=True, exist_ok=True)
        path = os.path.join(uri, f"{0:05}")
        filesystem = storage.get_underlying_filesystem(path=path)
        # The filesystem object carries the protocol, so the path is passed without it.
        pq.write_table(structured_dataset.dataframe, strip_protocol(path), filesystem=filesystem)
        # Protobuf message constructors accept keyword arguments only; passing the
        # type positionally raises TypeError.
        return literals_pb2.StructuredDataset(
            uri=uri,
            metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type=structured_dataset_type),
        )
190
+
191
+
192
class ParquetToArrowDecodingHandler(StructuredDatasetDecoder):
    """Decoder that reads Parquet data at a dataset URI into a pyarrow Table."""

    def __init__(self):
        super().__init__(pa.Table, None, PARQUET)

    async def decode(
        self,
        proto_value: literals_pb2.StructuredDataset,
        current_task_metadata: literals_pb2.StructuredDatasetMetadata,
    ) -> "pa.Table":
        """Read the dataset with ``pq.read_table``.

        :param proto_value: Literal carrying the dataset ``uri``.
        :param current_task_metadata: When its dataset type lists columns, only
            those column names are read.
        :return: The loaded table.
        :raises NoCredentialsError: Re-raised when the first read fails with it
            and no anonymous filesystem is available for the URI.
        """
        import pyarrow.parquet as pq
        from botocore.exceptions import NoCredentialsError

        source_uri = proto_value.uri
        if not storage.is_remote(source_uri):
            Path(source_uri).parent.mkdir(parents=True, exist_ok=True)
        # Only the protocol-less portion of the URI is handed to pyarrow.
        stripped_path = split_protocol(source_uri)[1]

        dataset_type = current_task_metadata.structured_dataset_type
        wanted_columns = None
        if dataset_type and dataset_type.columns:
            wanted_columns = [column.name for column in dataset_type.columns]

        try:
            # NOTE(review): no filesystem is passed on this first attempt, unlike the
            # anonymous retry below — confirm remote URIs resolve as intended.
            return pq.read_table(stripped_path, columns=wanted_columns)
        except NoCredentialsError as e:
            logger.debug("S3 source detected, attempting anonymous S3 access")
            anonymous_fs = storage.get_underlying_filesystem(path=source_uri, anonymous=True)
            if anonymous_fs is None:
                raise e
            return pq.read_table(stripped_path, filesystem=anonymous_fs, columns=wanted_columns)