flyte 0.0.1b0__py3-none-any.whl → 2.0.0b46__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registry.
Files changed (455)
  1. flyte/__init__.py +83 -30
  2. flyte/_bin/connect.py +61 -0
  3. flyte/_bin/debug.py +38 -0
  4. flyte/_bin/runtime.py +87 -19
  5. flyte/_bin/serve.py +351 -0
  6. flyte/_build.py +3 -2
  7. flyte/_cache/cache.py +6 -5
  8. flyte/_cache/local_cache.py +216 -0
  9. flyte/_code_bundle/_ignore.py +31 -5
  10. flyte/_code_bundle/_packaging.py +42 -11
  11. flyte/_code_bundle/_utils.py +57 -34
  12. flyte/_code_bundle/bundle.py +130 -27
  13. flyte/_constants.py +1 -0
  14. flyte/_context.py +21 -5
  15. flyte/_custom_context.py +73 -0
  16. flyte/_debug/constants.py +37 -0
  17. flyte/_debug/utils.py +17 -0
  18. flyte/_debug/vscode.py +315 -0
  19. flyte/_deploy.py +396 -75
  20. flyte/_deployer.py +109 -0
  21. flyte/_environment.py +94 -11
  22. flyte/_excepthook.py +37 -0
  23. flyte/_group.py +2 -1
  24. flyte/_hash.py +1 -16
  25. flyte/_image.py +544 -234
  26. flyte/_initialize.py +443 -294
  27. flyte/_interface.py +40 -5
  28. flyte/_internal/controllers/__init__.py +22 -8
  29. flyte/_internal/controllers/_local_controller.py +159 -35
  30. flyte/_internal/controllers/_trace.py +18 -10
  31. flyte/_internal/controllers/remote/__init__.py +38 -9
  32. flyte/_internal/controllers/remote/_action.py +82 -12
  33. flyte/_internal/controllers/remote/_client.py +6 -2
  34. flyte/_internal/controllers/remote/_controller.py +290 -64
  35. flyte/_internal/controllers/remote/_core.py +155 -95
  36. flyte/_internal/controllers/remote/_informer.py +40 -20
  37. flyte/_internal/controllers/remote/_service_protocol.py +2 -2
  38. flyte/_internal/imagebuild/__init__.py +2 -10
  39. flyte/_internal/imagebuild/docker_builder.py +391 -84
  40. flyte/_internal/imagebuild/image_builder.py +111 -55
  41. flyte/_internal/imagebuild/remote_builder.py +409 -0
  42. flyte/_internal/imagebuild/utils.py +79 -0
  43. flyte/_internal/resolvers/_app_env_module.py +92 -0
  44. flyte/_internal/resolvers/_task_module.py +5 -38
  45. flyte/_internal/resolvers/app_env.py +26 -0
  46. flyte/_internal/resolvers/common.py +8 -1
  47. flyte/_internal/resolvers/default.py +2 -2
  48. flyte/_internal/runtime/convert.py +322 -33
  49. flyte/_internal/runtime/entrypoints.py +106 -18
  50. flyte/_internal/runtime/io.py +71 -23
  51. flyte/_internal/runtime/resources_serde.py +21 -7
  52. flyte/_internal/runtime/reuse.py +125 -0
  53. flyte/_internal/runtime/rusty.py +196 -0
  54. flyte/_internal/runtime/task_serde.py +239 -66
  55. flyte/_internal/runtime/taskrunner.py +48 -8
  56. flyte/_internal/runtime/trigger_serde.py +162 -0
  57. flyte/_internal/runtime/types_serde.py +7 -16
  58. flyte/_keyring/file.py +115 -0
  59. flyte/_link.py +30 -0
  60. flyte/_logging.py +241 -42
  61. flyte/_map.py +312 -0
  62. flyte/_metrics.py +59 -0
  63. flyte/_module.py +74 -0
  64. flyte/_pod.py +30 -0
  65. flyte/_resources.py +296 -33
  66. flyte/_retry.py +1 -7
  67. flyte/_reusable_environment.py +72 -7
  68. flyte/_run.py +461 -132
  69. flyte/_secret.py +47 -11
  70. flyte/_serve.py +333 -0
  71. flyte/_task.py +245 -56
  72. flyte/_task_environment.py +219 -97
  73. flyte/_task_plugins.py +47 -0
  74. flyte/_tools.py +8 -8
  75. flyte/_trace.py +15 -24
  76. flyte/_trigger.py +1027 -0
  77. flyte/_utils/__init__.py +12 -1
  78. flyte/_utils/asyn.py +3 -1
  79. flyte/_utils/async_cache.py +139 -0
  80. flyte/_utils/coro_management.py +5 -4
  81. flyte/_utils/description_parser.py +19 -0
  82. flyte/_utils/docker_credentials.py +173 -0
  83. flyte/_utils/helpers.py +45 -19
  84. flyte/_utils/module_loader.py +123 -0
  85. flyte/_utils/org_discovery.py +57 -0
  86. flyte/_utils/uv_script_parser.py +8 -1
  87. flyte/_version.py +16 -3
  88. flyte/app/__init__.py +27 -0
  89. flyte/app/_app_environment.py +362 -0
  90. flyte/app/_connector_environment.py +40 -0
  91. flyte/app/_deploy.py +130 -0
  92. flyte/app/_parameter.py +343 -0
  93. flyte/app/_runtime/__init__.py +3 -0
  94. flyte/app/_runtime/app_serde.py +383 -0
  95. flyte/app/_types.py +113 -0
  96. flyte/app/extras/__init__.py +9 -0
  97. flyte/app/extras/_auth_middleware.py +217 -0
  98. flyte/app/extras/_fastapi.py +93 -0
  99. flyte/app/extras/_model_loader/__init__.py +3 -0
  100. flyte/app/extras/_model_loader/config.py +7 -0
  101. flyte/app/extras/_model_loader/loader.py +288 -0
  102. flyte/cli/__init__.py +12 -0
  103. flyte/cli/_abort.py +28 -0
  104. flyte/cli/_build.py +114 -0
  105. flyte/cli/_common.py +493 -0
  106. flyte/cli/_create.py +371 -0
  107. flyte/cli/_delete.py +45 -0
  108. flyte/cli/_deploy.py +401 -0
  109. flyte/cli/_gen.py +316 -0
  110. flyte/cli/_get.py +446 -0
  111. flyte/cli/_option.py +33 -0
  112. {union/_cli → flyte/cli}/_params.py +152 -153
  113. flyte/cli/_plugins.py +209 -0
  114. flyte/cli/_prefetch.py +292 -0
  115. flyte/cli/_run.py +690 -0
  116. flyte/cli/_serve.py +338 -0
  117. flyte/cli/_update.py +86 -0
  118. flyte/cli/_user.py +20 -0
  119. flyte/cli/main.py +246 -0
  120. flyte/config/__init__.py +3 -0
  121. flyte/config/_config.py +248 -0
  122. flyte/config/_internal.py +73 -0
  123. flyte/config/_reader.py +225 -0
  124. flyte/connectors/__init__.py +11 -0
  125. flyte/connectors/_connector.py +330 -0
  126. flyte/connectors/_server.py +194 -0
  127. flyte/connectors/utils.py +159 -0
  128. flyte/errors.py +134 -2
  129. flyte/extend.py +24 -0
  130. flyte/extras/_container.py +69 -56
  131. flyte/git/__init__.py +3 -0
  132. flyte/git/_config.py +279 -0
  133. flyte/io/__init__.py +8 -1
  134. flyte/io/{structured_dataset → _dataframe}/__init__.py +32 -30
  135. flyte/io/{structured_dataset → _dataframe}/basic_dfs.py +75 -68
  136. flyte/io/{structured_dataset/structured_dataset.py → _dataframe/dataframe.py} +207 -242
  137. flyte/io/_dir.py +575 -113
  138. flyte/io/_file.py +587 -141
  139. flyte/io/_hashing_io.py +342 -0
  140. flyte/io/extend.py +7 -0
  141. flyte/models.py +635 -0
  142. flyte/prefetch/__init__.py +22 -0
  143. flyte/prefetch/_hf_model.py +563 -0
  144. flyte/remote/__init__.py +14 -3
  145. flyte/remote/_action.py +879 -0
  146. flyte/remote/_app.py +346 -0
  147. flyte/remote/_auth_metadata.py +42 -0
  148. flyte/remote/_client/_protocols.py +62 -4
  149. flyte/remote/_client/auth/_auth_utils.py +19 -0
  150. flyte/remote/_client/auth/_authenticators/base.py +8 -2
  151. flyte/remote/_client/auth/_authenticators/device_code.py +4 -5
  152. flyte/remote/_client/auth/_authenticators/factory.py +4 -0
  153. flyte/remote/_client/auth/_authenticators/passthrough.py +79 -0
  154. flyte/remote/_client/auth/_authenticators/pkce.py +17 -18
  155. flyte/remote/_client/auth/_channel.py +47 -18
  156. flyte/remote/_client/auth/_client_config.py +5 -3
  157. flyte/remote/_client/auth/_keyring.py +15 -2
  158. flyte/remote/_client/auth/_token_client.py +3 -3
  159. flyte/remote/_client/controlplane.py +206 -18
  160. flyte/remote/_common.py +66 -0
  161. flyte/remote/_data.py +107 -22
  162. flyte/remote/_logs.py +116 -33
  163. flyte/remote/_project.py +21 -19
  164. flyte/remote/_run.py +164 -631
  165. flyte/remote/_secret.py +72 -29
  166. flyte/remote/_task.py +387 -46
  167. flyte/remote/_trigger.py +368 -0
  168. flyte/remote/_user.py +43 -0
  169. flyte/report/_report.py +10 -6
  170. flyte/storage/__init__.py +13 -1
  171. flyte/storage/_config.py +237 -0
  172. flyte/storage/_parallel_reader.py +289 -0
  173. flyte/storage/_storage.py +268 -59
  174. flyte/syncify/__init__.py +56 -0
  175. flyte/syncify/_api.py +414 -0
  176. flyte/types/__init__.py +39 -0
  177. flyte/types/_interface.py +22 -7
  178. flyte/{io/pickle/transformer.py → types/_pickle.py} +37 -9
  179. flyte/types/_string_literals.py +8 -9
  180. flyte/types/_type_engine.py +230 -129
  181. flyte/types/_utils.py +1 -1
  182. flyte-2.0.0b46.data/scripts/debug.py +38 -0
  183. flyte-2.0.0b46.data/scripts/runtime.py +194 -0
  184. flyte-2.0.0b46.dist-info/METADATA +352 -0
  185. flyte-2.0.0b46.dist-info/RECORD +221 -0
  186. flyte-2.0.0b46.dist-info/entry_points.txt +8 -0
  187. flyte-2.0.0b46.dist-info/licenses/LICENSE +201 -0
  188. flyte/_api_commons.py +0 -3
  189. flyte/_cli/_common.py +0 -287
  190. flyte/_cli/_create.py +0 -42
  191. flyte/_cli/_delete.py +0 -23
  192. flyte/_cli/_deploy.py +0 -140
  193. flyte/_cli/_get.py +0 -235
  194. flyte/_cli/_run.py +0 -152
  195. flyte/_cli/main.py +0 -72
  196. flyte/_datastructures.py +0 -342
  197. flyte/_internal/controllers/pbhash.py +0 -39
  198. flyte/_protos/common/authorization_pb2.py +0 -66
  199. flyte/_protos/common/authorization_pb2.pyi +0 -108
  200. flyte/_protos/common/authorization_pb2_grpc.py +0 -4
  201. flyte/_protos/common/identifier_pb2.py +0 -71
  202. flyte/_protos/common/identifier_pb2.pyi +0 -82
  203. flyte/_protos/common/identifier_pb2_grpc.py +0 -4
  204. flyte/_protos/common/identity_pb2.py +0 -48
  205. flyte/_protos/common/identity_pb2.pyi +0 -72
  206. flyte/_protos/common/identity_pb2_grpc.py +0 -4
  207. flyte/_protos/common/list_pb2.py +0 -36
  208. flyte/_protos/common/list_pb2.pyi +0 -69
  209. flyte/_protos/common/list_pb2_grpc.py +0 -4
  210. flyte/_protos/common/policy_pb2.py +0 -37
  211. flyte/_protos/common/policy_pb2.pyi +0 -27
  212. flyte/_protos/common/policy_pb2_grpc.py +0 -4
  213. flyte/_protos/common/role_pb2.py +0 -37
  214. flyte/_protos/common/role_pb2.pyi +0 -53
  215. flyte/_protos/common/role_pb2_grpc.py +0 -4
  216. flyte/_protos/common/runtime_version_pb2.py +0 -28
  217. flyte/_protos/common/runtime_version_pb2.pyi +0 -24
  218. flyte/_protos/common/runtime_version_pb2_grpc.py +0 -4
  219. flyte/_protos/logs/dataplane/payload_pb2.py +0 -96
  220. flyte/_protos/logs/dataplane/payload_pb2.pyi +0 -168
  221. flyte/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
  222. flyte/_protos/secret/definition_pb2.py +0 -49
  223. flyte/_protos/secret/definition_pb2.pyi +0 -93
  224. flyte/_protos/secret/definition_pb2_grpc.py +0 -4
  225. flyte/_protos/secret/payload_pb2.py +0 -62
  226. flyte/_protos/secret/payload_pb2.pyi +0 -94
  227. flyte/_protos/secret/payload_pb2_grpc.py +0 -4
  228. flyte/_protos/secret/secret_pb2.py +0 -38
  229. flyte/_protos/secret/secret_pb2.pyi +0 -6
  230. flyte/_protos/secret/secret_pb2_grpc.py +0 -198
  231. flyte/_protos/secret/secret_pb2_grpc_grpc.py +0 -198
  232. flyte/_protos/validate/validate/validate_pb2.py +0 -76
  233. flyte/_protos/workflow/node_execution_service_pb2.py +0 -26
  234. flyte/_protos/workflow/node_execution_service_pb2.pyi +0 -4
  235. flyte/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
  236. flyte/_protos/workflow/queue_service_pb2.py +0 -106
  237. flyte/_protos/workflow/queue_service_pb2.pyi +0 -141
  238. flyte/_protos/workflow/queue_service_pb2_grpc.py +0 -172
  239. flyte/_protos/workflow/run_definition_pb2.py +0 -128
  240. flyte/_protos/workflow/run_definition_pb2.pyi +0 -310
  241. flyte/_protos/workflow/run_definition_pb2_grpc.py +0 -4
  242. flyte/_protos/workflow/run_logs_service_pb2.py +0 -41
  243. flyte/_protos/workflow/run_logs_service_pb2.pyi +0 -28
  244. flyte/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
  245. flyte/_protos/workflow/run_service_pb2.py +0 -133
  246. flyte/_protos/workflow/run_service_pb2.pyi +0 -175
  247. flyte/_protos/workflow/run_service_pb2_grpc.py +0 -412
  248. flyte/_protos/workflow/state_service_pb2.py +0 -58
  249. flyte/_protos/workflow/state_service_pb2.pyi +0 -71
  250. flyte/_protos/workflow/state_service_pb2_grpc.py +0 -138
  251. flyte/_protos/workflow/task_definition_pb2.py +0 -72
  252. flyte/_protos/workflow/task_definition_pb2.pyi +0 -65
  253. flyte/_protos/workflow/task_definition_pb2_grpc.py +0 -4
  254. flyte/_protos/workflow/task_service_pb2.py +0 -44
  255. flyte/_protos/workflow/task_service_pb2.pyi +0 -31
  256. flyte/_protos/workflow/task_service_pb2_grpc.py +0 -104
  257. flyte/io/_dataframe.py +0 -0
  258. flyte/io/pickle/__init__.py +0 -0
  259. flyte/remote/_console.py +0 -18
  260. flyte-0.0.1b0.dist-info/METADATA +0 -179
  261. flyte-0.0.1b0.dist-info/RECORD +0 -390
  262. flyte-0.0.1b0.dist-info/entry_points.txt +0 -3
  263. union/__init__.py +0 -54
  264. union/_api_commons.py +0 -3
  265. union/_bin/__init__.py +0 -0
  266. union/_bin/runtime.py +0 -113
  267. union/_build.py +0 -25
  268. union/_cache/__init__.py +0 -12
  269. union/_cache/cache.py +0 -141
  270. union/_cache/defaults.py +0 -9
  271. union/_cache/policy_function_body.py +0 -42
  272. union/_cli/__init__.py +0 -0
  273. union/_cli/_common.py +0 -263
  274. union/_cli/_create.py +0 -40
  275. union/_cli/_delete.py +0 -23
  276. union/_cli/_deploy.py +0 -120
  277. union/_cli/_get.py +0 -162
  278. union/_cli/_run.py +0 -150
  279. union/_cli/main.py +0 -72
  280. union/_code_bundle/__init__.py +0 -8
  281. union/_code_bundle/_ignore.py +0 -113
  282. union/_code_bundle/_packaging.py +0 -187
  283. union/_code_bundle/_utils.py +0 -342
  284. union/_code_bundle/bundle.py +0 -176
  285. union/_context.py +0 -146
  286. union/_datastructures.py +0 -295
  287. union/_deploy.py +0 -185
  288. union/_doc.py +0 -29
  289. union/_docstring.py +0 -26
  290. union/_environment.py +0 -43
  291. union/_group.py +0 -31
  292. union/_hash.py +0 -23
  293. union/_image.py +0 -760
  294. union/_initialize.py +0 -585
  295. union/_interface.py +0 -84
  296. union/_internal/__init__.py +0 -3
  297. union/_internal/controllers/__init__.py +0 -77
  298. union/_internal/controllers/_local_controller.py +0 -77
  299. union/_internal/controllers/pbhash.py +0 -39
  300. union/_internal/controllers/remote/__init__.py +0 -40
  301. union/_internal/controllers/remote/_action.py +0 -131
  302. union/_internal/controllers/remote/_client.py +0 -43
  303. union/_internal/controllers/remote/_controller.py +0 -169
  304. union/_internal/controllers/remote/_core.py +0 -341
  305. union/_internal/controllers/remote/_informer.py +0 -260
  306. union/_internal/controllers/remote/_service_protocol.py +0 -44
  307. union/_internal/imagebuild/__init__.py +0 -11
  308. union/_internal/imagebuild/docker_builder.py +0 -416
  309. union/_internal/imagebuild/image_builder.py +0 -243
  310. union/_internal/imagebuild/remote_builder.py +0 -0
  311. union/_internal/resolvers/__init__.py +0 -0
  312. union/_internal/resolvers/_task_module.py +0 -31
  313. union/_internal/resolvers/common.py +0 -24
  314. union/_internal/resolvers/default.py +0 -27
  315. union/_internal/runtime/__init__.py +0 -0
  316. union/_internal/runtime/convert.py +0 -163
  317. union/_internal/runtime/entrypoints.py +0 -121
  318. union/_internal/runtime/io.py +0 -136
  319. union/_internal/runtime/resources_serde.py +0 -134
  320. union/_internal/runtime/task_serde.py +0 -202
  321. union/_internal/runtime/taskrunner.py +0 -179
  322. union/_internal/runtime/types_serde.py +0 -53
  323. union/_logging.py +0 -124
  324. union/_protos/__init__.py +0 -0
  325. union/_protos/common/authorization_pb2.py +0 -66
  326. union/_protos/common/authorization_pb2.pyi +0 -106
  327. union/_protos/common/authorization_pb2_grpc.py +0 -4
  328. union/_protos/common/identifier_pb2.py +0 -71
  329. union/_protos/common/identifier_pb2.pyi +0 -82
  330. union/_protos/common/identifier_pb2_grpc.py +0 -4
  331. union/_protos/common/identity_pb2.py +0 -48
  332. union/_protos/common/identity_pb2.pyi +0 -72
  333. union/_protos/common/identity_pb2_grpc.py +0 -4
  334. union/_protos/common/list_pb2.py +0 -36
  335. union/_protos/common/list_pb2.pyi +0 -69
  336. union/_protos/common/list_pb2_grpc.py +0 -4
  337. union/_protos/common/policy_pb2.py +0 -37
  338. union/_protos/common/policy_pb2.pyi +0 -27
  339. union/_protos/common/policy_pb2_grpc.py +0 -4
  340. union/_protos/common/role_pb2.py +0 -37
  341. union/_protos/common/role_pb2.pyi +0 -51
  342. union/_protos/common/role_pb2_grpc.py +0 -4
  343. union/_protos/common/runtime_version_pb2.py +0 -28
  344. union/_protos/common/runtime_version_pb2.pyi +0 -24
  345. union/_protos/common/runtime_version_pb2_grpc.py +0 -4
  346. union/_protos/logs/dataplane/payload_pb2.py +0 -96
  347. union/_protos/logs/dataplane/payload_pb2.pyi +0 -168
  348. union/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
  349. union/_protos/secret/definition_pb2.py +0 -49
  350. union/_protos/secret/definition_pb2.pyi +0 -93
  351. union/_protos/secret/definition_pb2_grpc.py +0 -4
  352. union/_protos/secret/payload_pb2.py +0 -62
  353. union/_protos/secret/payload_pb2.pyi +0 -94
  354. union/_protos/secret/payload_pb2_grpc.py +0 -4
  355. union/_protos/secret/secret_pb2.py +0 -38
  356. union/_protos/secret/secret_pb2.pyi +0 -6
  357. union/_protos/secret/secret_pb2_grpc.py +0 -198
  358. union/_protos/validate/validate/validate_pb2.py +0 -76
  359. union/_protos/workflow/node_execution_service_pb2.py +0 -26
  360. union/_protos/workflow/node_execution_service_pb2.pyi +0 -4
  361. union/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
  362. union/_protos/workflow/queue_service_pb2.py +0 -75
  363. union/_protos/workflow/queue_service_pb2.pyi +0 -103
  364. union/_protos/workflow/queue_service_pb2_grpc.py +0 -172
  365. union/_protos/workflow/run_definition_pb2.py +0 -100
  366. union/_protos/workflow/run_definition_pb2.pyi +0 -256
  367. union/_protos/workflow/run_definition_pb2_grpc.py +0 -4
  368. union/_protos/workflow/run_logs_service_pb2.py +0 -41
  369. union/_protos/workflow/run_logs_service_pb2.pyi +0 -28
  370. union/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
  371. union/_protos/workflow/run_service_pb2.py +0 -133
  372. union/_protos/workflow/run_service_pb2.pyi +0 -173
  373. union/_protos/workflow/run_service_pb2_grpc.py +0 -412
  374. union/_protos/workflow/state_service_pb2.py +0 -58
  375. union/_protos/workflow/state_service_pb2.pyi +0 -69
  376. union/_protos/workflow/state_service_pb2_grpc.py +0 -138
  377. union/_protos/workflow/task_definition_pb2.py +0 -72
  378. union/_protos/workflow/task_definition_pb2.pyi +0 -65
  379. union/_protos/workflow/task_definition_pb2_grpc.py +0 -4
  380. union/_protos/workflow/task_service_pb2.py +0 -44
  381. union/_protos/workflow/task_service_pb2.pyi +0 -31
  382. union/_protos/workflow/task_service_pb2_grpc.py +0 -104
  383. union/_resources.py +0 -226
  384. union/_retry.py +0 -32
  385. union/_reusable_environment.py +0 -25
  386. union/_run.py +0 -374
  387. union/_secret.py +0 -61
  388. union/_task.py +0 -354
  389. union/_task_environment.py +0 -186
  390. union/_timeout.py +0 -47
  391. union/_tools.py +0 -27
  392. union/_utils/__init__.py +0 -11
  393. union/_utils/asyn.py +0 -119
  394. union/_utils/file_handling.py +0 -71
  395. union/_utils/helpers.py +0 -46
  396. union/_utils/lazy_module.py +0 -54
  397. union/_utils/uv_script_parser.py +0 -49
  398. union/_version.py +0 -21
  399. union/connectors/__init__.py +0 -0
  400. union/errors.py +0 -128
  401. union/extras/__init__.py +0 -5
  402. union/extras/_container.py +0 -263
  403. union/io/__init__.py +0 -11
  404. union/io/_dataframe.py +0 -0
  405. union/io/_dir.py +0 -425
  406. union/io/_file.py +0 -418
  407. union/io/pickle/__init__.py +0 -0
  408. union/io/pickle/transformer.py +0 -117
  409. union/io/structured_dataset/__init__.py +0 -122
  410. union/io/structured_dataset/basic_dfs.py +0 -219
  411. union/io/structured_dataset/structured_dataset.py +0 -1057
  412. union/py.typed +0 -0
  413. union/remote/__init__.py +0 -23
  414. union/remote/_client/__init__.py +0 -0
  415. union/remote/_client/_protocols.py +0 -129
  416. union/remote/_client/auth/__init__.py +0 -12
  417. union/remote/_client/auth/_authenticators/__init__.py +0 -0
  418. union/remote/_client/auth/_authenticators/base.py +0 -391
  419. union/remote/_client/auth/_authenticators/client_credentials.py +0 -73
  420. union/remote/_client/auth/_authenticators/device_code.py +0 -120
  421. union/remote/_client/auth/_authenticators/external_command.py +0 -77
  422. union/remote/_client/auth/_authenticators/factory.py +0 -200
  423. union/remote/_client/auth/_authenticators/pkce.py +0 -515
  424. union/remote/_client/auth/_channel.py +0 -184
  425. union/remote/_client/auth/_client_config.py +0 -83
  426. union/remote/_client/auth/_default_html.py +0 -32
  427. union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  428. union/remote/_client/auth/_grpc_utils/auth_interceptor.py +0 -204
  429. union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +0 -144
  430. union/remote/_client/auth/_keyring.py +0 -154
  431. union/remote/_client/auth/_token_client.py +0 -258
  432. union/remote/_client/auth/errors.py +0 -16
  433. union/remote/_client/controlplane.py +0 -86
  434. union/remote/_data.py +0 -149
  435. union/remote/_logs.py +0 -74
  436. union/remote/_project.py +0 -86
  437. union/remote/_run.py +0 -820
  438. union/remote/_secret.py +0 -132
  439. union/remote/_task.py +0 -193
  440. union/report/__init__.py +0 -3
  441. union/report/_report.py +0 -178
  442. union/report/_template.html +0 -124
  443. union/storage/__init__.py +0 -24
  444. union/storage/_remote_fs.py +0 -34
  445. union/storage/_storage.py +0 -247
  446. union/storage/_utils.py +0 -5
  447. union/types/__init__.py +0 -11
  448. union/types/_renderer.py +0 -162
  449. union/types/_string_literals.py +0 -120
  450. union/types/_type_engine.py +0 -2131
  451. union/types/_utils.py +0 -80
  452. /flyte/{_cli → _debug}/__init__.py +0 -0
  453. /flyte/{_protos → _keyring}/__init__.py +0 -0
  454. {flyte-0.0.1b0.dist-info → flyte-2.0.0b46.dist-info}/WHEEL +0 -0
  455. {flyte-0.0.1b0.dist-info → flyte-2.0.0b46.dist-info}/top_level.txt +0 -0
flyte/io/{structured_dataset/structured_dataset.py → _dataframe/dataframe.py}
@@ -1,20 +1,17 @@
 from __future__ import annotations
 
 import _datetime
-import asyncio
 import collections
 import types
 import typing
 from abc import ABC, abstractmethod
-from dataclasses import dataclass, field, is_dataclass
+from dataclasses import is_dataclass
 from typing import Any, ClassVar, Coroutine, Dict, Generic, List, Optional, Type, Union
 
-import msgpack
-from flyteidl.core import literals_pb2, types_pb2
+from flyteidl2.core import literals_pb2, types_pb2
 from fsspec.utils import get_protocol
-from mashumaro.mixins.json import DataClassJSONMixin
 from mashumaro.types import SerializableType
-from pydantic import model_serializer, model_validator
+from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_serializer, model_validator
 from typing_extensions import Annotated, TypeAlias, get_args, get_origin
 
 import flyte.storage as storage
@@ -35,58 +32,66 @@ else:
     pd = lazy_module("pandas")
     pa = lazy_module("pyarrow")
 
-T = typing.TypeVar("T")  # StructuredDataset type or a dataframe type
+T = typing.TypeVar("T")  # DataFrame type or a dataframe type
 DF = typing.TypeVar("DF")  # Dataframe type
 
-# For specifying the storage formats of StructuredDatasets. It's just a string, nothing fancy.
-StructuredDatasetFormat: TypeAlias = str
+# For specifying the storage formats of DataFrames. It's just a string, nothing fancy.
+DataFrameFormat: TypeAlias = str
 
 # Storage formats
-PARQUET: StructuredDatasetFormat = "parquet"
-CSV: StructuredDatasetFormat = "csv"
-GENERIC_FORMAT: StructuredDatasetFormat = ""
+PARQUET: DataFrameFormat = "parquet"
+CSV: DataFrameFormat = "csv"
+GENERIC_FORMAT: DataFrameFormat = ""
 GENERIC_PROTOCOL: str = "generic protocol"
 
 
-@dataclass
-class StructuredDataset(SerializableType, DataClassJSONMixin):
+class DataFrame(BaseModel, SerializableType):
     """
-    This is the user facing StructuredDataset class. Please don't confuse it with the literals.StructuredDataset
+    This is the user facing DataFrame class. Please don't confuse it with the literals.StructuredDataset
     class (that is just a model, a Python class representation of the protobuf).
     """
 
-    uri: typing.Optional[str] = field(default=None)
-    file_format: typing.Optional[str] = field(default=GENERIC_FORMAT)
+    uri: typing.Optional[str] = Field(default=None)
+    format: typing.Optional[str] = Field(default=GENERIC_FORMAT)
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    # Private attributes that are not part of the Pydantic model schema
+    _raw_df: typing.Optional[typing.Any] = PrivateAttr(default=None)
+    _metadata: typing.Optional[literals_pb2.StructuredDatasetMetadata] = PrivateAttr(default=None)
+    _literal_sd: Optional[literals_pb2.StructuredDataset] = PrivateAttr(default=None)
+    _dataframe_type: Optional[Type[Any]] = PrivateAttr(default=None)
+    _already_uploaded: bool = PrivateAttr(default=False)
 
     # loop manager is working better than synchronicity for some reason, was getting an error but may be an easy fix
     def _serialize(self) -> Dict[str, Optional[str]]:
         # dataclass case
         lt = TypeEngine.to_literal_type(type(self))
-        engine = StructuredDatasetTransformerEngine()
+        engine = DataFrameTransformerEngine()
         lv = loop_manager.run_sync(engine.to_literal, self, type(self), lt)
-        sd = StructuredDataset(uri=lv.scalar.structured_dataset.uri)
-        sd.file_format = lv.scalar.structured_dataset.metadata.structured_dataset_type.format
+        sd = DataFrame(uri=lv.scalar.structured_dataset.uri)
+        sd.format = lv.scalar.structured_dataset.metadata.structured_dataset_type.format
         return {
             "uri": sd.uri,
-            "file_format": sd.file_format,
+            "format": sd.format,
         }
 
     @classmethod
-    def _deserialize(cls, value) -> "StructuredDataset":
+    def _deserialize(cls, value) -> DataFrame:
         uri = value.get("uri", None)
-        file_format = value.get("file_format", None)
+        format_val = value.get("format", None)
 
         if uri is None:
-            raise ValueError("StructuredDataset's uri and file format should not be None")
+            raise ValueError("DataFrame's uri and file format should not be None")
 
-        engine = StructuredDatasetTransformerEngine()
+        engine = DataFrameTransformerEngine()
         return loop_manager.run_sync(
             engine.to_python_value,
             literals_pb2.Literal(
                 scalar=literals_pb2.Scalar(
                     structured_dataset=literals_pb2.StructuredDataset(
                         metadata=literals_pb2.StructuredDatasetMetadata(
-                            structured_dataset_type=types_pb2.StructuredDatasetType(format=file_format)
+                            structured_dataset_type=types_pb2.StructuredDatasetType(format=format_val)
                        ),
                         uri=uri,
                     )
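The hunk above is the heart of the rename: `StructuredDataset` becomes a Pydantic `DataFrame` model, `file_format` becomes `format`, and all runtime-only state moves into `PrivateAttr`s so it stays out of validation, serialization, and the JSON schema. A minimal sketch of that split, using a toy model rather than the real class (only `pydantic` v2 is assumed):

```python
from typing import Any, Optional

from pydantic import BaseModel, ConfigDict, Field, PrivateAttr


class ToyFrame(BaseModel):
    """Toy stand-in mirroring the schema split of the new DataFrame model."""

    uri: Optional[str] = Field(default=None)
    format: Optional[str] = Field(default="")

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Runtime-only handle: never validated, dumped, or part of the schema.
    _raw_df: Optional[Any] = PrivateAttr(default=None)


tf = ToyFrame(uri="s3://bucket/data.parquet", format="parquet")
tf._raw_df = [1, 2, 3]  # private attrs are set after construction
assert tf.model_dump() == {"uri": "s3://bucket/data.parquet", "format": "parquet"}
```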
@@ -96,28 +101,28 @@ class StructuredDataset(SerializableType, DataClassJSONMixin):
         )
 
     @model_serializer
-    def serialize_structured_dataset(self) -> Dict[str, Optional[str]]:
+    def serialize_dataframe(self) -> Dict[str, Optional[str]]:
         lt = TypeEngine.to_literal_type(type(self))
-        sde = StructuredDatasetTransformerEngine()
+        sde = DataFrameTransformerEngine()
         lv = loop_manager.run_sync(sde.to_literal, self, type(self), lt)
         return {
             "uri": lv.scalar.structured_dataset.uri,
-            "file_format": lv.scalar.structured_dataset.metadata.structured_dataset_type.format,
+            "format": lv.scalar.structured_dataset.metadata.structured_dataset_type.format,
         }
 
     @model_validator(mode="after")
-    def deserialize_structured_dataset(self, info) -> StructuredDataset:
+    def deserialize_dataframe(self, info) -> DataFrame:
         if info.context is None or info.context.get("deserialize") is not True:
             return self
 
-        engine = StructuredDatasetTransformerEngine()
+        engine = DataFrameTransformerEngine()
         return loop_manager.run_sync(
             engine.to_python_value,
             literals_pb2.Literal(
                 scalar=literals_pb2.Scalar(
                     structured_dataset=literals_pb2.StructuredDataset(
                         metadata=literals_pb2.StructuredDatasetMetadata(
-                            structured_dataset_type=types_pb2.StructuredDatasetType(format=self.file_format)
+                            structured_dataset_type=types_pb2.StructuredDatasetType(format=self.format)
                        ),
                         uri=self.uri,
                     )
@@ -134,30 +139,46 @@ class StructuredDataset(SerializableType, DataClassJSONMixin):
     def column_names(cls) -> typing.List[str]:
         return [k for k, v in cls.columns().items()]
 
-    def __init__(
-        self,
-        dataframe: typing.Optional[typing.Any] = None,
+    @classmethod
+    def from_df(
+        cls,
+        val: typing.Optional[typing.Any] = None,
         uri: typing.Optional[str] = None,
-        metadata: typing.Optional[literals_pb2.StructuredDatasetMetadata] = None,
+    ) -> DataFrame:
+        """
+        Wrapper to create a DataFrame from a dataframe.
+        The reason this is implemented as a wrapper instead of a full translation invoking
+        the type engine and the encoders is because there's too much information in the type
+        signature of the task that we don't want the user to have to replicate.
+        """
+        instance = cls(uri=uri)
+        instance._raw_df = val
+        return instance
+
+    @classmethod
+    def from_existing_remote(
+        cls,
+        remote_path: str,
+        format: typing.Optional[str] = None,
         **kwargs,
-    ):
-        self._dataframe = dataframe
-        # Make these fields public, so that the dataclass transformer can set a value for it
-        # https://github.com/flyteorg/flytekit/blob/bcc8541bd6227b532f8462563fe8aac902242b21/flytekit/core/type_engine.py#L298
-        self.uri = uri
-        # When dataclass_json runs from_json, we need to set it here, otherwise the format will be empty string
-        self.file_format = kwargs["file_format"] if "file_format" in kwargs else GENERIC_FORMAT
-        # This is a special attribute that indicates if the data was either downloaded or uploaded
-        self._metadata = metadata
-        # This is not for users to set, the transformer will set this.
-        self._literal_sd: Optional[literals_pb2.StructuredDataset] = None
-        # Not meant for users to set, will be set by an open() call
-        self._dataframe_type: Optional[DF] = None  # type: ignore
-        self._already_uploaded = False
+    ) -> "DataFrame":
+        """
+        Create a DataFrame reference from an existing remote dataframe.
+
+        Args:
+            remote_path: The remote path to the existing dataframe
+            format: Format of the stored dataframe
+
+        Example:
+        ```python
+        df = DataFrame.from_existing_remote("s3://bucket/data.parquet", format="parquet")
+        ```
+        """
+        return cls(uri=remote_path, format=format or GENERIC_FORMAT, **kwargs)
 
     @property
-    def dataframe(self) -> Optional[DF]:
-        return self._dataframe
+    def val(self) -> Optional[DF]:
+        return self._raw_df
 
     @property
     def metadata(self) -> Optional[literals_pb2.StructuredDatasetMetadata]:
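With `__init__` now generated by Pydantic, wrapping a live dataframe goes through the two classmethods added above. A usage sketch (assuming `DataFrame` is re-exported from `flyte.io`, per file 133 in the listing):

```python
import pandas as pd

from flyte.io import DataFrame  # assumed re-export

# Wrap an in-memory dataframe; nothing is uploaded until the type engine
# encodes it (case 3 in to_literal, further down in this diff).
df = DataFrame.from_df(pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]}))

# Reference data already sitting in blob storage; nothing is downloaded here
# (case 2 in to_literal).
ref = DataFrame.from_existing_remote("s3://bucket/data.parquet", format="parquet")
```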
@@ -168,18 +189,18 @@ class StructuredDataset(SerializableType, DataClassJSONMixin):
         return self._literal_sd
 
     def open(self, dataframe_type: Type[DF]):
-        from flyte.io.structured_dataset import lazy_import_structured_dataset_handler
-
         """
         Load the handler if needed. For the use case like:
         @task
-        def t1(sd: StructuredDataset):
+        def t1(df: DataFrame):
             import pandas as pd
-            sd.open(pd.DataFrame).all()
+            df.open(pd.DataFrame).all()
 
-        pandas is imported inside the task, so pandas handler won't be loaded during deserialization in type engine.
+        pandas is imported inside the task, so panda handler won't be loaded during deserialization in type engine.
         """
-        lazy_import_structured_dataset_handler()
+        from flyte.io._dataframe import lazy_import_dataframe_handler
+
+        lazy_import_dataframe_handler()
         self._dataframe_type = dataframe_type
         return self
 
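Reading back follows the docstring above: `open()` only records the target dataframe type and lazily loads its handler; the decode happens when the data is actually requested. A sketch of the consuming side, assuming a flyte 2.x `TaskEnvironment` and the built-in pandas decoder (whether `all()` needs an `await` depends on the sync/async wrapper, so an async task is used here):

```python
import pandas as pd

import flyte
from flyte.io import DataFrame

env = flyte.TaskEnvironment(name="df-reader")  # illustrative environment


@env.task
async def row_count(df: DataFrame) -> int:
    # pandas is imported at module level here, but importing it inside the
    # task body (as in the docstring above) works too.
    pdf = await df.open(pd.DataFrame).all()
    return len(pdf)
```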
@@ -187,22 +208,22 @@ class StructuredDataset(SerializableType, DataClassJSONMixin):
         if self._dataframe_type is None:
             raise ValueError("No dataframe type set. Use open() to set the local dataframe type you want to use.")
 
-        if self.uri is not None and self.dataframe is None:
-            expected = TypeEngine.to_literal_type(StructuredDataset)
+        if self.uri is not None and self.val is None:
+            expected = TypeEngine.to_literal_type(DataFrame)
             await self._set_literal(expected)
 
         return await flyte_dataset_transformer.open_as(self.literal, self._dataframe_type, self.metadata)
 
     async def _set_literal(self, expected: types_pb2.LiteralType) -> None:
         """
-        Explicitly set the StructuredDataset Literal to handle the following cases:
+        Explicitly set the DataFrame Literal to handle the following cases:
 
-        1. Read a dataframe from a StructuredDataset with an uri, for example:
+        1. Read the content from a DataFrame with an uri, for example:
 
         @task
-        def return_sd() -> StructuredDataset:
-            sd = StructuredDataset(uri="s3://my-s3-bucket/s3_flyte_dir/df.parquet", file_format="parquet")
-            df = sd.open(pd.DataFrame).all()
+        def return_df() -> DataFrame:
+            df = DataFrame(uri="s3://my-s3-bucket/s3_flyte_dir/df.parquet", format="parquet")
+            df = df.open(pd.DataFrame).all()
             return df
 
         For details, please refer to this issue: https://github.com/flyteorg/flyte/issues/5954.
@@ -212,14 +233,14 @@ class StructuredDataset(SerializableType, DataClassJSONMixin):
 
         For details, please refer to this issue: https://github.com/flyteorg/flyte/issues/5956.
         """
-        to_literal = await flyte_dataset_transformer.to_literal(self, StructuredDataset, expected)
+        to_literal = await flyte_dataset_transformer.to_literal(self, DataFrame, expected)
         self._literal_sd = to_literal.scalar.structured_dataset
         if self.metadata is None:
             self._metadata = self._literal_sd.metadata
 
     async def set_literal(self, expected: types_pb2.LiteralType) -> None:
         """
-        A public wrapper method to set the StructuredDataset Literal.
+        A public wrapper method to set the DataFrame Literal.
 
         This method provides external access to the internal _set_literal method.
         """
@@ -244,6 +265,9 @@ def flatten_dict(sub_dict: dict, parent_key: str = "") -> typing.Dict:
             fields = getattr(value, "__dataclass_fields__")
             d = {k: v.type for k, v in fields.items()}
             result.update(flatten_dict(sub_dict=d, parent_key=current_key))
+        elif hasattr(value, "model_fields"):  # Pydantic model
+            d = {k: v.annotation for k, v in value.model_fields.items()}
+            result.update(flatten_dict(sub_dict=d, parent_key=current_key))
         else:
             result[current_key] = value
     return result
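The new `elif` branch makes `flatten_dict` treat Pydantic models like dataclasses, reading field types from `model_fields[...].annotation` instead of `__dataclass_fields__[...].type`. Roughly, for a nested model (a sketch of the recursion, with dotted keys assumed from `parent_key`):

```python
from pydantic import BaseModel


class Inner(BaseModel):
    x: int
    y: str


class Outer(BaseModel):
    inner: Inner


# One level of what the new branch extracts:
assert {k: v.annotation for k, v in Outer.model_fields.items()} == {"inner": Inner}
# Recursing into Inner then yields something like {"inner.x": int, "inner.y": str}.
```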
@@ -256,7 +280,7 @@ def extract_cols_and_format(
     Helper function, just used to iterate through Annotations and extract out the following information:
       - base type, if not Annotated, it will just be the type that was passed in.
       - column information, as a collections.OrderedDict,
-      - the storage format, as a ``StructuredDatasetFormat`` (str),
+      - the storage format, as a ``DataFrameFormat`` (str),
       - pa.lib.Schema
 
     If more than one of any type of thing is found, an error will be raised.
@@ -286,7 +310,7 @@ def extract_cols_and_format(
                 d = collections.OrderedDict()
                 d.update(aa)
                 ordered_dict_cols = d
-            elif isinstance(aa, StructuredDatasetFormat):
+            elif isinstance(aa, DataFrameFormat):
                 if fmt != "":
                     raise ValueError(f"A format was already specified {fmt}, cannot use {aa}")
                 fmt = aa
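Because `DataFrameFormat` is still plain `str`, `extract_cols_and_format` interprets any bare string inside `Annotated[...]` as the storage format, alongside an ordered mapping for the column information; a second string raises, as shown above. The accepted annotation shape, sketched:

```python
import collections
from typing import Annotated

from flyte.io import DataFrame  # assumed re-export

# Column information plus an explicit storage format:
MyDF = Annotated[DataFrame, collections.OrderedDict(a=int, b=str), "parquet"]

# Annotated[DataFrame, "parquet", "csv"] would raise:
#   ValueError: A format was already specified parquet, cannot use csv
```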
@@ -305,7 +329,7 @@ def extract_cols_and_format(
     return t, ordered_dict_cols, fmt, pa_schema
 
 
-class StructuredDatasetEncoder(ABC, Generic[T]):
+class DataFrameEncoder(ABC, Generic[T]):
     def __init__(
         self,
         python_type: Type[T],
@@ -314,10 +338,10 @@ class StructuredDatasetEncoder(ABC, Generic[T]):
     ):
         """
         Extend this abstract class, implement the encode function, and register your concrete class with the
-        StructuredDatasetTransformerEngine class in order for the core flytekit type engine to handle
+        DataFrameTransformerEngine class in order for the core flytekit type engine to handle
         dataframe libraries. This is the encoding interface, meaning it is used when there is a Python value that the
         flytekit type engine is trying to convert into a Flyte Literal. For the other way, see
-        the StructuredDatasetEncoder
+        the DataFrameEncoder
 
         :param python_type: The dataframe class in question that you want to register this encoder with
         :param protocol: A prefix representing the storage driver (e.g. 's3, 'gs', 'bq', etc.). You can use either
@@ -347,7 +371,7 @@ class StructuredDatasetEncoder(ABC, Generic[T]):
     @abstractmethod
     async def encode(
         self,
-        structured_dataset: StructuredDataset,
+        dataframe: DataFrame,
         structured_dataset_type: types_pb2.StructuredDatasetType,
     ) -> literals_pb2.StructuredDataset:
         """
@@ -357,20 +381,20 @@ class StructuredDatasetEncoder(ABC, Generic[T]):
         the
         # TODO: Do we need to add a flag to indicate if it was wrapped by the transformer or by the user?
 
-        :param structured_dataset: This is a StructuredDataset wrapper object. See more info above.
-        :param structured_dataset_type: This the StructuredDatasetType, as found in the LiteralType of the interface
+        :param dataframe: This is a DataFrame wrapper object. See more info above.
+        :param structured_dataset_type: This the DataFrameType, as found in the LiteralType of the interface
          of the task that invoked this encoding call. It is passed along to encoders so that authors of encoders
-         can include it in the returned literals.StructuredDataset. See the IDL for more information on why this
+         can include it in the returned literals.DataFrame. See the IDL for more information on why this
          literal in particular carries the type information along with it. If the encoder doesn't supply it, it will
          also be filled in after the encoder runs by the transformer engine.
-        :return: This function should return a StructuredDataset literal object. Do not confuse this with the
-         StructuredDataset wrapper class used as input to this function - that is the user facing Python class.
-         This function needs to return the IDL StructuredDataset.
+        :return: This function should return a DataFrame literal object. Do not confuse this with the
+         DataFrame wrapper class used as input to this function - that is the user facing Python class.
+         This function needs to return the IDL DataFrame.
         """
         raise NotImplementedError
 
 
-class StructuredDatasetDecoder(ABC, Generic[DF]):
+class DataFrameDecoder(ABC, Generic[DF]):
     def __init__(
         self,
         python_type: Type[DF],
@@ -380,9 +404,9 @@ class StructuredDatasetDecoder(ABC, Generic[DF]):
     ):
         """
         Extend this abstract class, implement the decode function, and register your concrete class with the
-        StructuredDatasetTransformerEngine class in order for the core flytekit type engine to handle
+        DataFrameTransformerEngine class in order for the core flytekit type engine to handle
         dataframe libraries. This is the decoder interface, meaning it is used when there is a Flyte Literal value,
-        and we have to get a Python value out of it. For the other way, see the StructuredDatasetEncoder
+        and we have to get a Python value out of it. For the other way, see the DataFrameEncoder
 
         :param python_type: The dataframe class in question that you want to register this decoder with
         :param protocol: A prefix representing the storage driver (e.g. 's3, 'gs', 'bq', etc.). You can use either
@@ -419,8 +443,8 @@ class StructuredDatasetDecoder(ABC, Generic[DF]):
         This is code that will be called by the dataset transformer engine to ultimately translate from a Flyte Literal
         value into a Python instance.
 
-        :param flyte_value: This will be a Flyte IDL StructuredDataset Literal - do not confuse this with the
-         StructuredDataset class defined also in this module.
+        :param flyte_value: This will be a Flyte IDL DataFrame Literal - do not confuse this with the
+         DataFrame class defined also in this module.
         :param current_task_metadata: Metadata object containing the type (and columns if any) for the currently
          executing task. This type may have more or less information than the type information bundled
          inside the incoming flyte_value.
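Bringing a custom dataframe library to the type engine still means implementing both ABCs and registering them; only the class names changed. A compressed sketch against the renamed API, assuming the constructor keeps flytekit's `(python_type, protocol, supported_format)` parameters and that the engine classes are importable from `flyte.io._dataframe` (file 134 above); `MyDF` and the no-op I/O are purely illustrative:

```python
from flyteidl2.core import literals_pb2

from flyte.io._dataframe import DataFrameEncoder, DataFrameTransformerEngine


class MyDF:
    """Illustrative in-memory dataframe container."""


class MyDFToParquetEncoder(DataFrameEncoder):
    def __init__(self):
        super().__init__(python_type=MyDF, protocol="s3", supported_format="parquet")

    async def encode(self, dataframe, structured_dataset_type):
        # Real code would write dataframe.val out as parquet under a fresh uri.
        return literals_pb2.StructuredDataset(
            uri=dataframe.uri,
            metadata=literals_pb2.StructuredDatasetMetadata(
                structured_dataset_type=structured_dataset_type
            ),
        )


# default_for_type makes bare MyDF return values route through this handler.
DataFrameTransformerEngine.register(MyDFToParquetEncoder(), default_for_type=True)
```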
@@ -459,19 +483,19 @@ def get_supported_types():
 class DuplicateHandlerError(ValueError): ...
 
 
-class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
+class DataFrameTransformerEngine(TypeTransformer[DataFrame]):
     """
     Think of this transformer as a higher-level meta transformer that is used for all the dataframe types.
     If you are bringing a custom data frame type, or any data frame type, to flytekit, instead of
     registering with the main type engine, you should register with this transformer instead.
     """
 
-    ENCODERS: ClassVar[Dict[Type, Dict[str, Dict[str, StructuredDatasetEncoder]]]] = {}
-    DECODERS: ClassVar[Dict[Type, Dict[str, Dict[str, StructuredDatasetDecoder]]]] = {}
+    ENCODERS: ClassVar[Dict[Type, Dict[str, Dict[str, DataFrameEncoder]]]] = {}
+    DECODERS: ClassVar[Dict[Type, Dict[str, Dict[str, DataFrameDecoder]]]] = {}
     DEFAULT_PROTOCOLS: ClassVar[Dict[Type, str]] = {}
     DEFAULT_FORMATS: ClassVar[Dict[Type, str]] = {}
 
-    Handlers = Union[StructuredDatasetEncoder, StructuredDatasetDecoder]
+    Handlers = Union[DataFrameEncoder, DataFrameDecoder]
     Renderers: ClassVar[Dict[Type, Renderable]] = {}
 
     @classmethod
@@ -527,17 +551,17 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
 
     @classmethod
     def get_encoder(cls, df_type: Type, protocol: str, format: str):
-        return cls._finder(StructuredDatasetTransformerEngine.ENCODERS, df_type, protocol, format)
+        return cls._finder(DataFrameTransformerEngine.ENCODERS, df_type, protocol, format)
 
     @classmethod
-    def get_decoder(cls, df_type: Type, protocol: str, format: str) -> StructuredDatasetDecoder:
-        return cls._finder(StructuredDatasetTransformerEngine.DECODERS, df_type, protocol, format)
+    def get_decoder(cls, df_type: Type, protocol: str, format: str) -> DataFrameDecoder:
+        return cls._finder(DataFrameTransformerEngine.DECODERS, df_type, protocol, format)
 
     @classmethod
     def _handler_finder(cls, h: Handlers, protocol: str) -> Dict[str, Handlers]:
-        if isinstance(h, StructuredDatasetEncoder):
+        if isinstance(h, DataFrameEncoder):
             top_level = cls.ENCODERS
-        elif isinstance(h, StructuredDatasetDecoder):
+        elif isinstance(h, DataFrameDecoder):
             top_level = cls.DECODERS  # type: ignore
         else:
             raise TypeError(f"We don't support this type of handler {h}")
@@ -548,7 +572,7 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
         return top_level[h.python_type][protocol]  # type: ignore
 
     def __init__(self):
-        super().__init__("StructuredDataset Transformer", StructuredDataset)
+        super().__init__("DataFrame Transformer", DataFrame)
         self._type_assertions_enabled = False
 
     @classmethod
@@ -568,7 +592,7 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
         Call this with any Encoder or Decoder to register it with the flytekit type system. If your handler does not
         specify a protocol (e.g. s3, gs, etc.) field, then
 
-        :param h: The StructuredDatasetEncoder or StructuredDatasetDecoder you wish to register with this transformer.
+        :param h: The DataFrameEncoder or DataFrameDecoder you wish to register with this transformer.
         :param default_for_type: If set, when a user returns from a task an instance of the dataframe the handler
          handles, e.g. ``return pd.DataFrame(...)``, not wrapped around the ``StructuredDataset`` object, we will
          use this handler's protocol and format as the default, effectively saying that this handler will be called.
@@ -582,7 +606,7 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
         :param default_storage_for_type: Same as above but only for the storage format. Error if already set,
          unless override is specified.
         """
-        if not (isinstance(h, StructuredDatasetEncoder) or isinstance(h, StructuredDatasetDecoder)):
+        if not (isinstance(h, DataFrameEncoder) or isinstance(h, DataFrameDecoder)):
             raise TypeError(f"We don't support this type of handler {h}")
 
         if h.protocol is None:
@@ -623,17 +647,21 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
                 f"Already registered a handler for {(h.python_type, protocol, h.supported_format)}"
             )
         lowest_level[h.supported_format] = h
-        logger.debug(f"Registered {h} as handler for {h.python_type}, protocol {protocol}, fmt {h.supported_format}")
+        logger.debug(
+            f"Registered {h.__class__.__name__} as handler for {h.python_type.__class__.__name__},"
+            f" protocol {protocol}, fmt {h.supported_format}"
+        )
 
         if (default_format_for_type or default_for_type) and h.supported_format != GENERIC_FORMAT:
             if h.python_type in cls.DEFAULT_FORMATS and not override:
                 if cls.DEFAULT_FORMATS[h.python_type] != h.supported_format:
                     logger.info(
-                        f"Not using handler {h} with format {h.supported_format}"
-                        f" as default for {h.python_type}, {cls.DEFAULT_FORMATS[h.python_type]} already specified."
+                        f"Not using handler {h.__class__.__name__} with format {h.supported_format}"
+                        f" as default for {h.python_type.__class__.__name__},"
+                        f" {cls.DEFAULT_FORMATS[h.python_type]} already specified."
                     )
             else:
-                logger.debug(f"Use {type(h).__name__} as default handler for {h.python_type}.")
+                logger.debug(f"Use {type(h).__name__} as default handler for {h.python_type.__class__.__name__}.")
                 cls.DEFAULT_FORMATS[h.python_type] = h.supported_format
         if default_storage_for_type or default_for_type:
             if h.protocol in cls.DEFAULT_PROTOCOLS and not override:
@@ -648,27 +676,27 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
         # Register with the type engine as well
         # The semantics as of now are such that it doesn't matter which order these transformers are loaded in, as
         # long as the older Pandas/FlyteSchema transformer do not also specify the override
-        engine = StructuredDatasetTransformerEngine()
+        engine = DataFrameTransformerEngine()
         TypeEngine.register_additional_type(engine, h.python_type, override=True)
 
-    def assert_type(self, t: Type[StructuredDataset], v: typing.Any):
+    def assert_type(self, t: Type[DataFrame], v: typing.Any):
         return
 
     async def to_literal(
         self,
-        python_val: Union[StructuredDataset, typing.Any],
-        python_type: Union[Type[StructuredDataset], Type],
+        python_val: Union[DataFrame, typing.Any],
+        python_type: Union[Type[DataFrame], Type],
         expected: types_pb2.LiteralType,
     ) -> literals_pb2.Literal:
         # Make a copy in case we need to hand off to encoders, since we can't be sure of mutations.
-        python_type, *attrs = extract_cols_and_format(python_type)
+        python_type, *_attrs = extract_cols_and_format(python_type)
         sdt = types_pb2.StructuredDatasetType(format=self.DEFAULT_FORMATS.get(python_type, GENERIC_FORMAT))
 
-        if issubclass(python_type, StructuredDataset) and not isinstance(python_val, StructuredDataset):
+        if issubclass(python_type, DataFrame) and not isinstance(python_val, DataFrame):
             # Catch a common mistake
             raise TypeTransformerFailedError(
-                f"Expected a StructuredDataset instance, but got {type(python_val)} instead."
-                f" Did you forget to wrap your dataframe in a StructuredDataset instance?"
+                f"Expected a DataFrame instance, but got {type(python_val)} instead."
+                f" Did you forget to wrap your dataframe in a DataFrame instance?"
             )
 
         if expected and expected.structured_dataset_type:
@@ -679,60 +707,60 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
                 external_schema_bytes=expected.structured_dataset_type.external_schema_bytes,
             )
 
-        # If the type signature has the StructuredDataset class, it will, or at least should, also be a
-        # StructuredDataset instance.
-        if isinstance(python_val, StructuredDataset):
+        # If the type signature has the DataFrame class, it will, or at least should, also be a
+        # DataFrame instance.
+        if isinstance(python_val, DataFrame):
             # There are three cases that we need to take care of here.
 
-            # 1. A task returns a StructuredDataset that was just a passthrough input. If this happens
-            # then return the original literals.StructuredDataset without invoking any encoder
+            # 1. A task returns a DataFrame that was just a passthrough input. If this happens
+            # then return the original literals.DataFrame without invoking any encoder
             #
             # Ex.
-            #   def t1(dataset: Annotated[StructuredDataset, my_cols]) -> Annotated[StructuredDataset, my_cols]:
+            #   def t1(dataset: Annotated[DataFrame, my_cols]) -> Annotated[DataFrame, my_cols]:
             #       return dataset
             if python_val._literal_sd is not None:
                 if python_val._already_uploaded:
                     return literals_pb2.Literal(scalar=literals_pb2.Scalar(structured_dataset=python_val._literal_sd))
-                if python_val.dataframe is not None:
+                if python_val.val is not None:
                     raise ValueError(
-                        f"Shouldn't have specified both literal {python_val._literal_sd}"
-                        f" and dataframe {python_val.dataframe}"
+                        f"Shouldn't have specified both literal {python_val._literal_sd} and dataframe {python_val.val}"
                     )
                 return literals_pb2.Literal(scalar=literals_pb2.Scalar(structured_dataset=python_val._literal_sd))
 
-            # 2. A task returns a python StructuredDataset with an uri.
-            # Note: this case is also what happens we start a local execution of a task with a python StructuredDataset.
-            # It gets converted into a literal first, then back into a python StructuredDataset.
+            # 2. A task returns a python DataFrame with an uri.
+            # Note: this case is also what happens we start a local execution of a task with a python DataFrame.
+            # It gets converted into a literal first, then back into a python DataFrame.
             #
             # Ex.
-            #   def t2(uri: str) -> Annotated[StructuredDataset, my_cols]
-            #       return StructuredDataset(uri=uri)
-            if python_val.dataframe is None:
+            #   def t2(uri: str) -> Annotated[DataFrame, my_cols]
+            #       return DataFrame(uri=uri)
+            if python_val.val is None:
                 uri = python_val.uri
-                file_format = python_val.file_format
+                format_val = python_val.format
 
                 # Check the user-specified uri
                 if not uri:
                     raise ValueError(f"If dataframe is not specified, then the uri should be specified. {python_val}")
                 if not storage.is_remote(uri):
-                    uri = await storage.put(uri)
+                    uri = await storage.put(uri, recursive=True)
 
-                # Check the user-specified file_format
-                # When users specify file_format for a StructuredDataset, the file_format should be retained
+                # Check the user-specified format
+                # When users specify format for a DataFrame, the format should be retained
                 # conditionally. For details, please refer to https://github.com/flyteorg/flyte/issues/6096.
                 # Following illustrates why we can't always copy the user-specified file_format over:
                 #
                 # @task
-                # def modify_format(sd: Annotated[StructuredDataset, {}, "task-format"]) -> StructuredDataset:
-                #     return sd
+                # def modify_format(df: Annotated[DataFrame, {}, "task-format"]) -> DataFrame:
+                #     return df
                 #
-                # sd = StructuredDataset(uri="s3://my-s3-bucket/df.parquet", file_format="user-format")
-                # sd2 = modify_format(sd=sd)
+                # df = DataFrame(uri="s3://my-s3-bucket/df.parquet", format="user-format")
+                # df2 = modify_format(df=df)
                 #
-                # In this case, we expect sd2.file_format to be task-format (as shown in Annotated), not user-format.
-                # If we directly copy the user-specified file_format over, the type hint information will be missing.
-                if sdt.format == GENERIC_FORMAT and file_format != GENERIC_FORMAT:
-                    sdt.format = file_format
+                # In this case, we expect the df2.format to be task-format (as shown in Annotated),
+                # not user-format. If we directly copy the user-specified format over,
+                # the type hint information will be missing.
+                if sdt.format == GENERIC_FORMAT and format_val != GENERIC_FORMAT:
+                    sdt.format = format_val
 
                 sd_model = literals_pb2.StructuredDataset(
                     uri=uri,
@@ -740,9 +768,9 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
                 )
                 return literals_pb2.Literal(scalar=literals_pb2.Scalar(structured_dataset=sd_model))
 
-            # 3. This is the third and probably most common case. The python StructuredDataset object wraps a dataframe
+            # 3. This is the third and probably most common case. The python DataFrame object wraps a dataframe
             # that we will need to invoke an encoder for. Figure out which encoder to call and invoke it.
-            df_type = type(python_val.dataframe)
+            df_type = type(python_val.val)
             protocol = self._protocol_from_type_or_prefix(df_type, python_val.uri)
 
             return await self.encode(
@@ -760,8 +788,9 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
             structured_dataset_type=expected.structured_dataset_type if expected else None
         )
 
-        sd = StructuredDataset(dataframe=python_val, metadata=meta)
-        return await self.encode(sd, python_type, protocol, fmt, sdt)
+        fdf = DataFrame.from_df(val=python_val)
+        fdf._metadata = meta
+        return await self.encode(fdf, python_type, protocol, fmt, sdt)
 
     def _protocol_from_type_or_prefix(self, df_type: Type, uri: Optional[str] = None) -> str:
         """
@@ -782,16 +811,16 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
 
     async def encode(
         self,
-        sd: StructuredDataset,
+        df: DataFrame,
         df_type: Type,
         protocol: str,
         format: str,
         structured_literal_type: types_pb2.StructuredDatasetType,
     ) -> literals_pb2.Literal:
-        handler: StructuredDatasetEncoder
+        handler: DataFrameEncoder
         handler = self.get_encoder(df_type, protocol, format)
 
-        sd_model = await handler.encode(sd, structured_literal_type)
+        sd_model = await handler.encode(df, structured_literal_type)
         # This block is here in case the encoder did not set the type information in the metadata. Since this literal
         # is special in that it carries around the type itself, we want to make sure the type info therein is at
         # least as good as the type of the interface.
@@ -807,75 +836,16 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
         lit = literals_pb2.Literal(scalar=literals_pb2.Scalar(structured_dataset=sd_model))

         # Because the handler.encode may have uploaded something, and because the sd may end up living inside a
-        # dataclass, we need to modify any uploaded flyte:// urls here.
-        modify_literal_uris(lit)  # todo: verify that this can be removed.
-        sd._literal_sd = sd_model
-        sd._already_uploaded = True
+        # dataclass, we need to modify any uploaded flyte:// urls here. Needed here even though the Type engine
+        # already does this because the DataframeTransformerEngine may be called directly.
+        modify_literal_uris(lit)
+        df._literal_sd = sd_model
+        df._already_uploaded = True
         return lit

-    # pr: han-ru: can this be removed if we make StructuredDataset a pydantic model?
-    def dict_to_structured_dataset(
-        self, dict_obj: typing.Dict[str, str], expected_python_type: Type[T] | StructuredDataset
-    ) -> T | StructuredDataset:
-        uri = dict_obj.get("uri", None)
-        file_format = dict_obj.get("file_format", None)
-
-        if uri is None:
-            raise ValueError("StructuredDataset's uri and file format should not be None")
-
-        # Instead of using the python-native StructuredDataset, we need to build a literals.StructuredDataset.
-        # The reason is that _literal_sd of the python sd is accessed when the task output LiteralMap is
-        # converted back to flyteidl; hence, _literal_sd must have a to_flyte_idl method.
-        # See https://github.com/flyteorg/flytekit/blob/f938661ff8413219d1bea77f6914a58c302d5c6c/flytekit/bin/entrypoint.py#L326
-        # For details, please refer to this issue: https://github.com/flyteorg/flyte/issues/5956.
-        sdt = types_pb2.StructuredDatasetType(format=file_format)
-        metad = literals_pb2.StructuredDatasetMetadata(structured_dataset_type=sdt)
-        sd_literal = literals_pb2.StructuredDataset(uri=uri, metadata=metad)
-
-        return asyncio.run(
-            StructuredDatasetTransformerEngine().to_python_value(
-                literals_pb2.Literal(scalar=literals_pb2.Scalar(structured_dataset=sd_literal)),
-                expected_python_type,
-            )
-        )
-
-    def from_binary_idl(
-        self, binary_idl_object: literals_pb2.Binary, expected_python_type: Type[T] | StructuredDataset
-    ) -> T | StructuredDataset:
-        """
-        If the input is from flytekit, the life cycle will be as follows:
-
-        Life Cycle:
-        binary IDL -> resolved binary -> bytes -> expected Python object
-        (flytekit customized    (propeller processing)    (flytekit binary IDL)    (flytekit customized
-         serialization)                                                             deserialization)
-
-        Example Code:
-            @dataclass
-            class DC:
-                sd: StructuredDataset
-
-            @workflow
-            def wf(dc: DC):
-                t_sd(dc.sd)
-
-        Note:
-        - The deserialization is the same as putting a structured dataset in a dataclass,
-          which will deserialize via mashumaro's API.
-
-        Related PR:
-        - Title: Override Dataclass Serialization/Deserialization Behavior for FlyteTypes via Mashumaro
-        - Link: https://github.com/flyteorg/flytekit/pull/2554
-        """
-        if binary_idl_object.tag == MESSAGEPACK:
-            python_val = msgpack.loads(binary_idl_object.value)
-            return self.dict_to_structured_dataset(dict_obj=python_val, expected_python_type=expected_python_type)
-        else:
-            raise TypeTransformerFailedError(f"Unsupported binary format: `{binary_idl_object.tag}`")
-
     async def to_python_value(
-        self, lv: literals_pb2.Literal, expected_python_type: Type[T] | StructuredDataset
-    ) -> T | StructuredDataset:
+        self, lv: literals_pb2.Literal, expected_python_type: Type[T] | DataFrame
+    ) -> T | DataFrame:
         """
         The only tricky thing with converting a Literal (say the output of an earlier task) to a Python value at
         the start of a task execution is the column subsetting behavior. For example, if you have,
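The two deleted methods existed to support dataclass attribute access, where a structured dataset nested in a dataclass arrives as a msgpack-encoded binary scalar. The flytekit 1.x pattern they enabled, quoted from the removed docstring (workflow and t_sd are that example's own names), looked like the snippet below; under the new engine the binary branch in to_python_value raises TypeTransformerFailedError instead:

    from dataclasses import dataclass

    # Pattern from the removed docstring; no longer supported by this engine.
    @dataclass
    class DC:
        sd: StructuredDataset

    @workflow
    def wf(dc: DC):
        t_sd(dc.sd)  # attribute access used to route through from_binary_idl
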
@@ -906,14 +876,13 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
         |                             | the running task's signature.           |                                      |
         +-----------------------------+-----------------------------------------+--------------------------------------+
         """
-        # Handle dataclass attribute access
         if lv.HasField("scalar") and lv.scalar.HasField("binary"):
-            return self.from_binary_idl(lv.scalar.binary, expected_python_type)
+            raise TypeTransformerFailedError("Attribute access unsupported.")

         # Detect annotations and extract out all the relevant information that the user might supply
-        expected_python_type, column_dict, storage_fmt, pa_schema = extract_cols_and_format(expected_python_type)
+        expected_python_type, column_dict, _storage_fmt, _pa_schema = extract_cols_and_format(expected_python_type)

-        # Start handling for StructuredDataset scalars, first look at the columns
+        # Start handling for DataFrame scalars; first look at the columns
         incoming_columns = lv.scalar.structured_dataset.metadata.structured_dataset_type.columns

         # If the incoming literal also doesn't have columns, then we just have an empty list, so initialize it here
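As the docstring's table describes, a task may declare fewer columns than the incoming literal carries, and the declared subset wins. A minimal sketch of the declaration side, assuming flyte 2.x keeps the Annotated column syntax shown in this file (Annotated[DataFrame, my_cols]); the column names are illustrative:

    from collections import OrderedDict
    from typing import Annotated

    # Upstream wrote columns {x: int, y: str, z: float}; declaring only x and y
    # means only those two columns are surfaced when this input is opened.
    my_cols = OrderedDict(x=int, y=str)

    def t1(input_a: Annotated[DataFrame, my_cols]) -> None:
        ...
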
@@ -935,30 +904,27 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
             )
             metad = literals_pb2.StructuredDatasetMetadata(structured_dataset_type=new_sdt)

-        # A StructuredDataset type, for example
-        # t1(input_a: StructuredDataset) # or
-        # t1(input_a: Annotated[StructuredDataset, my_cols])
-        if issubclass(expected_python_type, StructuredDataset):
-            sd = expected_python_type(
-                dataframe=None,
-                # Note here that the type being passed in
-                metadata=metad,
-            )
-            sd._literal_sd = lv.scalar.structured_dataset
-            sd.file_format = metad.structured_dataset_type.format
-            return sd
-
-        # If the requested type was not a StructuredDataset, then it means it was a plain dataframe type, which means
+        # A DataFrame type, for example
+        # t1(input_a: DataFrame) # or
+        # t1(input_a: Annotated[DataFrame, my_cols])
+        if issubclass(expected_python_type, DataFrame):
+            fdf = DataFrame(format=metad.structured_dataset_type.format, uri=lv.scalar.structured_dataset.uri)
+            fdf._already_uploaded = True
+            fdf._literal_sd = lv.scalar.structured_dataset
+            fdf._metadata = metad
+            return fdf
+
+        # If the requested type was not a flyte.DataFrame, then it means it was a raw dataframe type, which means
         # we should do the opening/downloading and whatever else it might entail right now. No iteration option here.
         return await self.open_as(lv.scalar.structured_dataset, df_type=expected_python_type, updated_metadata=metad)

     def to_html(self, python_val: typing.Any, expected_python_type: Type[T]) -> str:
-        if isinstance(python_val, StructuredDataset):
-            if python_val.dataframe is not None:
-                df = python_val.dataframe
+        if isinstance(python_val, DataFrame):
+            if python_val.val is not None:
+                df = python_val.val
             else:
                 # Here we only render column information by default instead of opening the structured dataset.
-                col = typing.cast(StructuredDataset, python_val).columns()
+                col = typing.cast(DataFrame, python_val).columns()
                 dataframe = pd.DataFrame(col, ["column type"])
                 return dataframe.to_html()  # type: ignore
         else:
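The two branches above give the task author a choice between lazy and eager materialization; conceptually (pandas used for illustration, function names are hypothetical):

    import pandas as pd

    def t_lazy(input_a: DataFrame) -> None:
        # Declared as flyte's DataFrame: nothing was downloaded; the object just
        # carries uri/format/metadata until it is explicitly opened.
        print(input_a.uri, input_a.format)

    def t_eager(input_a: pd.DataFrame) -> None:
        # Declared as a raw dataframe type: open_as() already ran a decoder, so
        # this is a fully materialized pandas object.
        print(input_a.shape)
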
@@ -1004,11 +970,12 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
     def _get_dataset_column_literal_type(self, t: Type) -> types_pb2.LiteralType:
         if t in get_supported_types():
             return get_supported_types()[t]
-        if hasattr(t, "__origin__") and t.__origin__ is list:
+        origin = getattr(t, "__origin__", None)
+        if origin is list:
             return types_pb2.LiteralType(collection_type=self._get_dataset_column_literal_type(t.__args__[0]))
-        if hasattr(t, "__origin__") and t.__origin__ is dict:
+        if origin is dict:
             return types_pb2.LiteralType(map_value_type=self._get_dataset_column_literal_type(t.__args__[1]))
-        raise AssertionError(f"type {t} is currently not supported by StructuredDataset")
+        raise AssertionError(f"type {t} is currently not supported by DataFrame")

     def _convert_ordered_dict_of_columns_to_list(
         self, column_map: typing.Optional[typing.OrderedDict[str, Type]]
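The refactor above simply reads __origin__ once instead of twice; the recursion itself is unchanged. For example, column declarations descend into container types as follows (the SimpleType names in the comments are illustrative):

    # list[int]        -> LiteralType(collection_type=LiteralType(simple=INTEGER))
    # dict[str, float] -> LiteralType(map_value_type=LiteralType(simple=FLOAT))
    assert getattr(list[int], "__origin__", None) is list
    assert getattr(dict[str, float], "__origin__", None) is dict
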
@@ -1022,10 +989,8 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
             converted_cols.append(types_pb2.StructuredDatasetType.DatasetColumn(name=k, literal_type=lt))
         return converted_cols

-    def _get_dataset_type(
-        self, t: typing.Union[Type[StructuredDataset], typing.Any]
-    ) -> types_pb2.StructuredDatasetType:
-        original_python_type, column_map, storage_format, pa_schema = extract_cols_and_format(t)  # type: ignore
+    def _get_dataset_type(self, t: typing.Union[Type[DataFrame], typing.Any]) -> types_pb2.StructuredDatasetType:
+        _original_python_type, column_map, storage_format, pa_schema = extract_cols_and_format(t)  # type: ignore

         # Get the column information
         converted_cols: typing.List[types_pb2.StructuredDatasetType.DatasetColumn] = (
@@ -1039,7 +1004,7 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
             external_schema_bytes=typing.cast(pa.lib.Schema, pa_schema).to_string().encode() if pa_schema else None,
         )

-    def get_literal_type(self, t: typing.Union[Type[StructuredDataset], typing.Any]) -> types_pb2.LiteralType:
+    def get_literal_type(self, t: typing.Union[Type[DataFrame], typing.Any]) -> types_pb2.LiteralType:
         """
         Provide a concrete implementation so that writers of custom dataframe handlers don't have to; there's nothing
         special about the literal type, since any dataframe type will always be associated with the structured dataset type.
@@ -1049,13 +1014,13 @@ class StructuredDatasetTransformerEngine(TypeTransformer[StructuredDataset]):
         """
         return types_pb2.LiteralType(structured_dataset_type=self._get_dataset_type(t))

-    def guess_python_type(self, literal_type: types_pb2.LiteralType) -> Type[StructuredDataset]:
+    def guess_python_type(self, literal_type: types_pb2.LiteralType) -> Type[DataFrame]:
         # todo: technically we should return the dataframe type specified in the constructor, but to do that,
         # we'd have to store that, which we don't do today. See possibly #1363
         if literal_type.HasField("structured_dataset_type"):
-            return StructuredDataset
-        raise ValueError(f"StructuredDatasetTransformerEngine cannot reverse {literal_type}")
+            return DataFrame
+        raise ValueError(f"DataFrameTransformerEngine cannot reverse {literal_type}")


-flyte_dataset_transformer = StructuredDatasetTransformerEngine()
+flyte_dataset_transformer = DataFrameTransformerEngine()
 TypeEngine.register(flyte_dataset_transformer)
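Taken together, registration closes the round trip: any dataframe-ish Python type forward-maps to a structured-dataset literal type, and the reverse lookup can only recover flyte's DataFrame. A small sketch of that invariant, using only the methods shown in this diff (a sketch, not a test from the package):

    lt = flyte_dataset_transformer.get_literal_type(DataFrame)
    assert lt.HasField("structured_dataset_type")
    assert flyte_dataset_transformer.guess_python_type(lt) is DataFrame
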