flyte 0.0.1b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flyte might be problematic. Click here for more details.

Files changed (390) hide show
  1. flyte/__init__.py +62 -0
  2. flyte/_api_commons.py +3 -0
  3. flyte/_bin/__init__.py +0 -0
  4. flyte/_bin/runtime.py +126 -0
  5. flyte/_build.py +25 -0
  6. flyte/_cache/__init__.py +12 -0
  7. flyte/_cache/cache.py +146 -0
  8. flyte/_cache/defaults.py +9 -0
  9. flyte/_cache/policy_function_body.py +42 -0
  10. flyte/_cli/__init__.py +0 -0
  11. flyte/_cli/_common.py +287 -0
  12. flyte/_cli/_create.py +42 -0
  13. flyte/_cli/_delete.py +23 -0
  14. flyte/_cli/_deploy.py +140 -0
  15. flyte/_cli/_get.py +235 -0
  16. flyte/_cli/_run.py +152 -0
  17. flyte/_cli/main.py +72 -0
  18. flyte/_code_bundle/__init__.py +8 -0
  19. flyte/_code_bundle/_ignore.py +113 -0
  20. flyte/_code_bundle/_packaging.py +187 -0
  21. flyte/_code_bundle/_utils.py +339 -0
  22. flyte/_code_bundle/bundle.py +178 -0
  23. flyte/_context.py +146 -0
  24. flyte/_datastructures.py +342 -0
  25. flyte/_deploy.py +202 -0
  26. flyte/_doc.py +29 -0
  27. flyte/_docstring.py +32 -0
  28. flyte/_environment.py +43 -0
  29. flyte/_group.py +31 -0
  30. flyte/_hash.py +23 -0
  31. flyte/_image.py +760 -0
  32. flyte/_initialize.py +634 -0
  33. flyte/_interface.py +84 -0
  34. flyte/_internal/__init__.py +3 -0
  35. flyte/_internal/controllers/__init__.py +115 -0
  36. flyte/_internal/controllers/_local_controller.py +118 -0
  37. flyte/_internal/controllers/_trace.py +40 -0
  38. flyte/_internal/controllers/pbhash.py +39 -0
  39. flyte/_internal/controllers/remote/__init__.py +40 -0
  40. flyte/_internal/controllers/remote/_action.py +141 -0
  41. flyte/_internal/controllers/remote/_client.py +43 -0
  42. flyte/_internal/controllers/remote/_controller.py +361 -0
  43. flyte/_internal/controllers/remote/_core.py +402 -0
  44. flyte/_internal/controllers/remote/_informer.py +361 -0
  45. flyte/_internal/controllers/remote/_service_protocol.py +50 -0
  46. flyte/_internal/imagebuild/__init__.py +11 -0
  47. flyte/_internal/imagebuild/docker_builder.py +416 -0
  48. flyte/_internal/imagebuild/image_builder.py +241 -0
  49. flyte/_internal/imagebuild/remote_builder.py +0 -0
  50. flyte/_internal/resolvers/__init__.py +0 -0
  51. flyte/_internal/resolvers/_task_module.py +54 -0
  52. flyte/_internal/resolvers/common.py +31 -0
  53. flyte/_internal/resolvers/default.py +28 -0
  54. flyte/_internal/runtime/__init__.py +0 -0
  55. flyte/_internal/runtime/convert.py +199 -0
  56. flyte/_internal/runtime/entrypoints.py +135 -0
  57. flyte/_internal/runtime/io.py +136 -0
  58. flyte/_internal/runtime/resources_serde.py +138 -0
  59. flyte/_internal/runtime/task_serde.py +210 -0
  60. flyte/_internal/runtime/taskrunner.py +190 -0
  61. flyte/_internal/runtime/types_serde.py +54 -0
  62. flyte/_logging.py +124 -0
  63. flyte/_protos/__init__.py +0 -0
  64. flyte/_protos/common/authorization_pb2.py +66 -0
  65. flyte/_protos/common/authorization_pb2.pyi +108 -0
  66. flyte/_protos/common/authorization_pb2_grpc.py +4 -0
  67. flyte/_protos/common/identifier_pb2.py +71 -0
  68. flyte/_protos/common/identifier_pb2.pyi +82 -0
  69. flyte/_protos/common/identifier_pb2_grpc.py +4 -0
  70. flyte/_protos/common/identity_pb2.py +48 -0
  71. flyte/_protos/common/identity_pb2.pyi +72 -0
  72. flyte/_protos/common/identity_pb2_grpc.py +4 -0
  73. flyte/_protos/common/list_pb2.py +36 -0
  74. flyte/_protos/common/list_pb2.pyi +69 -0
  75. flyte/_protos/common/list_pb2_grpc.py +4 -0
  76. flyte/_protos/common/policy_pb2.py +37 -0
  77. flyte/_protos/common/policy_pb2.pyi +27 -0
  78. flyte/_protos/common/policy_pb2_grpc.py +4 -0
  79. flyte/_protos/common/role_pb2.py +37 -0
  80. flyte/_protos/common/role_pb2.pyi +53 -0
  81. flyte/_protos/common/role_pb2_grpc.py +4 -0
  82. flyte/_protos/common/runtime_version_pb2.py +28 -0
  83. flyte/_protos/common/runtime_version_pb2.pyi +24 -0
  84. flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
  85. flyte/_protos/logs/dataplane/payload_pb2.py +96 -0
  86. flyte/_protos/logs/dataplane/payload_pb2.pyi +168 -0
  87. flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
  88. flyte/_protos/secret/definition_pb2.py +49 -0
  89. flyte/_protos/secret/definition_pb2.pyi +93 -0
  90. flyte/_protos/secret/definition_pb2_grpc.py +4 -0
  91. flyte/_protos/secret/payload_pb2.py +62 -0
  92. flyte/_protos/secret/payload_pb2.pyi +94 -0
  93. flyte/_protos/secret/payload_pb2_grpc.py +4 -0
  94. flyte/_protos/secret/secret_pb2.py +38 -0
  95. flyte/_protos/secret/secret_pb2.pyi +6 -0
  96. flyte/_protos/secret/secret_pb2_grpc.py +198 -0
  97. flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
  98. flyte/_protos/validate/validate/validate_pb2.py +76 -0
  99. flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
  100. flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
  101. flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
  102. flyte/_protos/workflow/queue_service_pb2.py +106 -0
  103. flyte/_protos/workflow/queue_service_pb2.pyi +141 -0
  104. flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
  105. flyte/_protos/workflow/run_definition_pb2.py +128 -0
  106. flyte/_protos/workflow/run_definition_pb2.pyi +310 -0
  107. flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
  108. flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
  109. flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
  110. flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
  111. flyte/_protos/workflow/run_service_pb2.py +133 -0
  112. flyte/_protos/workflow/run_service_pb2.pyi +175 -0
  113. flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
  114. flyte/_protos/workflow/state_service_pb2.py +58 -0
  115. flyte/_protos/workflow/state_service_pb2.pyi +71 -0
  116. flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
  117. flyte/_protos/workflow/task_definition_pb2.py +72 -0
  118. flyte/_protos/workflow/task_definition_pb2.pyi +65 -0
  119. flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
  120. flyte/_protos/workflow/task_service_pb2.py +44 -0
  121. flyte/_protos/workflow/task_service_pb2.pyi +31 -0
  122. flyte/_protos/workflow/task_service_pb2_grpc.py +104 -0
  123. flyte/_resources.py +226 -0
  124. flyte/_retry.py +32 -0
  125. flyte/_reusable_environment.py +25 -0
  126. flyte/_run.py +411 -0
  127. flyte/_secret.py +61 -0
  128. flyte/_task.py +367 -0
  129. flyte/_task_environment.py +200 -0
  130. flyte/_timeout.py +47 -0
  131. flyte/_tools.py +27 -0
  132. flyte/_trace.py +128 -0
  133. flyte/_utils/__init__.py +20 -0
  134. flyte/_utils/asyn.py +119 -0
  135. flyte/_utils/coro_management.py +25 -0
  136. flyte/_utils/file_handling.py +72 -0
  137. flyte/_utils/helpers.py +108 -0
  138. flyte/_utils/lazy_module.py +54 -0
  139. flyte/_utils/uv_script_parser.py +49 -0
  140. flyte/_version.py +21 -0
  141. flyte/connectors/__init__.py +0 -0
  142. flyte/errors.py +143 -0
  143. flyte/extras/__init__.py +5 -0
  144. flyte/extras/_container.py +273 -0
  145. flyte/io/__init__.py +11 -0
  146. flyte/io/_dataframe.py +0 -0
  147. flyte/io/_dir.py +448 -0
  148. flyte/io/_file.py +468 -0
  149. flyte/io/pickle/__init__.py +0 -0
  150. flyte/io/pickle/transformer.py +117 -0
  151. flyte/io/structured_dataset/__init__.py +129 -0
  152. flyte/io/structured_dataset/basic_dfs.py +219 -0
  153. flyte/io/structured_dataset/structured_dataset.py +1061 -0
  154. flyte/py.typed +0 -0
  155. flyte/remote/__init__.py +25 -0
  156. flyte/remote/_client/__init__.py +0 -0
  157. flyte/remote/_client/_protocols.py +131 -0
  158. flyte/remote/_client/auth/__init__.py +12 -0
  159. flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
  160. flyte/remote/_client/auth/_authenticators/base.py +397 -0
  161. flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  162. flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
  163. flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
  164. flyte/remote/_client/auth/_authenticators/factory.py +200 -0
  165. flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
  166. flyte/remote/_client/auth/_channel.py +184 -0
  167. flyte/remote/_client/auth/_client_config.py +83 -0
  168. flyte/remote/_client/auth/_default_html.py +32 -0
  169. flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  170. flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
  171. flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
  172. flyte/remote/_client/auth/_keyring.py +143 -0
  173. flyte/remote/_client/auth/_token_client.py +260 -0
  174. flyte/remote/_client/auth/errors.py +16 -0
  175. flyte/remote/_client/controlplane.py +95 -0
  176. flyte/remote/_console.py +18 -0
  177. flyte/remote/_data.py +155 -0
  178. flyte/remote/_logs.py +116 -0
  179. flyte/remote/_project.py +86 -0
  180. flyte/remote/_run.py +873 -0
  181. flyte/remote/_secret.py +132 -0
  182. flyte/remote/_task.py +227 -0
  183. flyte/report/__init__.py +3 -0
  184. flyte/report/_report.py +178 -0
  185. flyte/report/_template.html +124 -0
  186. flyte/storage/__init__.py +24 -0
  187. flyte/storage/_remote_fs.py +34 -0
  188. flyte/storage/_storage.py +251 -0
  189. flyte/storage/_utils.py +5 -0
  190. flyte/types/__init__.py +13 -0
  191. flyte/types/_interface.py +25 -0
  192. flyte/types/_renderer.py +162 -0
  193. flyte/types/_string_literals.py +120 -0
  194. flyte/types/_type_engine.py +2210 -0
  195. flyte/types/_utils.py +80 -0
  196. flyte-0.0.1b0.dist-info/METADATA +179 -0
  197. flyte-0.0.1b0.dist-info/RECORD +390 -0
  198. flyte-0.0.1b0.dist-info/WHEEL +5 -0
  199. flyte-0.0.1b0.dist-info/entry_points.txt +3 -0
  200. flyte-0.0.1b0.dist-info/top_level.txt +1 -0
  201. union/__init__.py +54 -0
  202. union/_api_commons.py +3 -0
  203. union/_bin/__init__.py +0 -0
  204. union/_bin/runtime.py +113 -0
  205. union/_build.py +25 -0
  206. union/_cache/__init__.py +12 -0
  207. union/_cache/cache.py +141 -0
  208. union/_cache/defaults.py +9 -0
  209. union/_cache/policy_function_body.py +42 -0
  210. union/_cli/__init__.py +0 -0
  211. union/_cli/_common.py +263 -0
  212. union/_cli/_create.py +40 -0
  213. union/_cli/_delete.py +23 -0
  214. union/_cli/_deploy.py +120 -0
  215. union/_cli/_get.py +162 -0
  216. union/_cli/_params.py +579 -0
  217. union/_cli/_run.py +150 -0
  218. union/_cli/main.py +72 -0
  219. union/_code_bundle/__init__.py +8 -0
  220. union/_code_bundle/_ignore.py +113 -0
  221. union/_code_bundle/_packaging.py +187 -0
  222. union/_code_bundle/_utils.py +342 -0
  223. union/_code_bundle/bundle.py +176 -0
  224. union/_context.py +146 -0
  225. union/_datastructures.py +295 -0
  226. union/_deploy.py +185 -0
  227. union/_doc.py +29 -0
  228. union/_docstring.py +26 -0
  229. union/_environment.py +43 -0
  230. union/_group.py +31 -0
  231. union/_hash.py +23 -0
  232. union/_image.py +760 -0
  233. union/_initialize.py +585 -0
  234. union/_interface.py +84 -0
  235. union/_internal/__init__.py +3 -0
  236. union/_internal/controllers/__init__.py +77 -0
  237. union/_internal/controllers/_local_controller.py +77 -0
  238. union/_internal/controllers/pbhash.py +39 -0
  239. union/_internal/controllers/remote/__init__.py +40 -0
  240. union/_internal/controllers/remote/_action.py +131 -0
  241. union/_internal/controllers/remote/_client.py +43 -0
  242. union/_internal/controllers/remote/_controller.py +169 -0
  243. union/_internal/controllers/remote/_core.py +341 -0
  244. union/_internal/controllers/remote/_informer.py +260 -0
  245. union/_internal/controllers/remote/_service_protocol.py +44 -0
  246. union/_internal/imagebuild/__init__.py +11 -0
  247. union/_internal/imagebuild/docker_builder.py +416 -0
  248. union/_internal/imagebuild/image_builder.py +243 -0
  249. union/_internal/imagebuild/remote_builder.py +0 -0
  250. union/_internal/resolvers/__init__.py +0 -0
  251. union/_internal/resolvers/_task_module.py +31 -0
  252. union/_internal/resolvers/common.py +24 -0
  253. union/_internal/resolvers/default.py +27 -0
  254. union/_internal/runtime/__init__.py +0 -0
  255. union/_internal/runtime/convert.py +163 -0
  256. union/_internal/runtime/entrypoints.py +121 -0
  257. union/_internal/runtime/io.py +136 -0
  258. union/_internal/runtime/resources_serde.py +134 -0
  259. union/_internal/runtime/task_serde.py +202 -0
  260. union/_internal/runtime/taskrunner.py +179 -0
  261. union/_internal/runtime/types_serde.py +53 -0
  262. union/_logging.py +124 -0
  263. union/_protos/__init__.py +0 -0
  264. union/_protos/common/authorization_pb2.py +66 -0
  265. union/_protos/common/authorization_pb2.pyi +106 -0
  266. union/_protos/common/authorization_pb2_grpc.py +4 -0
  267. union/_protos/common/identifier_pb2.py +71 -0
  268. union/_protos/common/identifier_pb2.pyi +82 -0
  269. union/_protos/common/identifier_pb2_grpc.py +4 -0
  270. union/_protos/common/identity_pb2.py +48 -0
  271. union/_protos/common/identity_pb2.pyi +72 -0
  272. union/_protos/common/identity_pb2_grpc.py +4 -0
  273. union/_protos/common/list_pb2.py +36 -0
  274. union/_protos/common/list_pb2.pyi +69 -0
  275. union/_protos/common/list_pb2_grpc.py +4 -0
  276. union/_protos/common/policy_pb2.py +37 -0
  277. union/_protos/common/policy_pb2.pyi +27 -0
  278. union/_protos/common/policy_pb2_grpc.py +4 -0
  279. union/_protos/common/role_pb2.py +37 -0
  280. union/_protos/common/role_pb2.pyi +51 -0
  281. union/_protos/common/role_pb2_grpc.py +4 -0
  282. union/_protos/common/runtime_version_pb2.py +28 -0
  283. union/_protos/common/runtime_version_pb2.pyi +24 -0
  284. union/_protos/common/runtime_version_pb2_grpc.py +4 -0
  285. union/_protos/logs/dataplane/payload_pb2.py +96 -0
  286. union/_protos/logs/dataplane/payload_pb2.pyi +168 -0
  287. union/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
  288. union/_protos/secret/definition_pb2.py +49 -0
  289. union/_protos/secret/definition_pb2.pyi +93 -0
  290. union/_protos/secret/definition_pb2_grpc.py +4 -0
  291. union/_protos/secret/payload_pb2.py +62 -0
  292. union/_protos/secret/payload_pb2.pyi +94 -0
  293. union/_protos/secret/payload_pb2_grpc.py +4 -0
  294. union/_protos/secret/secret_pb2.py +38 -0
  295. union/_protos/secret/secret_pb2.pyi +6 -0
  296. union/_protos/secret/secret_pb2_grpc.py +198 -0
  297. union/_protos/validate/validate/validate_pb2.py +76 -0
  298. union/_protos/workflow/node_execution_service_pb2.py +26 -0
  299. union/_protos/workflow/node_execution_service_pb2.pyi +4 -0
  300. union/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
  301. union/_protos/workflow/queue_service_pb2.py +75 -0
  302. union/_protos/workflow/queue_service_pb2.pyi +103 -0
  303. union/_protos/workflow/queue_service_pb2_grpc.py +172 -0
  304. union/_protos/workflow/run_definition_pb2.py +100 -0
  305. union/_protos/workflow/run_definition_pb2.pyi +256 -0
  306. union/_protos/workflow/run_definition_pb2_grpc.py +4 -0
  307. union/_protos/workflow/run_logs_service_pb2.py +41 -0
  308. union/_protos/workflow/run_logs_service_pb2.pyi +28 -0
  309. union/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
  310. union/_protos/workflow/run_service_pb2.py +133 -0
  311. union/_protos/workflow/run_service_pb2.pyi +173 -0
  312. union/_protos/workflow/run_service_pb2_grpc.py +412 -0
  313. union/_protos/workflow/state_service_pb2.py +58 -0
  314. union/_protos/workflow/state_service_pb2.pyi +69 -0
  315. union/_protos/workflow/state_service_pb2_grpc.py +138 -0
  316. union/_protos/workflow/task_definition_pb2.py +72 -0
  317. union/_protos/workflow/task_definition_pb2.pyi +65 -0
  318. union/_protos/workflow/task_definition_pb2_grpc.py +4 -0
  319. union/_protos/workflow/task_service_pb2.py +44 -0
  320. union/_protos/workflow/task_service_pb2.pyi +31 -0
  321. union/_protos/workflow/task_service_pb2_grpc.py +104 -0
  322. union/_resources.py +226 -0
  323. union/_retry.py +32 -0
  324. union/_reusable_environment.py +25 -0
  325. union/_run.py +374 -0
  326. union/_secret.py +61 -0
  327. union/_task.py +354 -0
  328. union/_task_environment.py +186 -0
  329. union/_timeout.py +47 -0
  330. union/_tools.py +27 -0
  331. union/_utils/__init__.py +11 -0
  332. union/_utils/asyn.py +119 -0
  333. union/_utils/file_handling.py +71 -0
  334. union/_utils/helpers.py +46 -0
  335. union/_utils/lazy_module.py +54 -0
  336. union/_utils/uv_script_parser.py +49 -0
  337. union/_version.py +21 -0
  338. union/connectors/__init__.py +0 -0
  339. union/errors.py +128 -0
  340. union/extras/__init__.py +5 -0
  341. union/extras/_container.py +263 -0
  342. union/io/__init__.py +11 -0
  343. union/io/_dataframe.py +0 -0
  344. union/io/_dir.py +425 -0
  345. union/io/_file.py +418 -0
  346. union/io/pickle/__init__.py +0 -0
  347. union/io/pickle/transformer.py +117 -0
  348. union/io/structured_dataset/__init__.py +122 -0
  349. union/io/structured_dataset/basic_dfs.py +219 -0
  350. union/io/structured_dataset/structured_dataset.py +1057 -0
  351. union/py.typed +0 -0
  352. union/remote/__init__.py +23 -0
  353. union/remote/_client/__init__.py +0 -0
  354. union/remote/_client/_protocols.py +129 -0
  355. union/remote/_client/auth/__init__.py +12 -0
  356. union/remote/_client/auth/_authenticators/__init__.py +0 -0
  357. union/remote/_client/auth/_authenticators/base.py +391 -0
  358. union/remote/_client/auth/_authenticators/client_credentials.py +73 -0
  359. union/remote/_client/auth/_authenticators/device_code.py +120 -0
  360. union/remote/_client/auth/_authenticators/external_command.py +77 -0
  361. union/remote/_client/auth/_authenticators/factory.py +200 -0
  362. union/remote/_client/auth/_authenticators/pkce.py +515 -0
  363. union/remote/_client/auth/_channel.py +184 -0
  364. union/remote/_client/auth/_client_config.py +83 -0
  365. union/remote/_client/auth/_default_html.py +32 -0
  366. union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  367. union/remote/_client/auth/_grpc_utils/auth_interceptor.py +204 -0
  368. union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +144 -0
  369. union/remote/_client/auth/_keyring.py +154 -0
  370. union/remote/_client/auth/_token_client.py +258 -0
  371. union/remote/_client/auth/errors.py +16 -0
  372. union/remote/_client/controlplane.py +86 -0
  373. union/remote/_data.py +149 -0
  374. union/remote/_logs.py +74 -0
  375. union/remote/_project.py +86 -0
  376. union/remote/_run.py +820 -0
  377. union/remote/_secret.py +132 -0
  378. union/remote/_task.py +193 -0
  379. union/report/__init__.py +3 -0
  380. union/report/_report.py +178 -0
  381. union/report/_template.html +124 -0
  382. union/storage/__init__.py +24 -0
  383. union/storage/_remote_fs.py +34 -0
  384. union/storage/_storage.py +247 -0
  385. union/storage/_utils.py +5 -0
  386. union/types/__init__.py +11 -0
  387. union/types/_renderer.py +162 -0
  388. union/types/_string_literals.py +120 -0
  389. union/types/_type_engine.py +2131 -0
  390. union/types/_utils.py +80 -0
flyte/io/_dir.py ADDED
@@ -0,0 +1,448 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from typing import AsyncIterator, Dict, Generic, Iterator, List, Optional, Type, TypeVar, Union
6
+
7
+ from flyteidl.core import literals_pb2, types_pb2
8
+ from fsspec.asyn import AsyncFileSystem
9
+ from mashumaro.types import SerializableType
10
+ from pydantic import BaseModel, model_validator
11
+
12
+ import flyte.storage as storage
13
+ from flyte.io._file import File
14
+ from flyte.types import TypeEngine, TypeTransformer, TypeTransformerFailedError
15
+
16
+ # Type variable for the directory format
17
+ T = TypeVar("T")
18
+
19
+
20
class Dir(BaseModel, Generic[T], SerializableType):
    """
    A generic directory class representing a directory with files of a specified format.
    Provides both async and sync interfaces for directory operations.
    Users are responsible for handling all I/O - the type transformer for Dir does not do any automatic uploading
    or downloading of files.

    The generic type T represents the format of the files in the directory.

    Example:
    ```python
    # Async usage
    from pandas import DataFrame
    data_dir = Dir[DataFrame](path="s3://my-bucket/data/")

    # Walk through files
    async for file in data_dir.walk():
        async with file.open() as f:
            content = await f.read()

    # Sync alternative
    for file in data_dir.walk_sync():
        with file.open_sync() as f:
            content = f.read()
    ```
    """

    # Represents either a local or remote path.
    path: str
    # Display name; when omitted it is derived from the last component of ``path`` (see ``pre_init``).
    name: Optional[str] = None
    # Format of the directory contents; the empty string means "unspecified".
    format: str = ""

    class Config:
        arbitrary_types_allowed = True

    @model_validator(mode="before")
    @classmethod
    def pre_init(cls, data):
        """
        Fill in ``name`` from the last component of ``path`` when the caller did not provide one.

        Non-dict inputs and inputs without a ``path`` are passed through untouched so that pydantic's own
        validation reports the problem. The caller's dict is never mutated; a shallow copy is returned instead.
        """
        if isinstance(data, dict) and data.get("name") is None and "path" in data:
            data = {**data, "name": Path(data["path"]).name}
        return data

    def _serialize(self) -> Dict[str, Optional[str]]:
        """Serialize this Dir to a plain dict (mashumaro ``SerializableType`` hook)."""
        return self.model_dump()

    @classmethod
    def _deserialize(cls, file_dump: Dict[str, Optional[str]]) -> Dir:
        """Rebuild a Dir from the dict produced by ``_serialize``."""
        return cls.model_validate(file_dump)

    @classmethod
    def schema_match(cls, incoming: dict) -> bool:
        """
        Check whether an incoming JSON schema dict describes this model.

        Args:
            incoming: A JSON schema dictionary to compare against this model's schema.

        Returns:
            True when both schemas declare required fields and agree on ``type``, ``title`` and the set of
            required fields; False otherwise.
        """
        this_schema = cls.model_json_schema()
        current_required = this_schema.get("required")
        incoming_required = incoming.get("required")
        # Always return a real bool rather than None or a falsy operand.
        return bool(
            current_required
            and incoming_required
            and incoming.get("type") == this_schema.get("type")
            and incoming.get("title") == this_schema.get("title")
            and set(current_required) == set(incoming_required)
        )

    async def walk(self, recursive: bool = True, max_depth: Optional[int] = None) -> AsyncIterator[File[T]]:
        """
        Asynchronously walk through the directory and yield File objects.

        Args:
            recursive: If True, recursively walk subdirectories. If False, only files directly inside the
                directory are yielded and ``max_depth`` is ignored.
            max_depth: Maximum depth for recursive walking

        Yields:
            File objects for each file found in the directory

        Example:
        ```python
        async for file in directory.walk():
            local_path = await file.download()
            # Process the file
        ```
        """
        fs = storage.get_underlying_filesystem(path=self.path)
        if recursive is False:
            # fsspec counts the starting directory as depth 1, so maxdepth=1 lists only the top level.
            max_depth = 1

        # Note if the path is actually just a file, no walking is done.
        if isinstance(fs, AsyncFileSystem):
            async for parent, _, files in fs._walk(self.path, maxdepth=max_depth):
                for file in files:
                    full_file = fs.unstrip_protocol(parent + fs.sep + file)
                    yield File[T](path=full_file)
        else:
            for parent, _, files in fs.walk(self.path, maxdepth=max_depth):
                for file in files:
                    if "file" in fs.protocol:
                        # Local filesystem: use the OS-native join and no protocol prefix.
                        full_file = os.path.join(parent, file)
                    else:
                        full_file = fs.unstrip_protocol(parent + fs.sep + file)
                    yield File[T](path=full_file)

    def walk_sync(
        self, recursive: bool = True, file_pattern: str = "*", max_depth: Optional[int] = None
    ) -> Iterator[File[T]]:
        """
        Synchronously walk through the directory and yield File objects.

        Args:
            recursive: If True, recursively walk subdirectories. If False, only files directly inside the
                directory are yielded and ``max_depth`` is ignored.
            file_pattern: Glob pattern matched against each file name (not the full path)
            max_depth: Maximum depth for recursive walking

        Yields:
            File objects for each file found in the directory

        Example:
        ```python
        for file in directory.walk_sync():
            local_path = file.download_sync()
            # Process the file
        ```
        """
        import fnmatch

        fs = storage.get_underlying_filesystem(path=self.path)
        if recursive is False:
            # fsspec counts the starting directory as depth 1, so maxdepth=1 lists only the top level.
            max_depth = 1

        for parent, _, files in fs.walk(self.path, maxdepth=max_depth):
            for file in files:
                if not fnmatch.fnmatch(file, file_pattern):
                    continue
                if "file" in fs.protocol:
                    # Local filesystem: use the OS-native join and no protocol prefix.
                    full_file = os.path.join(parent, file)
                else:
                    full_file = fs.unstrip_protocol(parent + fs.sep + file)
                yield File[T](path=full_file)

    async def list_files(self) -> List[File[T]]:
        """
        Asynchronously get a list of all files in the directory (non-recursive).

        Returns:
            A list of File objects

        Example:
        ```python
        files = await directory.list_files()
        for file in files:
            # Process the file
        ```
        """
        # todo: this should probably also just defer to fsspec.find()
        return [file async for file in self.walk(recursive=False)]

    def list_files_sync(self) -> List[File[T]]:
        """
        Synchronously get a list of all files in the directory (non-recursive).

        Returns:
            A list of File objects

        Example:
        ```python
        files = directory.list_files_sync()
        for file in files:
            # Process the file
        ```
        """
        return list(self.walk_sync(recursive=False))

    async def download(self, local_path: Optional[Union[str, Path]] = None) -> str:
        """
        Asynchronously download the entire directory to a local path.

        If this Dir already points at a local path, nothing is downloaded: the directory is either returned
        as-is (no ``local_path`` given, or it equals the current path) or copied to ``local_path``.

        Args:
            local_path: The local path to download the directory to. If None, a temporary
                directory will be used.

        Returns:
            The path to the downloaded directory

        Example:
        ```python
        local_dir = await directory.download('/tmp/my_data/')
        ```
        """
        local_dest = str(local_path) if local_path else str(storage.get_random_local_path())
        if not storage.is_remote(self.path):
            # Compare as strings so that a pathlib.Path argument equal to self.path is recognised.
            if not local_path or str(local_path) == self.path:
                # Skip copying
                return self.path

            # Run the blocking copy in a worker thread so the event loop is not stalled.
            import asyncio
            import shutil

            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, lambda: shutil.copytree(self.path, local_dest, dirs_exist_ok=True))
            # The local copy is the final result; do not fall through to the remote fetch.
            return local_dest
        return await storage.get(self.path, local_dest, recursive=True)

    def download_sync(self, local_path: Optional[Union[str, Path]] = None) -> str:
        """
        Synchronously download the entire directory to a local path.

        Only local source paths are supported for now; they are either returned as-is or copied to
        ``local_path``.

        Args:
            local_path: The local path to download the directory to. If None, a temporary
                directory will be used.

        Returns:
            The path to the downloaded directory

        Raises:
            NotImplementedError: If this Dir points at a remote path.

        Example:
        ```python
        local_dir = directory.download_sync('/tmp/my_data/')
        ```
        """
        local_dest = str(local_path) if local_path else str(storage.get_random_local_path())
        if not storage.is_remote(self.path):
            # Compare as strings so that a pathlib.Path argument equal to self.path is recognised.
            if not local_path or str(local_path) == self.path:
                # Skip copying
                return self.path

            import shutil

            shutil.copytree(self.path, local_dest, dirs_exist_ok=True)
            # The copy succeeded; return it instead of falling through to the remote-only error below.
            return local_dest

        # Figure this out when we figure out the final synchronicity story
        raise NotImplementedError("Sync download is not implemented for remote paths")

    @classmethod
    async def from_local(cls, local_path: Union[str, Path], remote_path: Optional[str] = None) -> Dir[T]:
        """
        Asynchronously create a new Dir by uploading a local directory to the configured remote store.

        Args:
            local_path: Path to the local directory
            remote_path: Optional path to store the directory remotely. If None, a path will be generated.

        Returns:
            A new Dir instance pointing to the uploaded directory

        Example:
        ```python
        remote_dir = await Dir[DataFrame].from_local('/tmp/data_dir/', 's3://bucket/data/')
        ```
        """
        local_path_str = str(local_path)
        # normpath drops a trailing separator so basename yields the directory's own name.
        dirname = os.path.basename(os.path.normpath(local_path_str))

        output_path = await storage.put(from_path=local_path_str, to_path=remote_path, recursive=True)
        return cls(path=output_path, name=dirname)

    @classmethod
    def from_local_sync(cls, local_path: Union[str, Path], remote_path: Optional[str] = None) -> Dir[T]:
        """
        Synchronously create a new Dir by uploading a local directory to the configured remote store.

        Not implemented yet; always raises.

        Args:
            local_path: Path to the local directory
            remote_path: Optional path to store the directory remotely. If None, a path will be generated.

        Returns:
            A new Dir instance pointing to the uploaded directory

        Raises:
            NotImplementedError: Always, until the sync upload path is implemented.

        Example:
        ```python
        remote_dir = Dir[DataFrame].from_local_sync('/tmp/data_dir/', 's3://bucket/data/')
        ```
        """
        # Implement this after we figure out the final synchronicity story
        raise NotImplementedError("Sync upload is not implemented for remote paths")

    async def exists(self) -> bool:
        """
        Asynchronously check if the directory exists.

        Returns:
            True if the directory exists, False otherwise

        Example:
        ```python
        if await directory.exists():
            # Process the directory
        ```
        """
        fs = storage.get_underlying_filesystem(path=self.path)
        if isinstance(fs, AsyncFileSystem):
            return await fs._exists(self.path)
        return fs.exists(self.path)

    def exists_sync(self) -> bool:
        """
        Synchronously check if the directory exists.

        Returns:
            True if the directory exists, False otherwise

        Example:
        ```python
        if directory.exists_sync():
            # Process the directory
        ```
        """
        fs = storage.get_underlying_filesystem(path=self.path)
        return fs.exists(self.path)

    async def get_file(self, file_name: str) -> Optional[File[T]]:
        """
        Asynchronously get a specific file from the directory.

        Args:
            file_name: The name of the file to get

        Returns:
            A File instance if the file exists, None otherwise

        Example:
        ```python
        file = await directory.get_file("data.csv")
        if file:
            # Process the file
        ```
        """
        fs = storage.get_underlying_filesystem(path=self.path)
        file_path = fs.sep.join([self.path, file_name])

        # Use the native coroutine on async filesystems, mirroring exists(), so the event loop is not blocked.
        if isinstance(fs, AsyncFileSystem):
            found = await fs._exists(file_path)
        else:
            found = fs.exists(file_path)

        if found:
            return File[T](path=file_path)
        return None

    def get_file_sync(self, file_name: str) -> Optional[File[T]]:
        """
        Synchronously get a specific file from the directory.

        Args:
            file_name: The name of the file to get

        Returns:
            A File instance if the file exists, None otherwise

        Example:
        ```python
        file = directory.get_file_sync("data.csv")
        if file:
            # Process the file
        ```
        """
        file_path = os.path.join(self.path, file_name)
        file = File[T](path=file_path)

        if file.exists_sync():
            return file
        return None
376
+
377
+
378
class DirTransformer(TypeTransformer[Dir]):
    """
    Type transformer that converts Dir objects to and from multipart blob literals. It performs no I/O of its
    own; moving the underlying data is the user's responsibility.
    """

    def __init__(self):
        super().__init__(name="Dir", t=Dir)

    def get_literal_type(self, t: Type[Dir]) -> types_pb2.LiteralType:
        """Return the Flyte literal type for a Dir type: a multipart blob with an empty format."""
        # todo: set format from generic
        # The generic parameter of ``t`` is not consulted yet, so the format is always empty.
        multipart_blob = types_pb2.BlobType(
            format="",
            dimensionality=types_pb2.BlobType.BlobDimensionality.MULTIPART,
        )
        return types_pb2.LiteralType(blob=multipart_blob)

    async def to_literal(
        self,
        python_val: Dir,
        python_type: Type[Dir],
        expected: types_pb2.LiteralType,
    ) -> literals_pb2.Literal:
        """
        Convert a Dir object to a Flyte blob literal.

        The literal carries the Dir's ``path`` as the blob URI and its ``format`` in the blob metadata.
        Raises TypeTransformerFailedError when ``python_val`` is not a Dir.
        """
        if not isinstance(python_val, Dir):
            raise TypeTransformerFailedError(f"Expected Dir object, received {type(python_val)}")

        blob_type = types_pb2.BlobType(
            format=python_val.format,
            dimensionality=types_pb2.BlobType.BlobDimensionality.MULTIPART,
        )
        blob = literals_pb2.Blob(
            metadata=literals_pb2.BlobMetadata(type=blob_type),
            uri=python_val.path,
        )
        return literals_pb2.Literal(scalar=literals_pb2.Scalar(blob=blob))

    async def to_python_value(
        self,
        lv: literals_pb2.Literal,
        expected_python_type: Type[Dir],
    ) -> Dir:
        """
        Convert a Flyte literal to a Dir object.

        Raises TypeTransformerFailedError when the literal is not a blob or the blob is not multipart.
        """
        if not lv.scalar.HasField("blob"):
            raise TypeTransformerFailedError(f"Expected blob literal, received {lv}")

        blob_type = lv.scalar.blob.metadata.type
        if blob_type.dimensionality != types_pb2.BlobType.BlobDimensionality.MULTIPART:
            raise TypeTransformerFailedError(
                f"Expected multipart, received {lv.scalar.blob.metadata.type.dimensionality}"
            )

        # The directory name is the last component of the blob URI.
        blob_uri = lv.scalar.blob.uri
        return Dir(path=blob_uri, name=Path(blob_uri).name, format=blob_type.format)

    def guess_python_type(self, literal_type: types_pb2.LiteralType) -> Type[Dir]:
        """Guess the Python type from a Flyte literal type; only multipart blobs map to Dir."""
        multipart = types_pb2.BlobType.BlobDimensionality.MULTIPART
        if not literal_type.HasField("blob") or literal_type.blob.dimensionality != multipart:
            raise ValueError(f"Cannot guess python type from {literal_type}")
        return Dir
446
+
447
+
448
+ TypeEngine.register(DirTransformer())