flyte 0.0.1b3__py3-none-any.whl → 0.2.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flyte might be problematic. Click here for more details.

Files changed (319)
  1. flyte/__init__.py +20 -4
  2. flyte/_bin/runtime.py +33 -7
  3. flyte/_build.py +3 -2
  4. flyte/_cache/cache.py +1 -2
  5. flyte/_code_bundle/_packaging.py +1 -1
  6. flyte/_code_bundle/_utils.py +0 -16
  7. flyte/_code_bundle/bundle.py +43 -12
  8. flyte/_context.py +8 -2
  9. flyte/_deploy.py +56 -15
  10. flyte/_environment.py +45 -4
  11. flyte/_excepthook.py +37 -0
  12. flyte/_group.py +2 -1
  13. flyte/_image.py +8 -4
  14. flyte/_initialize.py +112 -254
  15. flyte/_interface.py +3 -3
  16. flyte/_internal/controllers/__init__.py +19 -6
  17. flyte/_internal/controllers/_local_controller.py +83 -8
  18. flyte/_internal/controllers/_trace.py +2 -1
  19. flyte/_internal/controllers/remote/__init__.py +27 -7
  20. flyte/_internal/controllers/remote/_action.py +7 -2
  21. flyte/_internal/controllers/remote/_client.py +5 -1
  22. flyte/_internal/controllers/remote/_controller.py +159 -26
  23. flyte/_internal/controllers/remote/_core.py +13 -5
  24. flyte/_internal/controllers/remote/_informer.py +4 -4
  25. flyte/_internal/controllers/remote/_service_protocol.py +6 -6
  26. flyte/_internal/imagebuild/docker_builder.py +12 -1
  27. flyte/_internal/imagebuild/image_builder.py +16 -11
  28. flyte/_internal/runtime/convert.py +164 -21
  29. flyte/_internal/runtime/entrypoints.py +1 -1
  30. flyte/_internal/runtime/io.py +3 -3
  31. flyte/_internal/runtime/task_serde.py +140 -20
  32. flyte/_internal/runtime/taskrunner.py +4 -3
  33. flyte/_internal/runtime/types_serde.py +1 -1
  34. flyte/_logging.py +12 -1
  35. flyte/_map.py +215 -0
  36. flyte/_pod.py +19 -0
  37. flyte/_protos/common/list_pb2.py +3 -3
  38. flyte/_protos/common/list_pb2.pyi +2 -0
  39. flyte/_protos/logs/dataplane/payload_pb2.py +28 -24
  40. flyte/_protos/logs/dataplane/payload_pb2.pyi +11 -2
  41. flyte/_protos/workflow/common_pb2.py +27 -0
  42. flyte/_protos/workflow/common_pb2.pyi +14 -0
  43. flyte/_protos/workflow/environment_pb2.py +29 -0
  44. flyte/_protos/workflow/environment_pb2.pyi +12 -0
  45. flyte/_protos/workflow/queue_service_pb2.py +40 -41
  46. flyte/_protos/workflow/queue_service_pb2.pyi +35 -30
  47. flyte/_protos/workflow/queue_service_pb2_grpc.py +15 -15
  48. flyte/_protos/workflow/run_definition_pb2.py +61 -61
  49. flyte/_protos/workflow/run_definition_pb2.pyi +8 -4
  50. flyte/_protos/workflow/run_service_pb2.py +20 -24
  51. flyte/_protos/workflow/run_service_pb2.pyi +2 -6
  52. flyte/_protos/workflow/state_service_pb2.py +36 -28
  53. flyte/_protos/workflow/state_service_pb2.pyi +19 -15
  54. flyte/_protos/workflow/state_service_pb2_grpc.py +28 -28
  55. flyte/_protos/workflow/task_definition_pb2.py +29 -22
  56. flyte/_protos/workflow/task_definition_pb2.pyi +21 -5
  57. flyte/_protos/workflow/task_service_pb2.py +27 -11
  58. flyte/_protos/workflow/task_service_pb2.pyi +29 -1
  59. flyte/_protos/workflow/task_service_pb2_grpc.py +34 -0
  60. flyte/_run.py +166 -95
  61. flyte/_task.py +110 -28
  62. flyte/_task_environment.py +55 -72
  63. flyte/_trace.py +6 -14
  64. flyte/_utils/__init__.py +6 -0
  65. flyte/_utils/async_cache.py +139 -0
  66. flyte/_utils/coro_management.py +0 -2
  67. flyte/_utils/helpers.py +45 -19
  68. flyte/_utils/org_discovery.py +57 -0
  69. flyte/_version.py +2 -2
  70. flyte/cli/__init__.py +3 -0
  71. flyte/cli/_abort.py +28 -0
  72. flyte/{_cli → cli}/_common.py +73 -23
  73. flyte/cli/_create.py +145 -0
  74. flyte/{_cli → cli}/_delete.py +4 -4
  75. flyte/{_cli → cli}/_deploy.py +26 -14
  76. flyte/cli/_gen.py +163 -0
  77. flyte/{_cli → cli}/_get.py +98 -23
  78. {union/_cli → flyte/cli}/_params.py +106 -147
  79. flyte/{_cli → cli}/_run.py +99 -20
  80. flyte/cli/main.py +166 -0
  81. flyte/config/__init__.py +3 -0
  82. flyte/config/_config.py +216 -0
  83. flyte/config/_internal.py +64 -0
  84. flyte/config/_reader.py +207 -0
  85. flyte/errors.py +29 -0
  86. flyte/extras/_container.py +33 -43
  87. flyte/io/__init__.py +17 -1
  88. flyte/io/_dir.py +2 -2
  89. flyte/io/_file.py +3 -4
  90. flyte/io/{structured_dataset → _structured_dataset}/basic_dfs.py +1 -1
  91. flyte/io/{structured_dataset → _structured_dataset}/structured_dataset.py +1 -1
  92. flyte/{_datastructures.py → models.py} +56 -7
  93. flyte/remote/__init__.py +2 -1
  94. flyte/remote/_client/_protocols.py +2 -0
  95. flyte/remote/_client/auth/_auth_utils.py +14 -0
  96. flyte/remote/_client/auth/_channel.py +34 -3
  97. flyte/remote/_client/auth/_token_client.py +3 -3
  98. flyte/remote/_client/controlplane.py +13 -13
  99. flyte/remote/_console.py +1 -1
  100. flyte/remote/_data.py +10 -6
  101. flyte/remote/_logs.py +89 -29
  102. flyte/remote/_project.py +8 -9
  103. flyte/remote/_run.py +228 -131
  104. flyte/remote/_secret.py +12 -12
  105. flyte/remote/_task.py +179 -15
  106. flyte/report/_report.py +4 -4
  107. flyte/storage/__init__.py +5 -0
  108. flyte/storage/_config.py +233 -0
  109. flyte/storage/_storage.py +23 -3
  110. flyte/syncify/__init__.py +56 -0
  111. flyte/syncify/_api.py +371 -0
  112. flyte/types/__init__.py +23 -0
  113. flyte/types/_interface.py +22 -7
  114. flyte/{io/pickle/transformer.py → types/_pickle.py} +2 -1
  115. flyte/types/_type_engine.py +95 -18
  116. flyte-0.2.0a0.dist-info/METADATA +249 -0
  117. flyte-0.2.0a0.dist-info/RECORD +218 -0
  118. {flyte-0.0.1b3.dist-info → flyte-0.2.0a0.dist-info}/entry_points.txt +1 -1
  119. flyte/_api_commons.py +0 -3
  120. flyte/_cli/__init__.py +0 -0
  121. flyte/_cli/_create.py +0 -42
  122. flyte/_cli/main.py +0 -72
  123. flyte/_internal/controllers/pbhash.py +0 -39
  124. flyte/io/_dataframe.py +0 -0
  125. flyte/io/pickle/__init__.py +0 -0
  126. flyte-0.0.1b3.dist-info/METADATA +0 -179
  127. flyte-0.0.1b3.dist-info/RECORD +0 -390
  128. union/__init__.py +0 -54
  129. union/_api_commons.py +0 -3
  130. union/_bin/__init__.py +0 -0
  131. union/_bin/runtime.py +0 -113
  132. union/_build.py +0 -25
  133. union/_cache/__init__.py +0 -12
  134. union/_cache/cache.py +0 -141
  135. union/_cache/defaults.py +0 -9
  136. union/_cache/policy_function_body.py +0 -42
  137. union/_cli/__init__.py +0 -0
  138. union/_cli/_common.py +0 -263
  139. union/_cli/_create.py +0 -40
  140. union/_cli/_delete.py +0 -23
  141. union/_cli/_deploy.py +0 -120
  142. union/_cli/_get.py +0 -162
  143. union/_cli/_run.py +0 -150
  144. union/_cli/main.py +0 -72
  145. union/_code_bundle/__init__.py +0 -8
  146. union/_code_bundle/_ignore.py +0 -113
  147. union/_code_bundle/_packaging.py +0 -187
  148. union/_code_bundle/_utils.py +0 -342
  149. union/_code_bundle/bundle.py +0 -176
  150. union/_context.py +0 -146
  151. union/_datastructures.py +0 -295
  152. union/_deploy.py +0 -185
  153. union/_doc.py +0 -29
  154. union/_docstring.py +0 -26
  155. union/_environment.py +0 -43
  156. union/_group.py +0 -31
  157. union/_hash.py +0 -23
  158. union/_image.py +0 -760
  159. union/_initialize.py +0 -585
  160. union/_interface.py +0 -84
  161. union/_internal/__init__.py +0 -3
  162. union/_internal/controllers/__init__.py +0 -77
  163. union/_internal/controllers/_local_controller.py +0 -77
  164. union/_internal/controllers/pbhash.py +0 -39
  165. union/_internal/controllers/remote/__init__.py +0 -40
  166. union/_internal/controllers/remote/_action.py +0 -131
  167. union/_internal/controllers/remote/_client.py +0 -43
  168. union/_internal/controllers/remote/_controller.py +0 -169
  169. union/_internal/controllers/remote/_core.py +0 -341
  170. union/_internal/controllers/remote/_informer.py +0 -260
  171. union/_internal/controllers/remote/_service_protocol.py +0 -44
  172. union/_internal/imagebuild/__init__.py +0 -11
  173. union/_internal/imagebuild/docker_builder.py +0 -416
  174. union/_internal/imagebuild/image_builder.py +0 -243
  175. union/_internal/imagebuild/remote_builder.py +0 -0
  176. union/_internal/resolvers/__init__.py +0 -0
  177. union/_internal/resolvers/_task_module.py +0 -31
  178. union/_internal/resolvers/common.py +0 -24
  179. union/_internal/resolvers/default.py +0 -27
  180. union/_internal/runtime/__init__.py +0 -0
  181. union/_internal/runtime/convert.py +0 -163
  182. union/_internal/runtime/entrypoints.py +0 -121
  183. union/_internal/runtime/io.py +0 -136
  184. union/_internal/runtime/resources_serde.py +0 -134
  185. union/_internal/runtime/task_serde.py +0 -202
  186. union/_internal/runtime/taskrunner.py +0 -179
  187. union/_internal/runtime/types_serde.py +0 -53
  188. union/_logging.py +0 -124
  189. union/_protos/__init__.py +0 -0
  190. union/_protos/common/authorization_pb2.py +0 -66
  191. union/_protos/common/authorization_pb2.pyi +0 -106
  192. union/_protos/common/identifier_pb2.py +0 -71
  193. union/_protos/common/identifier_pb2.pyi +0 -82
  194. union/_protos/common/identity_pb2.py +0 -48
  195. union/_protos/common/identity_pb2.pyi +0 -72
  196. union/_protos/common/identity_pb2_grpc.py +0 -4
  197. union/_protos/common/list_pb2.py +0 -36
  198. union/_protos/common/list_pb2.pyi +0 -69
  199. union/_protos/common/list_pb2_grpc.py +0 -4
  200. union/_protos/common/policy_pb2.py +0 -37
  201. union/_protos/common/policy_pb2.pyi +0 -27
  202. union/_protos/common/policy_pb2_grpc.py +0 -4
  203. union/_protos/common/role_pb2.py +0 -37
  204. union/_protos/common/role_pb2.pyi +0 -51
  205. union/_protos/common/role_pb2_grpc.py +0 -4
  206. union/_protos/common/runtime_version_pb2.py +0 -28
  207. union/_protos/common/runtime_version_pb2.pyi +0 -24
  208. union/_protos/common/runtime_version_pb2_grpc.py +0 -4
  209. union/_protos/logs/dataplane/payload_pb2.py +0 -96
  210. union/_protos/logs/dataplane/payload_pb2.pyi +0 -168
  211. union/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
  212. union/_protos/secret/definition_pb2.py +0 -49
  213. union/_protos/secret/definition_pb2.pyi +0 -93
  214. union/_protos/secret/definition_pb2_grpc.py +0 -4
  215. union/_protos/secret/payload_pb2.py +0 -62
  216. union/_protos/secret/payload_pb2.pyi +0 -94
  217. union/_protos/secret/payload_pb2_grpc.py +0 -4
  218. union/_protos/secret/secret_pb2.py +0 -38
  219. union/_protos/secret/secret_pb2.pyi +0 -6
  220. union/_protos/secret/secret_pb2_grpc.py +0 -198
  221. union/_protos/validate/validate/validate_pb2.py +0 -76
  222. union/_protos/workflow/node_execution_service_pb2.py +0 -26
  223. union/_protos/workflow/node_execution_service_pb2.pyi +0 -4
  224. union/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
  225. union/_protos/workflow/queue_service_pb2.py +0 -75
  226. union/_protos/workflow/queue_service_pb2.pyi +0 -103
  227. union/_protos/workflow/queue_service_pb2_grpc.py +0 -172
  228. union/_protos/workflow/run_definition_pb2.py +0 -100
  229. union/_protos/workflow/run_definition_pb2.pyi +0 -256
  230. union/_protos/workflow/run_definition_pb2_grpc.py +0 -4
  231. union/_protos/workflow/run_logs_service_pb2.py +0 -41
  232. union/_protos/workflow/run_logs_service_pb2.pyi +0 -28
  233. union/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
  234. union/_protos/workflow/run_service_pb2.py +0 -133
  235. union/_protos/workflow/run_service_pb2.pyi +0 -173
  236. union/_protos/workflow/run_service_pb2_grpc.py +0 -412
  237. union/_protos/workflow/state_service_pb2.py +0 -58
  238. union/_protos/workflow/state_service_pb2.pyi +0 -69
  239. union/_protos/workflow/state_service_pb2_grpc.py +0 -138
  240. union/_protos/workflow/task_definition_pb2.py +0 -72
  241. union/_protos/workflow/task_definition_pb2.pyi +0 -65
  242. union/_protos/workflow/task_definition_pb2_grpc.py +0 -4
  243. union/_protos/workflow/task_service_pb2.py +0 -44
  244. union/_protos/workflow/task_service_pb2.pyi +0 -31
  245. union/_protos/workflow/task_service_pb2_grpc.py +0 -104
  246. union/_resources.py +0 -226
  247. union/_retry.py +0 -32
  248. union/_reusable_environment.py +0 -25
  249. union/_run.py +0 -374
  250. union/_secret.py +0 -61
  251. union/_task.py +0 -354
  252. union/_task_environment.py +0 -186
  253. union/_timeout.py +0 -47
  254. union/_tools.py +0 -27
  255. union/_utils/__init__.py +0 -11
  256. union/_utils/asyn.py +0 -119
  257. union/_utils/file_handling.py +0 -71
  258. union/_utils/helpers.py +0 -46
  259. union/_utils/lazy_module.py +0 -54
  260. union/_utils/uv_script_parser.py +0 -49
  261. union/_version.py +0 -21
  262. union/connectors/__init__.py +0 -0
  263. union/errors.py +0 -128
  264. union/extras/__init__.py +0 -5
  265. union/extras/_container.py +0 -263
  266. union/io/__init__.py +0 -11
  267. union/io/_dataframe.py +0 -0
  268. union/io/_dir.py +0 -425
  269. union/io/_file.py +0 -418
  270. union/io/pickle/__init__.py +0 -0
  271. union/io/pickle/transformer.py +0 -117
  272. union/io/structured_dataset/__init__.py +0 -122
  273. union/io/structured_dataset/basic_dfs.py +0 -219
  274. union/io/structured_dataset/structured_dataset.py +0 -1057
  275. union/py.typed +0 -0
  276. union/remote/__init__.py +0 -23
  277. union/remote/_client/__init__.py +0 -0
  278. union/remote/_client/_protocols.py +0 -129
  279. union/remote/_client/auth/__init__.py +0 -12
  280. union/remote/_client/auth/_authenticators/__init__.py +0 -0
  281. union/remote/_client/auth/_authenticators/base.py +0 -391
  282. union/remote/_client/auth/_authenticators/client_credentials.py +0 -73
  283. union/remote/_client/auth/_authenticators/device_code.py +0 -120
  284. union/remote/_client/auth/_authenticators/external_command.py +0 -77
  285. union/remote/_client/auth/_authenticators/factory.py +0 -200
  286. union/remote/_client/auth/_authenticators/pkce.py +0 -515
  287. union/remote/_client/auth/_channel.py +0 -184
  288. union/remote/_client/auth/_client_config.py +0 -83
  289. union/remote/_client/auth/_default_html.py +0 -32
  290. union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
  291. union/remote/_client/auth/_grpc_utils/auth_interceptor.py +0 -204
  292. union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +0 -144
  293. union/remote/_client/auth/_keyring.py +0 -154
  294. union/remote/_client/auth/_token_client.py +0 -258
  295. union/remote/_client/auth/errors.py +0 -16
  296. union/remote/_client/controlplane.py +0 -86
  297. union/remote/_data.py +0 -149
  298. union/remote/_logs.py +0 -74
  299. union/remote/_project.py +0 -86
  300. union/remote/_run.py +0 -820
  301. union/remote/_secret.py +0 -132
  302. union/remote/_task.py +0 -193
  303. union/report/__init__.py +0 -3
  304. union/report/_report.py +0 -178
  305. union/report/_template.html +0 -124
  306. union/storage/__init__.py +0 -24
  307. union/storage/_remote_fs.py +0 -34
  308. union/storage/_storage.py +0 -247
  309. union/storage/_utils.py +0 -5
  310. union/types/__init__.py +0 -11
  311. union/types/_renderer.py +0 -162
  312. union/types/_string_literals.py +0 -120
  313. union/types/_type_engine.py +0 -2131
  314. union/types/_utils.py +0 -80
  315. /union/_protos/common/authorization_pb2_grpc.py → /flyte/_protos/workflow/common_pb2_grpc.py +0 -0
  316. /union/_protos/common/identifier_pb2_grpc.py → /flyte/_protos/workflow/environment_pb2_grpc.py +0 -0
  317. /flyte/io/{structured_dataset → _structured_dataset}/__init__.py +0 -0
  318. {flyte-0.0.1b3.dist-info → flyte-0.2.0a0.dist-info}/WHEEL +0 -0
  319. {flyte-0.0.1b3.dist-info → flyte-0.2.0a0.dist-info}/top_level.txt +0 -0
@@ -1,219 +0,0 @@
1
- import os
2
- import typing
3
- from pathlib import Path
4
- from typing import TypeVar
5
-
6
- from flyteidl.core import literals_pb2, types_pb2
7
- from fsspec.core import split_protocol, strip_protocol
8
-
9
- import union.storage as storage
10
- from union._logging import logger
11
- from union._utils import lazy_module
12
- from union.io.structured_dataset.structured_dataset import (
13
- CSV,
14
- PARQUET,
15
- StructuredDataset,
16
- StructuredDatasetDecoder,
17
- StructuredDatasetEncoder,
18
- )
19
-
20
- if typing.TYPE_CHECKING:
21
- import pandas as pd
22
- import pyarrow as pa
23
- else:
24
- pd = lazy_module("pandas")
25
- pa = lazy_module("pyarrow")
26
-
27
- T = TypeVar("T")
28
-
29
-
30
- # pr: add back after storage
31
- def get_pandas_storage_options(uri: str, data_config=None, anonymous: bool = False) -> typing.Optional[typing.Dict]:
32
- from pandas.io.common import is_fsspec_url
33
-
34
- if is_fsspec_url(uri):
35
- if uri.startswith("s3"):
36
- # pr: after storage, replace with real call to get_fsspec_storage_options
37
- return {
38
- "cache_regions": True,
39
- "client_kwargs": {"endpoint_url": "http://localhost:30002"},
40
- "key": "minio",
41
- "secret": "miniostorage",
42
- }
43
- return {}
44
-
45
- # Pandas does not allow storage_options for non-fsspec paths e.g. local.
46
- return None
47
-
48
-
49
- class PandasToCSVEncodingHandler(StructuredDatasetEncoder):
50
- def __init__(self):
51
- super().__init__(pd.DataFrame, None, CSV)
52
-
53
- async def encode(
54
- self,
55
- structured_dataset: StructuredDataset,
56
- structured_dataset_type: types_pb2.StructuredDatasetType,
57
- ) -> literals_pb2.StructuredDataset:
58
- if not structured_dataset.uri:
59
- from union._context import internal_ctx
60
-
61
- ctx = internal_ctx()
62
- uri = ctx.raw_data.get_random_remote_path()
63
- else:
64
- uri = typing.cast(str, structured_dataset.uri)
65
-
66
- if not storage.is_remote(uri):
67
- Path(uri).mkdir(parents=True, exist_ok=True)
68
- path = os.path.join(uri, ".csv")
69
- df = typing.cast(pd.DataFrame, structured_dataset.dataframe)
70
- df.to_csv(
71
- path,
72
- index=False,
73
- storage_options=get_pandas_storage_options(uri=path, data_config=None),
74
- )
75
- structured_dataset_type.format = CSV
76
- return literals_pb2.StructuredDataset(
77
- uri=uri, metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type)
78
- )
79
-
80
-
81
- class CSVToPandasDecodingHandler(StructuredDatasetDecoder):
82
- def __init__(self):
83
- super().__init__(pd.DataFrame, None, CSV)
84
-
85
- async def decode(
86
- self,
87
- proto_value: literals_pb2.StructuredDataset,
88
- current_task_metadata: literals_pb2.StructuredDatasetMetadata,
89
- ) -> "pd.DataFrame":
90
- from botocore.exceptions import NoCredentialsError
91
-
92
- uri = proto_value.uri
93
- columns = None
94
- kwargs = get_pandas_storage_options(uri=uri, data_config=None)
95
- path = os.path.join(uri, ".csv")
96
- if current_task_metadata.structured_dataset_type and current_task_metadata.structured_dataset_type.columns:
97
- columns = [c.name for c in current_task_metadata.structured_dataset_type.columns]
98
- try:
99
- return pd.read_csv(path, usecols=columns, storage_options=kwargs)
100
- except NoCredentialsError:
101
- logger.debug("S3 source detected, attempting anonymous S3 access")
102
- kwargs = get_pandas_storage_options(uri=uri, data_config=None, anonymous=True)
103
- return pd.read_csv(path, usecols=columns, storage_options=kwargs)
104
-
105
-
106
- class PandasToParquetEncodingHandler(StructuredDatasetEncoder):
107
- def __init__(self):
108
- super().__init__(pd.DataFrame, None, PARQUET)
109
-
110
- async def encode(
111
- self,
112
- structured_dataset: StructuredDataset,
113
- structured_dataset_type: types_pb2.StructuredDatasetType,
114
- ) -> literals_pb2.StructuredDataset:
115
- if not structured_dataset.uri:
116
- from union._context import internal_ctx
117
-
118
- ctx = internal_ctx()
119
- uri = str(ctx.raw_data.get_random_remote_path())
120
- else:
121
- uri = typing.cast(str, structured_dataset.uri)
122
-
123
- if not storage.is_remote(uri):
124
- Path(uri).mkdir(parents=True, exist_ok=True)
125
- path = os.path.join(uri, f"{0:05}")
126
- df = typing.cast(pd.DataFrame, structured_dataset.dataframe)
127
- df.to_parquet(
128
- path,
129
- coerce_timestamps="us",
130
- allow_truncated_timestamps=False,
131
- storage_options=get_pandas_storage_options(uri=path, data_config=None),
132
- )
133
- structured_dataset_type.format = PARQUET
134
- return literals_pb2.StructuredDataset(
135
- uri=uri, metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type=structured_dataset_type)
136
- )
137
-
138
-
139
- class ParquetToPandasDecodingHandler(StructuredDatasetDecoder):
140
- def __init__(self):
141
- super().__init__(pd.DataFrame, None, PARQUET)
142
-
143
- async def decode(
144
- self,
145
- flyte_value: literals_pb2.StructuredDataset,
146
- current_task_metadata: literals_pb2.StructuredDatasetMetadata,
147
- ) -> "pd.DataFrame":
148
- from botocore.exceptions import NoCredentialsError
149
-
150
- uri = flyte_value.uri
151
- columns = None
152
- kwargs = get_pandas_storage_options(uri=uri, data_config=None)
153
- if current_task_metadata.structured_dataset_type and current_task_metadata.structured_dataset_type.columns:
154
- columns = [c.name for c in current_task_metadata.structured_dataset_type.columns]
155
- try:
156
- return pd.read_parquet(uri, columns=columns, storage_options=kwargs)
157
- except NoCredentialsError:
158
- logger.debug("S3 source detected, attempting anonymous S3 access")
159
- kwargs = get_pandas_storage_options(uri=uri, data_config=None, anonymous=True)
160
- return pd.read_parquet(uri, columns=columns, storage_options=kwargs)
161
-
162
-
163
- class ArrowToParquetEncodingHandler(StructuredDatasetEncoder):
164
- def __init__(self):
165
- super().__init__(pa.Table, None, PARQUET)
166
-
167
- async def encode(
168
- self,
169
- structured_dataset: StructuredDataset,
170
- structured_dataset_type: types_pb2.StructuredDatasetType,
171
- ) -> literals_pb2.StructuredDataset:
172
- import pyarrow.parquet as pq
173
-
174
- if not structured_dataset.uri:
175
- from union._context import internal_ctx
176
-
177
- ctx = internal_ctx()
178
- uri = ctx.raw_data.get_random_remote_path()
179
- else:
180
- uri = typing.cast(str, structured_dataset.uri)
181
-
182
- if not storage.is_remote(uri):
183
- Path(uri).mkdir(parents=True, exist_ok=True)
184
- path = os.path.join(uri, f"{0:05}")
185
- filesystem = storage.get_underlying_filesystem(path=path)
186
- pq.write_table(structured_dataset.dataframe, strip_protocol(path), filesystem=filesystem)
187
- return literals_pb2.StructuredDataset(
188
- uri=uri, metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type)
189
- )
190
-
191
-
192
- class ParquetToArrowDecodingHandler(StructuredDatasetDecoder):
193
- def __init__(self):
194
- super().__init__(pa.Table, None, PARQUET)
195
-
196
- async def decode(
197
- self,
198
- proto_value: literals_pb2.StructuredDataset,
199
- current_task_metadata: literals_pb2.StructuredDatasetMetadata,
200
- ) -> "pa.Table":
201
- import pyarrow.parquet as pq
202
- from botocore.exceptions import NoCredentialsError
203
-
204
- uri = proto_value.uri
205
- if not storage.is_remote(uri):
206
- Path(uri).parent.mkdir(parents=True, exist_ok=True)
207
- _, path = split_protocol(uri)
208
-
209
- columns = None
210
- if current_task_metadata.structured_dataset_type and current_task_metadata.structured_dataset_type.columns:
211
- columns = [c.name for c in current_task_metadata.structured_dataset_type.columns]
212
- try:
213
- return pq.read_table(path, columns=columns)
214
- except NoCredentialsError as e:
215
- logger.debug("S3 source detected, attempting anonymous S3 access")
216
- fs = storage.get_underlying_filesystem(path=uri, anonymous=True)
217
- if fs is not None:
218
- return pq.read_table(path, filesystem=fs, columns=columns)
219
- raise e