flyte 0.2.0b1__py3-none-any.whl → 2.0.0b46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. flyte/__init__.py +83 -30
  2. flyte/_bin/connect.py +61 -0
  3. flyte/_bin/debug.py +38 -0
  4. flyte/_bin/runtime.py +87 -19
  5. flyte/_bin/serve.py +351 -0
  6. flyte/_build.py +3 -2
  7. flyte/_cache/cache.py +6 -5
  8. flyte/_cache/local_cache.py +216 -0
  9. flyte/_code_bundle/_ignore.py +31 -5
  10. flyte/_code_bundle/_packaging.py +42 -11
  11. flyte/_code_bundle/_utils.py +57 -34
  12. flyte/_code_bundle/bundle.py +130 -27
  13. flyte/_constants.py +1 -0
  14. flyte/_context.py +21 -5
  15. flyte/_custom_context.py +73 -0
  16. flyte/_debug/constants.py +37 -0
  17. flyte/_debug/utils.py +17 -0
  18. flyte/_debug/vscode.py +315 -0
  19. flyte/_deploy.py +396 -75
  20. flyte/_deployer.py +109 -0
  21. flyte/_environment.py +94 -11
  22. flyte/_excepthook.py +37 -0
  23. flyte/_group.py +2 -1
  24. flyte/_hash.py +1 -16
  25. flyte/_image.py +544 -231
  26. flyte/_initialize.py +456 -316
  27. flyte/_interface.py +40 -5
  28. flyte/_internal/controllers/__init__.py +22 -8
  29. flyte/_internal/controllers/_local_controller.py +159 -35
  30. flyte/_internal/controllers/_trace.py +18 -10
  31. flyte/_internal/controllers/remote/__init__.py +38 -9
  32. flyte/_internal/controllers/remote/_action.py +82 -12
  33. flyte/_internal/controllers/remote/_client.py +6 -2
  34. flyte/_internal/controllers/remote/_controller.py +290 -64
  35. flyte/_internal/controllers/remote/_core.py +155 -95
  36. flyte/_internal/controllers/remote/_informer.py +40 -20
  37. flyte/_internal/controllers/remote/_service_protocol.py +2 -2
  38. flyte/_internal/imagebuild/__init__.py +2 -10
  39. flyte/_internal/imagebuild/docker_builder.py +391 -84
  40. flyte/_internal/imagebuild/image_builder.py +111 -55
  41. flyte/_internal/imagebuild/remote_builder.py +409 -0
  42. flyte/_internal/imagebuild/utils.py +79 -0
  43. flyte/_internal/resolvers/_app_env_module.py +92 -0
  44. flyte/_internal/resolvers/_task_module.py +5 -38
  45. flyte/_internal/resolvers/app_env.py +26 -0
  46. flyte/_internal/resolvers/common.py +8 -1
  47. flyte/_internal/resolvers/default.py +2 -2
  48. flyte/_internal/runtime/convert.py +319 -36
  49. flyte/_internal/runtime/entrypoints.py +106 -18
  50. flyte/_internal/runtime/io.py +71 -23
  51. flyte/_internal/runtime/resources_serde.py +21 -7
  52. flyte/_internal/runtime/reuse.py +125 -0
  53. flyte/_internal/runtime/rusty.py +196 -0
  54. flyte/_internal/runtime/task_serde.py +239 -66
  55. flyte/_internal/runtime/taskrunner.py +48 -8
  56. flyte/_internal/runtime/trigger_serde.py +162 -0
  57. flyte/_internal/runtime/types_serde.py +7 -16
  58. flyte/_keyring/file.py +115 -0
  59. flyte/_link.py +30 -0
  60. flyte/_logging.py +241 -42
  61. flyte/_map.py +312 -0
  62. flyte/_metrics.py +59 -0
  63. flyte/_module.py +74 -0
  64. flyte/_pod.py +30 -0
  65. flyte/_resources.py +296 -33
  66. flyte/_retry.py +1 -7
  67. flyte/_reusable_environment.py +72 -7
  68. flyte/_run.py +462 -132
  69. flyte/_secret.py +47 -11
  70. flyte/_serve.py +333 -0
  71. flyte/_task.py +245 -56
  72. flyte/_task_environment.py +219 -97
  73. flyte/_task_plugins.py +47 -0
  74. flyte/_tools.py +8 -8
  75. flyte/_trace.py +15 -24
  76. flyte/_trigger.py +1027 -0
  77. flyte/_utils/__init__.py +12 -1
  78. flyte/_utils/asyn.py +3 -1
  79. flyte/_utils/async_cache.py +139 -0
  80. flyte/_utils/coro_management.py +5 -4
  81. flyte/_utils/description_parser.py +19 -0
  82. flyte/_utils/docker_credentials.py +173 -0
  83. flyte/_utils/helpers.py +45 -19
  84. flyte/_utils/module_loader.py +123 -0
  85. flyte/_utils/org_discovery.py +57 -0
  86. flyte/_utils/uv_script_parser.py +8 -1
  87. flyte/_version.py +16 -3
  88. flyte/app/__init__.py +27 -0
  89. flyte/app/_app_environment.py +362 -0
  90. flyte/app/_connector_environment.py +40 -0
  91. flyte/app/_deploy.py +130 -0
  92. flyte/app/_parameter.py +343 -0
  93. flyte/app/_runtime/__init__.py +3 -0
  94. flyte/app/_runtime/app_serde.py +383 -0
  95. flyte/app/_types.py +113 -0
  96. flyte/app/extras/__init__.py +9 -0
  97. flyte/app/extras/_auth_middleware.py +217 -0
  98. flyte/app/extras/_fastapi.py +93 -0
  99. flyte/app/extras/_model_loader/__init__.py +3 -0
  100. flyte/app/extras/_model_loader/config.py +7 -0
  101. flyte/app/extras/_model_loader/loader.py +288 -0
  102. flyte/cli/__init__.py +12 -0
  103. flyte/cli/_abort.py +28 -0
  104. flyte/cli/_build.py +114 -0
  105. flyte/cli/_common.py +493 -0
  106. flyte/cli/_create.py +371 -0
  107. flyte/cli/_delete.py +45 -0
  108. flyte/cli/_deploy.py +401 -0
  109. flyte/cli/_gen.py +316 -0
  110. flyte/cli/_get.py +446 -0
  111. flyte/cli/_option.py +33 -0
  112. flyte/{_cli → cli}/_params.py +57 -17
  113. flyte/cli/_plugins.py +209 -0
  114. flyte/cli/_prefetch.py +292 -0
  115. flyte/cli/_run.py +690 -0
  116. flyte/cli/_serve.py +338 -0
  117. flyte/cli/_update.py +86 -0
  118. flyte/cli/_user.py +20 -0
  119. flyte/cli/main.py +246 -0
  120. flyte/config/__init__.py +2 -167
  121. flyte/config/_config.py +215 -163
  122. flyte/config/_internal.py +10 -1
  123. flyte/config/_reader.py +225 -0
  124. flyte/connectors/__init__.py +11 -0
  125. flyte/connectors/_connector.py +330 -0
  126. flyte/connectors/_server.py +194 -0
  127. flyte/connectors/utils.py +159 -0
  128. flyte/errors.py +134 -2
  129. flyte/extend.py +24 -0
  130. flyte/extras/_container.py +69 -56
  131. flyte/git/__init__.py +3 -0
  132. flyte/git/_config.py +279 -0
  133. flyte/io/__init__.py +8 -1
  134. flyte/io/{structured_dataset → _dataframe}/__init__.py +32 -30
  135. flyte/io/{structured_dataset → _dataframe}/basic_dfs.py +75 -68
  136. flyte/io/{structured_dataset/structured_dataset.py → _dataframe/dataframe.py} +207 -242
  137. flyte/io/_dir.py +575 -113
  138. flyte/io/_file.py +587 -141
  139. flyte/io/_hashing_io.py +342 -0
  140. flyte/io/extend.py +7 -0
  141. flyte/models.py +635 -0
  142. flyte/prefetch/__init__.py +22 -0
  143. flyte/prefetch/_hf_model.py +563 -0
  144. flyte/remote/__init__.py +14 -3
  145. flyte/remote/_action.py +879 -0
  146. flyte/remote/_app.py +346 -0
  147. flyte/remote/_auth_metadata.py +42 -0
  148. flyte/remote/_client/_protocols.py +62 -4
  149. flyte/remote/_client/auth/_auth_utils.py +19 -0
  150. flyte/remote/_client/auth/_authenticators/base.py +8 -2
  151. flyte/remote/_client/auth/_authenticators/device_code.py +4 -5
  152. flyte/remote/_client/auth/_authenticators/factory.py +4 -0
  153. flyte/remote/_client/auth/_authenticators/passthrough.py +79 -0
  154. flyte/remote/_client/auth/_authenticators/pkce.py +17 -18
  155. flyte/remote/_client/auth/_channel.py +47 -18
  156. flyte/remote/_client/auth/_client_config.py +5 -3
  157. flyte/remote/_client/auth/_keyring.py +15 -2
  158. flyte/remote/_client/auth/_token_client.py +3 -3
  159. flyte/remote/_client/controlplane.py +206 -18
  160. flyte/remote/_common.py +66 -0
  161. flyte/remote/_data.py +107 -22
  162. flyte/remote/_logs.py +116 -33
  163. flyte/remote/_project.py +21 -19
  164. flyte/remote/_run.py +164 -631
  165. flyte/remote/_secret.py +72 -29
  166. flyte/remote/_task.py +387 -46
  167. flyte/remote/_trigger.py +368 -0
  168. flyte/remote/_user.py +43 -0
  169. flyte/report/_report.py +10 -6
  170. flyte/storage/__init__.py +13 -1
  171. flyte/storage/_config.py +237 -0
  172. flyte/storage/_parallel_reader.py +289 -0
  173. flyte/storage/_storage.py +268 -59
  174. flyte/syncify/__init__.py +56 -0
  175. flyte/syncify/_api.py +414 -0
  176. flyte/types/__init__.py +39 -0
  177. flyte/types/_interface.py +22 -7
  178. flyte/{io/pickle/transformer.py → types/_pickle.py} +37 -9
  179. flyte/types/_string_literals.py +8 -9
  180. flyte/types/_type_engine.py +226 -126
  181. flyte/types/_utils.py +1 -1
  182. flyte-2.0.0b46.data/scripts/debug.py +38 -0
  183. flyte-2.0.0b46.data/scripts/runtime.py +194 -0
  184. flyte-2.0.0b46.dist-info/METADATA +352 -0
  185. flyte-2.0.0b46.dist-info/RECORD +221 -0
  186. flyte-2.0.0b46.dist-info/entry_points.txt +8 -0
  187. flyte-2.0.0b46.dist-info/licenses/LICENSE +201 -0
  188. flyte/_api_commons.py +0 -3
  189. flyte/_cli/_common.py +0 -299
  190. flyte/_cli/_create.py +0 -42
  191. flyte/_cli/_delete.py +0 -23
  192. flyte/_cli/_deploy.py +0 -140
  193. flyte/_cli/_get.py +0 -235
  194. flyte/_cli/_run.py +0 -174
  195. flyte/_cli/main.py +0 -98
  196. flyte/_datastructures.py +0 -342
  197. flyte/_internal/controllers/pbhash.py +0 -39
  198. flyte/_protos/common/authorization_pb2.py +0 -66
  199. flyte/_protos/common/authorization_pb2.pyi +0 -108
  200. flyte/_protos/common/authorization_pb2_grpc.py +0 -4
  201. flyte/_protos/common/identifier_pb2.py +0 -71
  202. flyte/_protos/common/identifier_pb2.pyi +0 -82
  203. flyte/_protos/common/identifier_pb2_grpc.py +0 -4
  204. flyte/_protos/common/identity_pb2.py +0 -48
  205. flyte/_protos/common/identity_pb2.pyi +0 -72
  206. flyte/_protos/common/identity_pb2_grpc.py +0 -4
  207. flyte/_protos/common/list_pb2.py +0 -36
  208. flyte/_protos/common/list_pb2.pyi +0 -69
  209. flyte/_protos/common/list_pb2_grpc.py +0 -4
  210. flyte/_protos/common/policy_pb2.py +0 -37
  211. flyte/_protos/common/policy_pb2.pyi +0 -27
  212. flyte/_protos/common/policy_pb2_grpc.py +0 -4
  213. flyte/_protos/common/role_pb2.py +0 -37
  214. flyte/_protos/common/role_pb2.pyi +0 -53
  215. flyte/_protos/common/role_pb2_grpc.py +0 -4
  216. flyte/_protos/common/runtime_version_pb2.py +0 -28
  217. flyte/_protos/common/runtime_version_pb2.pyi +0 -24
  218. flyte/_protos/common/runtime_version_pb2_grpc.py +0 -4
  219. flyte/_protos/logs/dataplane/payload_pb2.py +0 -96
  220. flyte/_protos/logs/dataplane/payload_pb2.pyi +0 -168
  221. flyte/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
  222. flyte/_protos/secret/definition_pb2.py +0 -49
  223. flyte/_protos/secret/definition_pb2.pyi +0 -93
  224. flyte/_protos/secret/definition_pb2_grpc.py +0 -4
  225. flyte/_protos/secret/payload_pb2.py +0 -62
  226. flyte/_protos/secret/payload_pb2.pyi +0 -94
  227. flyte/_protos/secret/payload_pb2_grpc.py +0 -4
  228. flyte/_protos/secret/secret_pb2.py +0 -38
  229. flyte/_protos/secret/secret_pb2.pyi +0 -6
  230. flyte/_protos/secret/secret_pb2_grpc.py +0 -198
  231. flyte/_protos/secret/secret_pb2_grpc_grpc.py +0 -198
  232. flyte/_protos/validate/validate/validate_pb2.py +0 -76
  233. flyte/_protos/workflow/node_execution_service_pb2.py +0 -26
  234. flyte/_protos/workflow/node_execution_service_pb2.pyi +0 -4
  235. flyte/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
  236. flyte/_protos/workflow/queue_service_pb2.py +0 -106
  237. flyte/_protos/workflow/queue_service_pb2.pyi +0 -141
  238. flyte/_protos/workflow/queue_service_pb2_grpc.py +0 -172
  239. flyte/_protos/workflow/run_definition_pb2.py +0 -128
  240. flyte/_protos/workflow/run_definition_pb2.pyi +0 -310
  241. flyte/_protos/workflow/run_definition_pb2_grpc.py +0 -4
  242. flyte/_protos/workflow/run_logs_service_pb2.py +0 -41
  243. flyte/_protos/workflow/run_logs_service_pb2.pyi +0 -28
  244. flyte/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
  245. flyte/_protos/workflow/run_service_pb2.py +0 -133
  246. flyte/_protos/workflow/run_service_pb2.pyi +0 -175
  247. flyte/_protos/workflow/run_service_pb2_grpc.py +0 -412
  248. flyte/_protos/workflow/state_service_pb2.py +0 -58
  249. flyte/_protos/workflow/state_service_pb2.pyi +0 -71
  250. flyte/_protos/workflow/state_service_pb2_grpc.py +0 -138
  251. flyte/_protos/workflow/task_definition_pb2.py +0 -72
  252. flyte/_protos/workflow/task_definition_pb2.pyi +0 -65
  253. flyte/_protos/workflow/task_definition_pb2_grpc.py +0 -4
  254. flyte/_protos/workflow/task_service_pb2.py +0 -44
  255. flyte/_protos/workflow/task_service_pb2.pyi +0 -31
  256. flyte/_protos/workflow/task_service_pb2_grpc.py +0 -104
  257. flyte/io/_dataframe.py +0 -0
  258. flyte/io/pickle/__init__.py +0 -0
  259. flyte/remote/_console.py +0 -18
  260. flyte-0.2.0b1.dist-info/METADATA +0 -179
  261. flyte-0.2.0b1.dist-info/RECORD +0 -204
  262. flyte-0.2.0b1.dist-info/entry_points.txt +0 -3
  263. /flyte/{_cli → _debug}/__init__.py +0 -0
  264. /flyte/{_protos → _keyring}/__init__.py +0 -0
  265. {flyte-0.2.0b1.dist-info → flyte-2.0.0b46.dist-info}/WHEEL +0 -0
  266. {flyte-0.2.0b1.dist-info → flyte-2.0.0b46.dist-info}/top_level.txt +0 -0
@@ -3,18 +3,18 @@ import typing
3
3
  from pathlib import Path
4
4
  from typing import TypeVar
5
5
 
6
- from flyteidl.core import literals_pb2, types_pb2
6
+ from flyteidl2.core import literals_pb2, types_pb2
7
7
  from fsspec.core import split_protocol, strip_protocol
8
8
 
9
9
  import flyte.storage as storage
10
10
  from flyte._logging import logger
11
11
  from flyte._utils import lazy_module
12
- from flyte.io.structured_dataset.structured_dataset import (
12
+ from flyte.io._dataframe.dataframe import (
13
13
  CSV,
14
14
  PARQUET,
15
- StructuredDataset,
16
- StructuredDatasetDecoder,
17
- StructuredDatasetEncoder,
15
+ DataFrame,
16
+ DataFrameDecoder,
17
+ DataFrameEncoder,
18
18
  )
19
19
 
20
20
  if typing.TYPE_CHECKING:
@@ -27,58 +27,51 @@ else:
27
27
  T = TypeVar("T")
28
28
 
29
29
 
30
- # pr: add back after storage
31
- def get_pandas_storage_options(uri: str, data_config=None, anonymous: bool = False) -> typing.Optional[typing.Dict]:
30
+ def get_pandas_storage_options(uri: str, anonymous: bool = False) -> typing.Optional[typing.Dict]:
32
31
  from pandas.io.common import is_fsspec_url # type: ignore
33
32
 
34
33
  if is_fsspec_url(uri):
35
34
  if uri.startswith("s3"):
36
- # pr: after storage, replace with real call to get_fsspec_storage_options
37
- return {
38
- "cache_regions": True,
39
- "client_kwargs": {"endpoint_url": "http://localhost:30002"},
40
- "key": "minio",
41
- "secret": "miniostorage",
42
- }
35
+ return storage.get_configured_fsspec_kwargs("s3", anonymous=anonymous)
43
36
  return {}
44
37
 
45
38
  # Pandas does not allow storage_options for non-fsspec paths e.g. local.
46
39
  return None
47
40
 
48
41
 
49
- class PandasToCSVEncodingHandler(StructuredDatasetEncoder):
42
+ class PandasToCSVEncodingHandler(DataFrameEncoder):
50
43
  def __init__(self):
51
44
  super().__init__(pd.DataFrame, None, CSV)
52
45
 
53
46
  async def encode(
54
47
  self,
55
- structured_dataset: StructuredDataset,
48
+ dataframe: DataFrame,
56
49
  structured_dataset_type: types_pb2.StructuredDatasetType,
57
50
  ) -> literals_pb2.StructuredDataset:
58
- if not structured_dataset.uri:
51
+ if not dataframe.uri:
59
52
  from flyte._context import internal_ctx
60
53
 
61
54
  ctx = internal_ctx()
62
55
  uri = ctx.raw_data.get_random_remote_path()
63
56
  else:
64
- uri = typing.cast(str, structured_dataset.uri)
57
+ uri = typing.cast(str, dataframe.uri)
65
58
 
66
59
  if not storage.is_remote(uri):
67
60
  Path(uri).mkdir(parents=True, exist_ok=True)
68
- path = os.path.join(uri, ".csv")
69
- df = typing.cast(pd.DataFrame, structured_dataset.dataframe)
61
+ csv_file = storage.join(uri, "data.csv")
62
+ df = typing.cast(pd.DataFrame, dataframe.val)
70
63
  df.to_csv(
71
- path,
64
+ csv_file,
72
65
  index=False,
73
- storage_options=get_pandas_storage_options(uri=path, data_config=None),
66
+ storage_options=get_pandas_storage_options(uri=csv_file),
74
67
  )
75
68
  structured_dataset_type.format = CSV
76
69
  return literals_pb2.StructuredDataset(
77
- uri=uri, metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type)
70
+ uri=uri, metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type=structured_dataset_type)
78
71
  )
79
72
 
80
73
 
81
- class CSVToPandasDecodingHandler(StructuredDatasetDecoder):
74
+ class CSVToPandasDecodingHandler(DataFrameDecoder):
82
75
  def __init__(self):
83
76
  super().__init__(pd.DataFrame, None, CSV)
84
77
 
@@ -87,48 +80,58 @@ class CSVToPandasDecodingHandler(StructuredDatasetDecoder):
87
80
  proto_value: literals_pb2.StructuredDataset,
88
81
  current_task_metadata: literals_pb2.StructuredDatasetMetadata,
89
82
  ) -> "pd.DataFrame":
90
- from botocore.exceptions import NoCredentialsError
91
-
92
83
  uri = proto_value.uri
93
84
  columns = None
94
- kwargs = get_pandas_storage_options(uri=uri, data_config=None)
95
- path = os.path.join(uri, ".csv")
85
+ kwargs = get_pandas_storage_options(uri=uri)
86
+ csv_file = storage.join(uri, "data.csv")
96
87
  if current_task_metadata.structured_dataset_type and current_task_metadata.structured_dataset_type.columns:
97
88
  columns = [c.name for c in current_task_metadata.structured_dataset_type.columns]
98
89
  try:
99
- return pd.read_csv(path, usecols=columns, storage_options=kwargs)
100
- except NoCredentialsError:
101
- logger.debug("S3 source detected, attempting anonymous S3 access")
102
- kwargs = get_pandas_storage_options(uri=uri, data_config=None, anonymous=True)
103
- return pd.read_csv(path, usecols=columns, storage_options=kwargs)
104
-
105
-
106
- class PandasToParquetEncodingHandler(StructuredDatasetEncoder):
90
+ import io
91
+
92
+ # The pattern used here is a bit wonky because of obstore issues with csv, getting early eof error.
93
+ buf = io.BytesIO()
94
+ async for chunk in storage.get_stream(csv_file):
95
+ buf.write(chunk)
96
+ buf.seek(0)
97
+ df = pd.read_csv(buf)
98
+ return df
99
+
100
+ except Exception as exc:
101
+ if exc.__class__.__name__ == "NoCredentialsError":
102
+ logger.debug("S3 source detected, attempting anonymous S3 access")
103
+ kwargs = get_pandas_storage_options(uri=uri, anonymous=True)
104
+ return pd.read_csv(csv_file, usecols=columns, storage_options=kwargs)
105
+ else:
106
+ raise
107
+
108
+
109
+ class PandasToParquetEncodingHandler(DataFrameEncoder):
107
110
  def __init__(self):
108
111
  super().__init__(pd.DataFrame, None, PARQUET)
109
112
 
110
113
  async def encode(
111
114
  self,
112
- structured_dataset: StructuredDataset,
115
+ dataframe: DataFrame,
113
116
  structured_dataset_type: types_pb2.StructuredDatasetType,
114
117
  ) -> literals_pb2.StructuredDataset:
115
- if not structured_dataset.uri:
118
+ if not dataframe.uri:
116
119
  from flyte._context import internal_ctx
117
120
 
118
121
  ctx = internal_ctx()
119
122
  uri = str(ctx.raw_data.get_random_remote_path())
120
123
  else:
121
- uri = typing.cast(str, structured_dataset.uri)
124
+ uri = typing.cast(str, dataframe.uri)
122
125
 
123
126
  if not storage.is_remote(uri):
124
127
  Path(uri).mkdir(parents=True, exist_ok=True)
125
128
  path = os.path.join(uri, f"{0:05}")
126
- df = typing.cast(pd.DataFrame, structured_dataset.dataframe)
129
+ df = typing.cast(pd.DataFrame, dataframe.val)
127
130
  df.to_parquet(
128
131
  path,
129
132
  coerce_timestamps="us",
130
133
  allow_truncated_timestamps=False,
131
- storage_options=get_pandas_storage_options(uri=path, data_config=None),
134
+ storage_options=get_pandas_storage_options(uri=path),
132
135
  )
133
136
  structured_dataset_type.format = PARQUET
134
137
  return literals_pb2.StructuredDataset(
@@ -136,7 +139,7 @@ class PandasToParquetEncodingHandler(StructuredDatasetEncoder):
136
139
  )
137
140
 
138
141
 
139
- class ParquetToPandasDecodingHandler(StructuredDatasetDecoder):
142
+ class ParquetToPandasDecodingHandler(DataFrameDecoder):
140
143
  def __init__(self):
141
144
  super().__init__(pd.DataFrame, None, PARQUET)
142
145
 
@@ -145,51 +148,53 @@ class ParquetToPandasDecodingHandler(StructuredDatasetDecoder):
145
148
  flyte_value: literals_pb2.StructuredDataset,
146
149
  current_task_metadata: literals_pb2.StructuredDatasetMetadata,
147
150
  ) -> "pd.DataFrame":
148
- from botocore.exceptions import NoCredentialsError
149
-
150
151
  uri = flyte_value.uri
151
152
  columns = None
152
- kwargs = get_pandas_storage_options(uri=uri, data_config=None)
153
+ kwargs = get_pandas_storage_options(uri=uri)
153
154
  if current_task_metadata.structured_dataset_type and current_task_metadata.structured_dataset_type.columns:
154
155
  columns = [c.name for c in current_task_metadata.structured_dataset_type.columns]
155
156
  try:
156
157
  return pd.read_parquet(uri, columns=columns, storage_options=kwargs)
157
- except NoCredentialsError:
158
- logger.debug("S3 source detected, attempting anonymous S3 access")
159
- kwargs = get_pandas_storage_options(uri=uri, data_config=None, anonymous=True)
160
- return pd.read_parquet(uri, columns=columns, storage_options=kwargs)
158
+ except Exception as exc:
159
+ if exc.__class__.__name__ == "NoCredentialsError":
160
+ logger.debug("S3 source detected, attempting anonymous S3 access")
161
+ kwargs = get_pandas_storage_options(uri=uri, anonymous=True)
162
+ return pd.read_parquet(uri, columns=columns, storage_options=kwargs)
163
+ else:
164
+ raise
161
165
 
162
166
 
163
- class ArrowToParquetEncodingHandler(StructuredDatasetEncoder):
167
+ class ArrowToParquetEncodingHandler(DataFrameEncoder):
164
168
  def __init__(self):
165
169
  super().__init__(pa.Table, None, PARQUET)
166
170
 
167
171
  async def encode(
168
172
  self,
169
- structured_dataset: StructuredDataset,
170
- structured_dataset_type: types_pb2.StructuredDatasetType,
173
+ dataframe: DataFrame,
174
+ dataframe_type: types_pb2.StructuredDatasetType,
171
175
  ) -> literals_pb2.StructuredDataset:
172
176
  import pyarrow.parquet as pq
173
177
 
174
- if not structured_dataset.uri:
178
+ if not dataframe.uri:
175
179
  from flyte._context import internal_ctx
176
180
 
177
181
  ctx = internal_ctx()
178
182
  uri = ctx.raw_data.get_random_remote_path()
179
183
  else:
180
- uri = typing.cast(str, structured_dataset.uri)
184
+ uri = typing.cast(str, dataframe.uri)
181
185
 
182
186
  if not storage.is_remote(uri):
183
187
  Path(uri).mkdir(parents=True, exist_ok=True)
184
188
  path = os.path.join(uri, f"{0:05}")
185
189
  filesystem = storage.get_underlying_filesystem(path=path)
186
- pq.write_table(structured_dataset.dataframe, strip_protocol(path), filesystem=filesystem)
190
+ pq.write_table(dataframe.val, strip_protocol(path), filesystem=filesystem)
187
191
  return literals_pb2.StructuredDataset(
188
- uri=uri, metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type)
192
+ uri=uri,
193
+ metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type=dataframe_type),
189
194
  )
190
195
 
191
196
 
192
- class ParquetToArrowDecodingHandler(StructuredDatasetDecoder):
197
+ class ParquetToArrowDecodingHandler(DataFrameDecoder):
193
198
  def __init__(self):
194
199
  super().__init__(pa.Table, None, PARQUET)
195
200
 
@@ -199,21 +204,23 @@ class ParquetToArrowDecodingHandler(StructuredDatasetDecoder):
199
204
  current_task_metadata: literals_pb2.StructuredDatasetMetadata,
200
205
  ) -> "pa.Table":
201
206
  import pyarrow.parquet as pq
202
- from botocore.exceptions import NoCredentialsError
203
207
 
204
- uri = proto_value.uri
205
- if not storage.is_remote(uri):
206
- Path(uri).parent.mkdir(parents=True, exist_ok=True)
207
- _, path = split_protocol(uri)
208
+ path = proto_value.uri
209
+ if not storage.is_remote(path):
210
+ Path(path).parent.mkdir(parents=True, exist_ok=True)
211
+ _, path = split_protocol(path)
208
212
 
209
213
  columns = None
210
214
  if current_task_metadata.structured_dataset_type and current_task_metadata.structured_dataset_type.columns:
211
215
  columns = [c.name for c in current_task_metadata.structured_dataset_type.columns]
212
216
  try:
213
217
  return pq.read_table(path, columns=columns)
214
- except NoCredentialsError as e:
215
- logger.debug("S3 source detected, attempting anonymous S3 access")
216
- fs = storage.get_underlying_filesystem(path=uri, anonymous=True)
217
- if fs is not None:
218
- return pq.read_table(path, filesystem=fs, columns=columns)
219
- raise e
218
+ except Exception as exc:
219
+ if exc.__class__.__name__ == "NoCredentialsError":
220
+ logger.debug("S3 source detected, attempting anonymous S3 access")
221
+ fs = storage.get_underlying_filesystem(path=path, anonymous=True)
222
+ if fs is not None:
223
+ return pq.read_table(path, filesystem=fs, columns=columns)
224
+ return None
225
+ else:
226
+ raise